Subzero. Code organization.

This CL does more than any CL should.

First, it moves all target-specific classes (TargetLowering, Assembler,
and Instructions) to a target-specific namespace. For example, the
::Ice::TargetX8632 class now lives in ::Ice::X8632::TargetX8632. Same
goes for ARM32, X8664, and MIPS32. Now, we have a ton of redundant
prefixes (it should be pretty obvious that ::Ice::X8632::TargetLowering
is an X8632 target lowering), but cleaning that up is definitely not
something for this CL.

Second, this CL gets rid of the excessive use of 'typename Foo::Bar'
in the X86 templates. These changes appear more intimidating than they
really are, and they were fairly mechanical.

Third, the x86?? Traitses (gollum!) classes are no longer template
instantiations. The previous X86 templates were parameterized with an
X86 TargetLowering, and they assumed that a MachineTraits<Target>
was defined for that TargetLowering. The X86 templates are now
parameterized with a TraitsType, and different backends may have
completely unrelated traits.

Fourth, the X86 templates are no longer members of
::Ice::X86Internal. Instead, each file #include'ing an Ice*X86Base.h
file needs to #define X86NAMESPACE to the namespace where the backend
is being defined. With this change, the template instantiations for
X8632 live in ::Ice::X8632, and, for X8664, in ::Ice::X8664.

BUG=
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1548363002 .
diff --git a/src/IceAssemblerX8632.h b/src/IceAssemblerX8632.h
index d84d232..6f3f819 100644
--- a/src/IceAssemblerX8632.h
+++ b/src/IceAssemblerX8632.h
@@ -16,47 +16,24 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief Implements the Assembler class for x86-32.
+/// \brief Instantiates the Assembler for X86-32.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERX8632_H
 #define SUBZERO_SRC_ICEASSEMBLERX8632_H
 
-#include "IceAssembler.h"
+#define X86NAMESPACE X8632
 #include "IceAssemblerX86Base.h"
-#include "IceDefs.h"
-#include "IceOperand.h"
+#undef X86NAMESPACE
 #include "IceTargetLoweringX8632Traits.h"
-#include "IceTypes.h"
-#include "IceUtils.h"
 
 namespace Ice {
-
-class TargetX8632;
-
 namespace X8632 {
 
-using Immediate = ::Ice::X86Internal::Immediate;
-using Label = ::Ice::X86Internal::Label;
-
-class AssemblerX8632 : public X86Internal::AssemblerX86Base<TargetX8632> {
-  AssemblerX8632(const AssemblerX8632 &) = delete;
-  AssemblerX8632 &operator=(const AssemblerX8632 &) = delete;
-
-public:
-  explicit AssemblerX8632(bool use_far_branches = false)
-      : X86Internal::AssemblerX86Base<TargetX8632>(Asm_X8632,
-                                                   use_far_branches) {}
-  ~AssemblerX8632() override = default;
-
-  static bool classof(const Assembler *Asm) {
-    return Asm->getKind() == Asm_X8632;
-  }
-
-private:
-  ENABLE_MAKE_UNIQUE;
-};
+using AssemblerX8632 = AssemblerX86Base<X8632::Traits>;
+using Label = AssemblerX8632::Label;
+using Immediate = AssemblerX8632::Immediate;
 
 } // end of namespace X8632
 } // end of namespace Ice
diff --git a/src/IceAssemblerX8664.h b/src/IceAssemblerX8664.h
index 2ab46e1..18215cd 100644
--- a/src/IceAssemblerX8664.h
+++ b/src/IceAssemblerX8664.h
@@ -16,47 +16,24 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief Implements the Assembler class for x86-64.
+/// \brief Instantiates the Assembler for X86-64.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERX8664_H
 #define SUBZERO_SRC_ICEASSEMBLERX8664_H
 
-#include "IceAssembler.h"
+#define X86NAMESPACE X8664
 #include "IceAssemblerX86Base.h"
-#include "IceDefs.h"
-#include "IceOperand.h"
+#undef X86NAMESPACE
 #include "IceTargetLoweringX8664Traits.h"
-#include "IceTypes.h"
-#include "IceUtils.h"
 
 namespace Ice {
-
-class TargetX8664;
-
 namespace X8664 {
 
-using Immediate = ::Ice::X86Internal::Immediate;
-using Label = ::Ice::X86Internal::Label;
-
-class AssemblerX8664 : public X86Internal::AssemblerX86Base<TargetX8664> {
-  AssemblerX8664(const AssemblerX8664 &) = delete;
-  AssemblerX8664 &operator=(const AssemblerX8664 &) = delete;
-
-public:
-  explicit AssemblerX8664(bool use_far_branches = false)
-      : X86Internal::AssemblerX86Base<TargetX8664>(Asm_X8664,
-                                                   use_far_branches) {}
-  ~AssemblerX8664() override = default;
-
-  static bool classof(const Assembler *Asm) {
-    return Asm->getKind() == Asm_X8664;
-  }
-
-private:
-  ENABLE_MAKE_UNIQUE;
-};
+using AssemblerX8664 = AssemblerX86Base<X8664::Traits>;
+using Label = AssemblerX8664::Label;
+using Immediate = AssemblerX8664::Immediate;
 
 } // end of namespace X8664
 } // end of namespace Ice
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index e3791fa..5a72b6c 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -32,102 +32,116 @@
 
 namespace Ice {
 
-namespace X86Internal {
+#ifndef X86NAMESPACE
+#error "You must define the X86 Target namespace."
+#endif
 
-template <class Machine> class AssemblerX86Base;
-template <class Machine> struct MachineTraits;
+namespace X86NAMESPACE {
 
-constexpr int MAX_NOP_SIZE = 8;
-
-class Immediate {
-  Immediate(const Immediate &) = delete;
-  Immediate &operator=(const Immediate &) = delete;
-
-public:
-  explicit Immediate(int32_t value) : value_(value) {}
-
-  Immediate(RelocOffsetT offset, AssemblerFixup *fixup)
-      : value_(offset), fixup_(fixup) {
-    // Use the Offset in the "value" for now. If we decide to process fixups,
-    // we'll need to patch that offset with the true value.
-  }
-
-  int32_t value() const { return value_; }
-  AssemblerFixup *fixup() const { return fixup_; }
-
-  bool is_int8() const {
-    // We currently only allow 32-bit fixups, and they usually have value = 0,
-    // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
-    return fixup_ == nullptr && Utils::IsInt(8, value_);
-  }
-  bool is_uint8() const {
-    return fixup_ == nullptr && Utils::IsUint(8, value_);
-  }
-  bool is_uint16() const {
-    return fixup_ == nullptr && Utils::IsUint(16, value_);
-  }
-
-private:
-  const int32_t value_;
-  AssemblerFixup *fixup_ = nullptr;
-};
-
-/// X86 allows near and far jumps.
-class Label final : public Ice::Label {
-  Label(const Label &) = delete;
-  Label &operator=(const Label &) = delete;
-
-public:
-  Label() = default;
-  ~Label() = default;
-
-  void finalCheck() const override {
-    Ice::Label::finalCheck();
-    assert(!hasNear());
-  }
-
-  /// Returns the position of an earlier branch instruction which assumes that
-  /// this label is "near", and bumps iterator to the next near position.
-  intptr_t getNearPosition() {
-    assert(hasNear());
-    intptr_t Pos = UnresolvedNearPositions.back();
-    UnresolvedNearPositions.pop_back();
-    return Pos;
-  }
-
-  bool hasNear() const { return !UnresolvedNearPositions.empty(); }
-  bool isUnused() const override {
-    return Ice::Label::isUnused() && !hasNear();
-  }
-
-private:
-  void nearLinkTo(const Assembler &Asm, intptr_t position) {
-    if (Asm.getPreliminary())
-      return;
-    assert(!isBound());
-    UnresolvedNearPositions.push_back(position);
-  }
-
-  llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
-
-  template <class> friend class AssemblerX86Base;
-};
-
-template <class Machine> class AssemblerX86Base : public Assembler {
+template <typename TraitsType>
+class AssemblerX86Base : public ::Ice::Assembler {
   AssemblerX86Base(const AssemblerX86Base &) = delete;
   AssemblerX86Base &operator=(const AssemblerX86Base &) = delete;
 
 protected:
-  AssemblerX86Base(AssemblerKind Kind, bool use_far_branches)
-      : Assembler(Kind) {
+  explicit AssemblerX86Base(bool use_far_branches = false)
+      : Assembler(Traits::AsmKind) {
     // This mode is only needed and implemented for MIPS and ARM.
     assert(!use_far_branches);
     (void)use_far_branches;
   }
 
 public:
-  using Traits = MachineTraits<Machine>;
+  using Traits = TraitsType;
+  using Address = typename Traits::Address;
+  using ByteRegister = typename Traits::ByteRegister;
+  using BrCond = typename Traits::Cond::BrCond;
+  using CmppsCond = typename Traits::Cond::CmppsCond;
+  using GPRRegister = typename Traits::GPRRegister;
+  using Operand = typename Traits::Operand;
+  using XmmRegister = typename Traits::XmmRegister;
 
+  static constexpr int MAX_NOP_SIZE = 8;
+
+  static bool classof(const Assembler *Asm) {
+    return Asm->getKind() == Traits::AsmKind;
+  }
+
+  class Immediate {
+    Immediate(const Immediate &) = delete;
+    Immediate &operator=(const Immediate &) = delete;
+
+  public:
+    explicit Immediate(int32_t value) : value_(value) {}
+
+    Immediate(RelocOffsetT offset, AssemblerFixup *fixup)
+        : value_(offset), fixup_(fixup) {
+      // Use the Offset in the "value" for now. If we decide to process fixups,
+      // we'll need to patch that offset with the true value.
+    }
+
+    int32_t value() const { return value_; }
+    AssemblerFixup *fixup() const { return fixup_; }
+
+    bool is_int8() const {
+      // We currently only allow 32-bit fixups, and they usually have value = 0,
+      // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
+      return fixup_ == nullptr && Utils::IsInt(8, value_);
+    }
+    bool is_uint8() const {
+      return fixup_ == nullptr && Utils::IsUint(8, value_);
+    }
+    bool is_uint16() const {
+      return fixup_ == nullptr && Utils::IsUint(16, value_);
+    }
+
+  private:
+    const int32_t value_;
+    AssemblerFixup *fixup_ = nullptr;
+  };
+
+  /// X86 allows near and far jumps.
+  class Label final : public Ice::Label {
+    Label(const Label &) = delete;
+    Label &operator=(const Label &) = delete;
+
+  public:
+    Label() = default;
+    ~Label() = default;
+
+    void finalCheck() const override {
+      Ice::Label::finalCheck();
+      assert(!hasNear());
+    }
+
+    /// Returns the position of an earlier branch instruction which assumes that
+    /// this label is "near", and bumps iterator to the next near position.
+    intptr_t getNearPosition() {
+      assert(hasNear());
+      intptr_t Pos = UnresolvedNearPositions.back();
+      UnresolvedNearPositions.pop_back();
+      return Pos;
+    }
+
+    bool hasNear() const { return !UnresolvedNearPositions.empty(); }
+    bool isUnused() const override {
+      return Ice::Label::isUnused() && !hasNear();
+    }
+
+  private:
+    friend class AssemblerX86Base<TraitsType>;
+
+    void nearLinkTo(const Assembler &Asm, intptr_t position) {
+      if (Asm.getPreliminary())
+        return;
+      assert(!isBound());
+      UnresolvedNearPositions.push_back(position);
+    }
+
+    llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
+  };
+
+public:
   ~AssemblerX86Base() override;
 
   static const bool kNearJump = true;
@@ -166,21 +180,19 @@
   }
 
   // Operations to emit GPR instructions (and dispatch on operand type).
-  using TypedEmitGPR = void (AssemblerX86Base::*)(Type,
-                                                  typename Traits::GPRRegister);
-  using TypedEmitAddr =
-      void (AssemblerX86Base::*)(Type, const typename Traits::Address &);
+  using TypedEmitGPR = void (AssemblerX86Base::*)(Type, GPRRegister);
+  using TypedEmitAddr = void (AssemblerX86Base::*)(Type, const Address &);
   struct GPREmitterOneOp {
     TypedEmitGPR Reg;
     TypedEmitAddr Addr;
   };
 
-  using TypedEmitGPRGPR = void (AssemblerX86Base::*)(
-      Type, typename Traits::GPRRegister, typename Traits::GPRRegister);
-  using TypedEmitGPRAddr = void (AssemblerX86Base::*)(
-      Type, typename Traits::GPRRegister, const typename Traits::Address &);
-  using TypedEmitGPRImm = void (AssemblerX86Base::*)(
-      Type, typename Traits::GPRRegister, const Immediate &);
+  using TypedEmitGPRGPR = void (AssemblerX86Base::*)(Type, GPRRegister,
+                                                     GPRRegister);
+  using TypedEmitGPRAddr = void (AssemblerX86Base::*)(Type, GPRRegister,
+                                                      const Address &);
+  using TypedEmitGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
+                                                     const Immediate &);
   struct GPREmitterRegOp {
     TypedEmitGPRGPR GPRGPR;
     TypedEmitGPRAddr GPRAddr;
@@ -188,15 +200,15 @@
   };
 
   struct GPREmitterShiftOp {
-    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are not.
-    // In practice, we always normalize the Dest to a Register first.
+    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are
+    // not. In practice, we always normalize the Dest to a Register first.
     TypedEmitGPRGPR GPRGPR;
     TypedEmitGPRImm GPRImm;
   };
 
-  using TypedEmitGPRGPRImm = void (AssemblerX86Base::*)(
-      Type, typename Traits::GPRRegister, typename Traits::GPRRegister,
-      const Immediate &);
+  using TypedEmitGPRGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
+                                                        GPRRegister,
+                                                        const Immediate &);
   struct GPREmitterShiftD {
     // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
     // always normalize Dest to a Register first.
@@ -204,39 +216,36 @@
     TypedEmitGPRGPRImm GPRGPRImm;
   };
 
-  using TypedEmitAddrGPR = void (AssemblerX86Base::*)(
-      Type, const typename Traits::Address &, typename Traits::GPRRegister);
-  using TypedEmitAddrImm = void (AssemblerX86Base::*)(
-      Type, const typename Traits::Address &, const Immediate &);
+  using TypedEmitAddrGPR = void (AssemblerX86Base::*)(Type, const Address &,
+                                                      GPRRegister);
+  using TypedEmitAddrImm = void (AssemblerX86Base::*)(Type, const Address &,
+                                                      const Immediate &);
   struct GPREmitterAddrOp {
     TypedEmitAddrGPR AddrGPR;
     TypedEmitAddrImm AddrImm;
   };
 
   // Operations to emit XMM instructions (and dispatch on operand type).
-  using TypedEmitXmmXmm = void (AssemblerX86Base::*)(
-      Type, typename Traits::XmmRegister, typename Traits::XmmRegister);
-  using TypedEmitXmmAddr = void (AssemblerX86Base::*)(
-      Type, typename Traits::XmmRegister, const typename Traits::Address &);
+  using TypedEmitXmmXmm = void (AssemblerX86Base::*)(Type, XmmRegister,
+                                                     XmmRegister);
+  using TypedEmitXmmAddr = void (AssemblerX86Base::*)(Type, XmmRegister,
+                                                      const Address &);
   struct XmmEmitterRegOp {
     TypedEmitXmmXmm XmmXmm;
     TypedEmitXmmAddr XmmAddr;
   };
 
-  using EmitXmmXmm = void (AssemblerX86Base::*)(typename Traits::XmmRegister,
-                                                typename Traits::XmmRegister);
-  using EmitXmmAddr = void (AssemblerX86Base::*)(
-      typename Traits::XmmRegister, const typename Traits::Address &);
-  using EmitAddrXmm = void (AssemblerX86Base::*)(
-      const typename Traits::Address &, typename Traits::XmmRegister);
+  using EmitXmmXmm = void (AssemblerX86Base::*)(XmmRegister, XmmRegister);
+  using EmitXmmAddr = void (AssemblerX86Base::*)(XmmRegister, const Address &);
+  using EmitAddrXmm = void (AssemblerX86Base::*)(const Address &, XmmRegister);
   struct XmmEmitterMovOps {
     EmitXmmXmm XmmXmm;
     EmitXmmAddr XmmAddr;
     EmitAddrXmm AddrXmm;
   };
 
-  using TypedEmitXmmImm = void (AssemblerX86Base::*)(
-      Type, typename Traits::XmmRegister, const Immediate &);
+  using TypedEmitXmmImm = void (AssemblerX86Base::*)(Type, XmmRegister,
+                                                     const Immediate &);
 
   struct XmmEmitterShiftOp {
     TypedEmitXmmXmm XmmXmm;
@@ -248,8 +257,8 @@
   template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
     using TypedEmitRegs = void (AssemblerX86Base::*)(Type, DReg_t, Type,
                                                      SReg_t);
-    using TypedEmitAddr = void (AssemblerX86Base::*)(
-        Type, DReg_t, Type, const typename Traits::Address &);
+    using TypedEmitAddr = void (AssemblerX86Base::*)(Type, DReg_t, Type,
+                                                     const Address &);
 
     TypedEmitRegs RegReg;
     TypedEmitAddr RegAddr;
@@ -260,8 +269,9 @@
   template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
     using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t,
                                                           const Immediate &);
-    using TypedEmitRegAddrImm = void (AssemblerX86Base::*)(
-        Type, DReg_t, const typename Traits::Address &, const Immediate &);
+    using TypedEmitRegAddrImm = void (AssemblerX86Base::*)(Type, DReg_t,
+                                                           const Address &,
+                                                           const Immediate &);
 
     TypedEmitRegRegImm RegRegImm;
     TypedEmitRegAddrImm RegAddrImm;
@@ -270,17 +280,17 @@
   /*
    * Emit Machine Instructions.
    */
-  void call(typename Traits::GPRRegister reg);
-  void call(const typename Traits::Address &address);
+  void call(GPRRegister reg);
+  void call(const Address &address);
   void call(const ConstantRelocatable *label); // not testable.
   void call(const Immediate &abs_address);
 
   static const intptr_t kCallExternalLabelSize = 5;
 
-  void pushl(typename Traits::GPRRegister reg);
+  void pushl(GPRRegister reg);
 
-  void popl(typename Traits::GPRRegister reg);
-  void popl(const typename Traits::Address &address);
+  void popl(GPRRegister reg);
+  void popl(const Address &address);
 
   template <typename T = Traits,
             typename = typename std::enable_if<T::HasPusha>::type>
@@ -289,309 +299,190 @@
             typename = typename std::enable_if<T::HasPopa>::type>
   void popal();
 
-  void setcc(typename Traits::Cond::BrCond condition,
-             typename Traits::ByteRegister dst);
-  void setcc(typename Traits::Cond::BrCond condition,
-             const typename Traits::Address &address);
+  void setcc(BrCond condition, ByteRegister dst);
+  void setcc(BrCond condition, const Address &address);
 
-  void mov(Type Ty, typename Traits::GPRRegister dst, const Immediate &src);
-  void mov(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void mov(Type Ty, typename Traits::GPRRegister dst,
-           const typename Traits::Address &src);
-  void mov(Type Ty, const typename Traits::Address &dst,
-           typename Traits::GPRRegister src);
-  void mov(Type Ty, const typename Traits::Address &dst, const Immediate &imm);
+  void mov(Type Ty, GPRRegister dst, const Immediate &src);
+  void mov(Type Ty, GPRRegister dst, GPRRegister src);
+  void mov(Type Ty, GPRRegister dst, const Address &src);
+  void mov(Type Ty, const Address &dst, GPRRegister src);
+  void mov(Type Ty, const Address &dst, const Immediate &imm);
 
   template <typename T = Traits>
-  typename std::enable_if<T::Is64Bit, void>::type
-  movabs(const typename Traits::GPRRegister Dst, uint64_t Imm64);
+  typename std::enable_if<T::Is64Bit, void>::type movabs(const GPRRegister Dst,
+                                                         uint64_t Imm64);
   template <typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, void>::type
-  movabs(const typename Traits::GPRRegister, uint64_t) {
+  typename std::enable_if<!T::Is64Bit, void>::type movabs(const GPRRegister,
+                                                          uint64_t) {
     llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
   }
 
-  void movzx(Type Ty, typename Traits::GPRRegister dst,
-             typename Traits::GPRRegister src);
-  void movzx(Type Ty, typename Traits::GPRRegister dst,
-             const typename Traits::Address &src);
-  void movsx(Type Ty, typename Traits::GPRRegister dst,
-             typename Traits::GPRRegister src);
-  void movsx(Type Ty, typename Traits::GPRRegister dst,
-             const typename Traits::Address &src);
+  void movzx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movzx(Type Ty, GPRRegister dst, const Address &src);
+  void movsx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movsx(Type Ty, GPRRegister dst, const Address &src);
 
-  void lea(Type Ty, typename Traits::GPRRegister dst,
-           const typename Traits::Address &src);
+  void lea(Type Ty, GPRRegister dst, const Address &src);
 
-  void cmov(Type Ty, typename Traits::Cond::BrCond cond,
-            typename Traits::GPRRegister dst, typename Traits::GPRRegister src);
-  void cmov(Type Ty, typename Traits::Cond::BrCond cond,
-            typename Traits::GPRRegister dst,
-            const typename Traits::Address &src);
+  void cmov(Type Ty, BrCond cond, GPRRegister dst, GPRRegister src);
+  void cmov(Type Ty, BrCond cond, GPRRegister dst, const Address &src);
 
   void rep_movsb();
 
-  void movss(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void movss(Type Ty, const typename Traits::Address &dst,
-             typename Traits::XmmRegister src);
-  void movss(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
+  void movss(Type Ty, XmmRegister dst, const Address &src);
+  void movss(Type Ty, const Address &dst, XmmRegister src);
+  void movss(Type Ty, XmmRegister dst, XmmRegister src);
 
-  void movd(Type SrcTy, typename Traits::XmmRegister dst,
-            typename Traits::GPRRegister src);
-  void movd(Type SrcTy, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void movd(Type DestTy, typename Traits::GPRRegister dst,
-            typename Traits::XmmRegister src);
-  void movd(Type DestTy, const typename Traits::Address &dst,
-            typename Traits::XmmRegister src);
+  void movd(Type SrcTy, XmmRegister dst, GPRRegister src);
+  void movd(Type SrcTy, XmmRegister dst, const Address &src);
+  void movd(Type DestTy, GPRRegister dst, XmmRegister src);
+  void movd(Type DestTy, const Address &dst, XmmRegister src);
 
-  void movq(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
-  void movq(const typename Traits::Address &dst,
-            typename Traits::XmmRegister src);
-  void movq(typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
+  void movq(XmmRegister dst, XmmRegister src);
+  void movq(const Address &dst, XmmRegister src);
+  void movq(XmmRegister dst, const Address &src);
 
-  void addss(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void addss(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void subss(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void subss(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void mulss(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void mulss(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void divss(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void divss(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
+  void addss(Type Ty, XmmRegister dst, XmmRegister src);
+  void addss(Type Ty, XmmRegister dst, const Address &src);
+  void subss(Type Ty, XmmRegister dst, XmmRegister src);
+  void subss(Type Ty, XmmRegister dst, const Address &src);
+  void mulss(Type Ty, XmmRegister dst, XmmRegister src);
+  void mulss(Type Ty, XmmRegister dst, const Address &src);
+  void divss(Type Ty, XmmRegister dst, XmmRegister src);
+  void divss(Type Ty, XmmRegister dst, const Address &src);
 
-  void movaps(typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src);
+  void movaps(XmmRegister dst, XmmRegister src);
 
-  void movups(typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src);
-  void movups(typename Traits::XmmRegister dst,
-              const typename Traits::Address &src);
-  void movups(const typename Traits::Address &dst,
-              typename Traits::XmmRegister src);
+  void movups(XmmRegister dst, XmmRegister src);
+  void movups(XmmRegister dst, const Address &src);
+  void movups(const Address &dst, XmmRegister src);
 
-  void padd(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
-  void padd(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void pand(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
-  void pand(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void pandn(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void pandn(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void pmull(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void pmull(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void pmuludq(Type Ty, typename Traits::XmmRegister dst,
-               typename Traits::XmmRegister src);
-  void pmuludq(Type Ty, typename Traits::XmmRegister dst,
-               const typename Traits::Address &src);
-  void por(Type Ty, typename Traits::XmmRegister dst,
-           typename Traits::XmmRegister src);
-  void por(Type Ty, typename Traits::XmmRegister dst,
-           const typename Traits::Address &src);
-  void psub(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
-  void psub(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void pxor(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
-  void pxor(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
+  void padd(Type Ty, XmmRegister dst, XmmRegister src);
+  void padd(Type Ty, XmmRegister dst, const Address &src);
+  void pand(Type Ty, XmmRegister dst, XmmRegister src);
+  void pand(Type Ty, XmmRegister dst, const Address &src);
+  void pandn(Type Ty, XmmRegister dst, XmmRegister src);
+  void pandn(Type Ty, XmmRegister dst, const Address &src);
+  void pmull(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmull(Type Ty, XmmRegister dst, const Address &src);
+  void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmuludq(Type Ty, XmmRegister dst, const Address &src);
+  void por(Type Ty, XmmRegister dst, XmmRegister src);
+  void por(Type Ty, XmmRegister dst, const Address &src);
+  void psub(Type Ty, XmmRegister dst, XmmRegister src);
+  void psub(Type Ty, XmmRegister dst, const Address &src);
+  void pxor(Type Ty, XmmRegister dst, XmmRegister src);
+  void pxor(Type Ty, XmmRegister dst, const Address &src);
 
-  void psll(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
-  void psll(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void psll(Type Ty, typename Traits::XmmRegister dst, const Immediate &src);
+  void psll(Type Ty, XmmRegister dst, XmmRegister src);
+  void psll(Type Ty, XmmRegister dst, const Address &src);
+  void psll(Type Ty, XmmRegister dst, const Immediate &src);
 
-  void psra(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
-  void psra(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void psra(Type Ty, typename Traits::XmmRegister dst, const Immediate &src);
-  void psrl(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
-  void psrl(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void psrl(Type Ty, typename Traits::XmmRegister dst, const Immediate &src);
+  void psra(Type Ty, XmmRegister dst, XmmRegister src);
+  void psra(Type Ty, XmmRegister dst, const Address &src);
+  void psra(Type Ty, XmmRegister dst, const Immediate &src);
+  void psrl(Type Ty, XmmRegister dst, XmmRegister src);
+  void psrl(Type Ty, XmmRegister dst, const Address &src);
+  void psrl(Type Ty, XmmRegister dst, const Immediate &src);
 
-  void addps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void addps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void subps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void subps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void divps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void divps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void mulps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void mulps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void minps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void minps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void minss(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void minss(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void maxps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void maxps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void maxss(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void maxss(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void andnps(Type Ty, typename Traits::XmmRegister dst,
-              const typename Traits::Address &src);
-  void andnps(Type Ty, typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src);
-  void andps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void andps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
-  void orps(Type Ty, typename Traits::XmmRegister dst,
-            const typename Traits::Address &src);
-  void orps(Type Ty, typename Traits::XmmRegister dst,
-            typename Traits::XmmRegister src);
+  void addps(Type Ty, XmmRegister dst, XmmRegister src);
+  void addps(Type Ty, XmmRegister dst, const Address &src);
+  void subps(Type Ty, XmmRegister dst, XmmRegister src);
+  void subps(Type Ty, XmmRegister dst, const Address &src);
+  void divps(Type Ty, XmmRegister dst, XmmRegister src);
+  void divps(Type Ty, XmmRegister dst, const Address &src);
+  void mulps(Type Ty, XmmRegister dst, XmmRegister src);
+  void mulps(Type Ty, XmmRegister dst, const Address &src);
+  void minps(Type Ty, XmmRegister dst, const Address &src);
+  void minps(Type Ty, XmmRegister dst, XmmRegister src);
+  void minss(Type Ty, XmmRegister dst, const Address &src);
+  void minss(Type Ty, XmmRegister dst, XmmRegister src);
+  void maxps(Type Ty, XmmRegister dst, const Address &src);
+  void maxps(Type Ty, XmmRegister dst, XmmRegister src);
+  void maxss(Type Ty, XmmRegister dst, const Address &src);
+  void maxss(Type Ty, XmmRegister dst, XmmRegister src);
+  void andnps(Type Ty, XmmRegister dst, const Address &src);
+  void andnps(Type Ty, XmmRegister dst, XmmRegister src);
+  void andps(Type Ty, XmmRegister dst, const Address &src);
+  void andps(Type Ty, XmmRegister dst, XmmRegister src);
+  void orps(Type Ty, XmmRegister dst, const Address &src);
+  void orps(Type Ty, XmmRegister dst, XmmRegister src);
 
-  void blendvps(Type Ty, typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
-  void blendvps(Type Ty, typename Traits::XmmRegister dst,
-                const typename Traits::Address &src);
-  void pblendvb(Type Ty, typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
-  void pblendvb(Type Ty, typename Traits::XmmRegister dst,
-                const typename Traits::Address &src);
+  void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
+  void blendvps(Type Ty, XmmRegister dst, const Address &src);
+  void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
+  void pblendvb(Type Ty, XmmRegister dst, const Address &src);
 
-  void cmpps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src,
-             typename Traits::Cond::CmppsCond CmpCondition);
-  void cmpps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src,
-             typename Traits::Cond::CmppsCond CmpCondition);
+  void cmpps(Type Ty, XmmRegister dst, XmmRegister src, CmppsCond CmpCondition);
+  void cmpps(Type Ty, XmmRegister dst, const Address &src,
+             CmppsCond CmpCondition);
 
-  void sqrtps(typename Traits::XmmRegister dst);
-  void rsqrtps(typename Traits::XmmRegister dst);
-  void reciprocalps(typename Traits::XmmRegister dst);
+  void sqrtps(XmmRegister dst);
+  void rsqrtps(XmmRegister dst);
+  void reciprocalps(XmmRegister dst);
 
-  void movhlps(typename Traits::XmmRegister dst,
-               typename Traits::XmmRegister src);
-  void movlhps(typename Traits::XmmRegister dst,
-               typename Traits::XmmRegister src);
-  void unpcklps(typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
-  void unpckhps(typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
-  void unpcklpd(typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
-  void unpckhpd(typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
+  void movhlps(XmmRegister dst, XmmRegister src);
+  void movlhps(XmmRegister dst, XmmRegister src);
+  void unpcklps(XmmRegister dst, XmmRegister src);
+  void unpckhps(XmmRegister dst, XmmRegister src);
+  void unpcklpd(XmmRegister dst, XmmRegister src);
+  void unpckhpd(XmmRegister dst, XmmRegister src);
 
-  void set1ps(typename Traits::XmmRegister dst,
-              typename Traits::GPRRegister tmp, const Immediate &imm);
+  void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
 
-  void sqrtpd(typename Traits::XmmRegister dst);
+  void sqrtpd(XmmRegister dst);
 
-  void pshufd(Type Ty, typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src, const Immediate &mask);
-  void pshufd(Type Ty, typename Traits::XmmRegister dst,
-              const typename Traits::Address &src, const Immediate &mask);
-  void shufps(Type Ty, typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src, const Immediate &mask);
-  void shufps(Type Ty, typename Traits::XmmRegister dst,
-              const typename Traits::Address &src, const Immediate &mask);
+  void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
+  void pshufd(Type Ty, XmmRegister dst, const Address &src,
+              const Immediate &mask);
+  void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
+  void shufps(Type Ty, XmmRegister dst, const Address &src,
+              const Immediate &mask);
 
-  void cvtdq2ps(Type, typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
-  void cvtdq2ps(Type, typename Traits::XmmRegister dst,
-                const typename Traits::Address &src);
+  void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
+  void cvtdq2ps(Type, XmmRegister dst, const Address &src);
 
-  void cvttps2dq(Type, typename Traits::XmmRegister dst,
-                 typename Traits::XmmRegister src);
-  void cvttps2dq(Type, typename Traits::XmmRegister dst,
-                 const typename Traits::Address &src);
+  void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
+  void cvttps2dq(Type, XmmRegister dst, const Address &src);
 
-  void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
-                typename Traits::GPRRegister src);
-  void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
-                const typename Traits::Address &src);
+  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
+  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
 
-  void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
-                      typename Traits::XmmRegister src);
-  void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
-                      const typename Traits::Address &src);
+  void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
+  void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
 
-  void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
-                 typename Traits::XmmRegister src);
-  void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
-                 const typename Traits::Address &src);
+  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
+  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
 
-  void ucomiss(Type Ty, typename Traits::XmmRegister a,
-               typename Traits::XmmRegister b);
-  void ucomiss(Type Ty, typename Traits::XmmRegister a,
-               const typename Traits::Address &b);
+  void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
+  void ucomiss(Type Ty, XmmRegister a, const Address &b);
 
-  void movmskpd(typename Traits::GPRRegister dst,
-                typename Traits::XmmRegister src);
-  void movmskps(typename Traits::GPRRegister dst,
-                typename Traits::XmmRegister src);
+  void movmskpd(GPRRegister dst, XmmRegister src);
+  void movmskps(GPRRegister dst, XmmRegister src);
 
-  void sqrtss(Type Ty, typename Traits::XmmRegister dst,
-              const typename Traits::Address &src);
-  void sqrtss(Type Ty, typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src);
+  void sqrtss(Type Ty, XmmRegister dst, const Address &src);
+  void sqrtss(Type Ty, XmmRegister dst, XmmRegister src);
 
-  void xorps(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src);
-  void xorps(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::XmmRegister src);
+  void xorps(Type Ty, XmmRegister dst, const Address &src);
+  void xorps(Type Ty, XmmRegister dst, XmmRegister src);
 
-  void insertps(Type Ty, typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src, const Immediate &imm);
-  void insertps(Type Ty, typename Traits::XmmRegister dst,
-                const typename Traits::Address &src, const Immediate &imm);
+  void insertps(Type Ty, XmmRegister dst, XmmRegister src,
+                const Immediate &imm);
+  void insertps(Type Ty, XmmRegister dst, const Address &src,
+                const Immediate &imm);
 
-  void pinsr(Type Ty, typename Traits::XmmRegister dst,
-             typename Traits::GPRRegister src, const Immediate &imm);
-  void pinsr(Type Ty, typename Traits::XmmRegister dst,
-             const typename Traits::Address &src, const Immediate &imm);
+  void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
+  void pinsr(Type Ty, XmmRegister dst, const Address &src,
+             const Immediate &imm);
 
-  void pextr(Type Ty, typename Traits::GPRRegister dst,
-             typename Traits::XmmRegister src, const Immediate &imm);
+  void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
 
-  void pmovsxdq(typename Traits::XmmRegister dst,
-                typename Traits::XmmRegister src);
+  void pmovsxdq(XmmRegister dst, XmmRegister src);
 
-  void pcmpeq(Type Ty, typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src);
-  void pcmpeq(Type Ty, typename Traits::XmmRegister dst,
-              const typename Traits::Address &src);
-  void pcmpgt(Type Ty, typename Traits::XmmRegister dst,
-              typename Traits::XmmRegister src);
-  void pcmpgt(Type Ty, typename Traits::XmmRegister dst,
-              const typename Traits::Address &src);
+  void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
+  void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
+  void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
+  void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
 
   enum RoundingMode {
     kRoundToNearest = 0x0,
@@ -599,8 +490,7 @@
     kRoundUp = 0x2,
     kRoundToZero = 0x3
   };
-  void roundsd(typename Traits::XmmRegister dst,
-               typename Traits::XmmRegister src, RoundingMode mode);
+  void roundsd(XmmRegister dst, XmmRegister src, RoundingMode mode);
 
   //----------------------------------------------------------------------------
   //
@@ -646,93 +536,58 @@
   //
   //----------------------------------------------------------------------------
 
-  void cmp(Type Ty, typename Traits::GPRRegister reg0,
-           typename Traits::GPRRegister reg1);
-  void cmp(Type Ty, typename Traits::GPRRegister reg,
-           const typename Traits::Address &address);
-  void cmp(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void cmp(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister reg);
-  void cmp(Type Ty, const typename Traits::Address &address,
-           const Immediate &imm);
+  void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void cmp(Type Ty, GPRRegister reg, const Address &address);
+  void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
+  void cmp(Type Ty, const Address &address, GPRRegister reg);
+  void cmp(Type Ty, const Address &address, const Immediate &imm);
 
-  void test(Type Ty, typename Traits::GPRRegister reg0,
-            typename Traits::GPRRegister reg1);
-  void test(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void test(Type Ty, const typename Traits::Address &address,
-            typename Traits::GPRRegister reg);
-  void test(Type Ty, const typename Traits::Address &address,
-            const Immediate &imm);
+  void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void test(Type Ty, GPRRegister reg, const Immediate &imm);
+  void test(Type Ty, const Address &address, GPRRegister reg);
+  void test(Type Ty, const Address &address, const Immediate &imm);
 
-  void And(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void And(Type Ty, typename Traits::GPRRegister dst,
-           const typename Traits::Address &address);
-  void And(Type Ty, typename Traits::GPRRegister dst, const Immediate &imm);
-  void And(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister reg);
-  void And(Type Ty, const typename Traits::Address &address,
-           const Immediate &imm);
+  void And(Type Ty, GPRRegister dst, GPRRegister src);
+  void And(Type Ty, GPRRegister dst, const Address &address);
+  void And(Type Ty, GPRRegister dst, const Immediate &imm);
+  void And(Type Ty, const Address &address, GPRRegister reg);
+  void And(Type Ty, const Address &address, const Immediate &imm);
 
-  void Or(Type Ty, typename Traits::GPRRegister dst,
-          typename Traits::GPRRegister src);
-  void Or(Type Ty, typename Traits::GPRRegister dst,
-          const typename Traits::Address &address);
-  void Or(Type Ty, typename Traits::GPRRegister dst, const Immediate &imm);
-  void Or(Type Ty, const typename Traits::Address &address,
-          typename Traits::GPRRegister reg);
-  void Or(Type Ty, const typename Traits::Address &address,
-          const Immediate &imm);
+  void Or(Type Ty, GPRRegister dst, GPRRegister src);
+  void Or(Type Ty, GPRRegister dst, const Address &address);
+  void Or(Type Ty, GPRRegister dst, const Immediate &imm);
+  void Or(Type Ty, const Address &address, GPRRegister reg);
+  void Or(Type Ty, const Address &address, const Immediate &imm);
 
-  void Xor(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void Xor(Type Ty, typename Traits::GPRRegister dst,
-           const typename Traits::Address &address);
-  void Xor(Type Ty, typename Traits::GPRRegister dst, const Immediate &imm);
-  void Xor(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister reg);
-  void Xor(Type Ty, const typename Traits::Address &address,
-           const Immediate &imm);
+  void Xor(Type Ty, GPRRegister dst, GPRRegister src);
+  void Xor(Type Ty, GPRRegister dst, const Address &address);
+  void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
+  void Xor(Type Ty, const Address &address, GPRRegister reg);
+  void Xor(Type Ty, const Address &address, const Immediate &imm);
 
-  void add(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void add(Type Ty, typename Traits::GPRRegister reg,
-           const typename Traits::Address &address);
-  void add(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void add(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister reg);
-  void add(Type Ty, const typename Traits::Address &address,
-           const Immediate &imm);
+  void add(Type Ty, GPRRegister dst, GPRRegister src);
+  void add(Type Ty, GPRRegister reg, const Address &address);
+  void add(Type Ty, GPRRegister reg, const Immediate &imm);
+  void add(Type Ty, const Address &address, GPRRegister reg);
+  void add(Type Ty, const Address &address, const Immediate &imm);
 
-  void adc(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void adc(Type Ty, typename Traits::GPRRegister dst,
-           const typename Traits::Address &address);
-  void adc(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void adc(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister reg);
-  void adc(Type Ty, const typename Traits::Address &address,
-           const Immediate &imm);
+  void adc(Type Ty, GPRRegister dst, GPRRegister src);
+  void adc(Type Ty, GPRRegister dst, const Address &address);
+  void adc(Type Ty, GPRRegister reg, const Immediate &imm);
+  void adc(Type Ty, const Address &address, GPRRegister reg);
+  void adc(Type Ty, const Address &address, const Immediate &imm);
 
-  void sub(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void sub(Type Ty, typename Traits::GPRRegister reg,
-           const typename Traits::Address &address);
-  void sub(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void sub(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister reg);
-  void sub(Type Ty, const typename Traits::Address &address,
-           const Immediate &imm);
+  void sub(Type Ty, GPRRegister dst, GPRRegister src);
+  void sub(Type Ty, GPRRegister reg, const Address &address);
+  void sub(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sub(Type Ty, const Address &address, GPRRegister reg);
+  void sub(Type Ty, const Address &address, const Immediate &imm);
 
-  void sbb(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void sbb(Type Ty, typename Traits::GPRRegister reg,
-           const typename Traits::Address &address);
-  void sbb(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void sbb(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister reg);
-  void sbb(Type Ty, const typename Traits::Address &address,
-           const Immediate &imm);
+  void sbb(Type Ty, GPRRegister dst, GPRRegister src);
+  void sbb(Type Ty, GPRRegister reg, const Address &address);
+  void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sbb(Type Ty, const Address &address, GPRRegister reg);
+  void sbb(Type Ty, const Address &address, const Immediate &imm);
 
   void cbw();
   void cwd();
@@ -744,93 +599,71 @@
     llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
   }
 
-  void div(Type Ty, typename Traits::GPRRegister reg);
-  void div(Type Ty, const typename Traits::Address &address);
+  void div(Type Ty, GPRRegister reg);
+  void div(Type Ty, const Address &address);
 
-  void idiv(Type Ty, typename Traits::GPRRegister reg);
-  void idiv(Type Ty, const typename Traits::Address &address);
+  void idiv(Type Ty, GPRRegister reg);
+  void idiv(Type Ty, const Address &address);
 
-  void imul(Type Ty, typename Traits::GPRRegister dst,
-            typename Traits::GPRRegister src);
-  void imul(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void imul(Type Ty, typename Traits::GPRRegister reg,
-            const typename Traits::Address &address);
+  void imul(Type Ty, GPRRegister dst, GPRRegister src);
+  void imul(Type Ty, GPRRegister reg, const Immediate &imm);
+  void imul(Type Ty, GPRRegister reg, const Address &address);
 
-  void imul(Type Ty, typename Traits::GPRRegister reg);
-  void imul(Type Ty, const typename Traits::Address &address);
+  void imul(Type Ty, GPRRegister reg);
+  void imul(Type Ty, const Address &address);
 
-  void imul(Type Ty, typename Traits::GPRRegister dst,
-            typename Traits::GPRRegister src, const Immediate &imm);
-  void imul(Type Ty, typename Traits::GPRRegister dst,
-            const typename Traits::Address &address, const Immediate &imm);
+  void imul(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void imul(Type Ty, GPRRegister dst, const Address &address,
+            const Immediate &imm);
 
-  void mul(Type Ty, typename Traits::GPRRegister reg);
-  void mul(Type Ty, const typename Traits::Address &address);
+  void mul(Type Ty, GPRRegister reg);
+  void mul(Type Ty, const Address &address);
 
   template <class T = Traits,
             typename = typename std::enable_if<!T::Is64Bit>::type>
-  void incl(typename Traits::GPRRegister reg);
-  void incl(const typename Traits::Address &address);
+  void incl(GPRRegister reg);
+  void incl(const Address &address);
 
   template <class T = Traits,
             typename = typename std::enable_if<!T::Is64Bit>::type>
-  void decl(typename Traits::GPRRegister reg);
-  void decl(const typename Traits::Address &address);
+  void decl(GPRRegister reg);
+  void decl(const Address &address);
 
-  void rol(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void rol(Type Ty, typename Traits::GPRRegister operand,
-           typename Traits::GPRRegister shifter);
-  void rol(Type Ty, const typename Traits::Address &operand,
-           typename Traits::GPRRegister shifter);
+  void rol(Type Ty, GPRRegister reg, const Immediate &imm);
+  void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void rol(Type Ty, const Address &operand, GPRRegister shifter);
 
-  void shl(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void shl(Type Ty, typename Traits::GPRRegister operand,
-           typename Traits::GPRRegister shifter);
-  void shl(Type Ty, const typename Traits::Address &operand,
-           typename Traits::GPRRegister shifter);
+  void shl(Type Ty, GPRRegister reg, const Immediate &imm);
+  void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void shl(Type Ty, const Address &operand, GPRRegister shifter);
 
-  void shr(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void shr(Type Ty, typename Traits::GPRRegister operand,
-           typename Traits::GPRRegister shifter);
-  void shr(Type Ty, const typename Traits::Address &operand,
-           typename Traits::GPRRegister shifter);
+  void shr(Type Ty, GPRRegister reg, const Immediate &imm);
+  void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void shr(Type Ty, const Address &operand, GPRRegister shifter);
 
-  void sar(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
-  void sar(Type Ty, typename Traits::GPRRegister operand,
-           typename Traits::GPRRegister shifter);
-  void sar(Type Ty, const typename Traits::Address &address,
-           typename Traits::GPRRegister shifter);
+  void sar(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void sar(Type Ty, const Address &address, GPRRegister shifter);
 
-  void shld(Type Ty, typename Traits::GPRRegister dst,
-            typename Traits::GPRRegister src);
-  void shld(Type Ty, typename Traits::GPRRegister dst,
-            typename Traits::GPRRegister src, const Immediate &imm);
-  void shld(Type Ty, const typename Traits::Address &operand,
-            typename Traits::GPRRegister src);
-  void shrd(Type Ty, typename Traits::GPRRegister dst,
-            typename Traits::GPRRegister src);
-  void shrd(Type Ty, typename Traits::GPRRegister dst,
-            typename Traits::GPRRegister src, const Immediate &imm);
-  void shrd(Type Ty, const typename Traits::Address &dst,
-            typename Traits::GPRRegister src);
+  void shld(Type Ty, GPRRegister dst, GPRRegister src);
+  void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void shld(Type Ty, const Address &operand, GPRRegister src);
+  void shrd(Type Ty, GPRRegister dst, GPRRegister src);
+  void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void shrd(Type Ty, const Address &dst, GPRRegister src);
 
-  void neg(Type Ty, typename Traits::GPRRegister reg);
-  void neg(Type Ty, const typename Traits::Address &addr);
-  void notl(typename Traits::GPRRegister reg);
+  void neg(Type Ty, GPRRegister reg);
+  void neg(Type Ty, const Address &addr);
+  void notl(GPRRegister reg);
 
-  void bsf(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void bsf(Type Ty, typename Traits::GPRRegister dst,
-           const typename Traits::Address &src);
-  void bsr(Type Ty, typename Traits::GPRRegister dst,
-           typename Traits::GPRRegister src);
-  void bsr(Type Ty, typename Traits::GPRRegister dst,
-           const typename Traits::Address &src);
+  void bsf(Type Ty, GPRRegister dst, GPRRegister src);
+  void bsf(Type Ty, GPRRegister dst, const Address &src);
+  void bsr(Type Ty, GPRRegister dst, GPRRegister src);
+  void bsr(Type Ty, GPRRegister dst, const Address &src);
 
-  void bswap(Type Ty, typename Traits::GPRRegister reg);
+  void bswap(Type Ty, GPRRegister reg);
 
-  void bt(typename Traits::GPRRegister base,
-          typename Traits::GPRRegister offset);
+  void bt(GPRRegister base, GPRRegister offset);
 
   void ret();
   void ret(const Immediate &imm);
@@ -842,27 +675,21 @@
   void ud2();
 
   // j(Label) is fully tested.
-  void j(typename Traits::Cond::BrCond condition, Label *label,
-         bool near = kFarJump);
-  void j(typename Traits::Cond::BrCond condition,
-         const ConstantRelocatable *label); // not testable.
+  void j(BrCond condition, Label *label, bool near = kFarJump);
+  void j(BrCond condition, const ConstantRelocatable *label); // not testable.
 
-  void jmp(typename Traits::GPRRegister reg);
+  void jmp(GPRRegister reg);
   void jmp(Label *label, bool near = kFarJump);
   void jmp(const ConstantRelocatable *label); // not testable.
 
   void mfence();
 
   void lock();
-  void cmpxchg(Type Ty, const typename Traits::Address &address,
-               typename Traits::GPRRegister reg, bool Locked);
-  void cmpxchg8b(const typename Traits::Address &address, bool Locked);
-  void xadd(Type Ty, const typename Traits::Address &address,
-            typename Traits::GPRRegister reg, bool Locked);
-  void xchg(Type Ty, typename Traits::GPRRegister reg0,
-            typename Traits::GPRRegister reg1);
-  void xchg(Type Ty, const typename Traits::Address &address,
-            typename Traits::GPRRegister reg);
+  void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
+  void cmpxchg8b(const Address &address, bool Locked);
+  void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
+  void xchg(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void xchg(Type Ty, const Address &address, GPRRegister reg);
 
   /// \name Intel Architecture Code Analyzer markers.
   /// @{
@@ -882,9 +709,11 @@
   inline void emitUint8(uint8_t value);
 
 private:
+  ENABLE_MAKE_UNIQUE;
+
   static constexpr Type RexTypeIrrelevant = IceType_i32;
   static constexpr Type RexTypeForceRexW = IceType_i64;
-  static constexpr typename Traits::GPRRegister RexRegIrrelevant =
+  static constexpr GPRRegister RexRegIrrelevant =
       Traits::GPRRegister::Encoded_Reg_eax;
 
   inline void emitInt16(int16_t value);
@@ -894,21 +723,19 @@
   inline void emitXmmRegisterOperand(RegType reg, RmType rm);
   inline void emitOperandSizeOverride();
 
-  void emitOperand(int rm, const typename Traits::Operand &operand);
+  void emitOperand(int rm, const Operand &operand);
   void emitImmediate(Type ty, const Immediate &imm);
-  void emitComplexI8(int rm, const typename Traits::Operand &operand,
+  void emitComplexI8(int rm, const Operand &operand,
                      const Immediate &immediate);
-  void emitComplex(Type Ty, int rm, const typename Traits::Operand &operand,
+  void emitComplex(Type Ty, int rm, const Operand &operand,
                    const Immediate &immediate);
   void emitLabel(Label *label, intptr_t instruction_size);
   void emitLabelLink(Label *label);
   void emitNearLabelLink(Label *label);
 
-  void emitGenericShift(int rm, Type Ty, typename Traits::GPRRegister reg,
-                        const Immediate &imm);
-  void emitGenericShift(int rm, Type Ty,
-                        const typename Traits::Operand &operand,
-                        typename Traits::GPRRegister shifter);
+  void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
+  void emitGenericShift(int rm, Type Ty, const Operand &operand,
+                        GPRRegister shifter);
 
   using LabelVector = std::vector<Label *>;
   // A vector of pool-allocated x86 labels for CFG nodes.
@@ -930,24 +757,19 @@
   // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
   // parameter is statically asserted to be less than 8.
   template <uint32_t Tag>
-  void arith_int(Type Ty, typename Traits::GPRRegister reg,
-                 const Immediate &imm);
+  void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
 
   template <uint32_t Tag>
-  void arith_int(Type Ty, typename Traits::GPRRegister reg0,
-                 typename Traits::GPRRegister reg1);
+  void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
 
   template <uint32_t Tag>
-  void arith_int(Type Ty, typename Traits::GPRRegister reg,
-                 const typename Traits::Address &address);
+  void arith_int(Type Ty, GPRRegister reg, const Address &address);
 
   template <uint32_t Tag>
-  void arith_int(Type Ty, const typename Traits::Address &address,
-                 typename Traits::GPRRegister reg);
+  void arith_int(Type Ty, const Address &address, GPRRegister reg);
 
   template <uint32_t Tag>
-  void arith_int(Type Ty, const typename Traits::Address &address,
-                 const Immediate &imm);
+  void arith_int(Type Ty, const Address &address, const Immediate &imm);
 
   // gprEncoding returns Reg encoding for operand emission. For x86-64 we mask
   // out the 4th bit as it is encoded in the REX.[RXB] bits. No other bits are
@@ -955,8 +777,7 @@
   template <typename RegType, typename T = Traits>
   typename std::enable_if<T::Is64Bit, typename T::GPRRegister>::type
   gprEncoding(const RegType Reg) {
-    return static_cast<typename Traits::GPRRegister>(static_cast<uint8_t>(Reg) &
-                                                     ~0x08);
+    return static_cast<GPRRegister>(static_cast<uint8_t>(Reg) & ~0x08);
   }
 
   template <typename RegType, typename T = Traits>
@@ -968,18 +789,16 @@
   template <typename RegType>
   bool is8BitRegisterRequiringRex(const Type Ty, const RegType Reg) {
     static constexpr bool IsGPR =
-        std::is_same<typename std::decay<RegType>::type,
-                     typename Traits::ByteRegister>::value ||
-        std::is_same<typename std::decay<RegType>::type,
-                     typename Traits::GPRRegister>::value;
+        std::is_same<typename std::decay<RegType>::type, ByteRegister>::value ||
+        std::is_same<typename std::decay<RegType>::type, GPRRegister>::value;
 
     // At this point in the assembler, we have encoded regs, so it is not
     // possible to distinguish between the "new" low byte registers introduced
-    // in x86-64 and the legacy [abcd]h registers. Because x86, we may still see
-    // ah (div) in the assembler, so we whitelist it here.
+    // in x86-64 and the legacy [abcd]h registers. Because x86, we may still
+    // see ah (div) in the assembler, so we whitelist it here.
     //
-    // The "local" uint32_t Encoded_Reg_ah is needed because RegType is an enum
-    // that is not necessarily the same type of
+    // The "local" uint32_t Encoded_Reg_ah is needed because RegType is an
+    // enum that is not necessarily the same type as
     // Traits::RegisterSet::Encoded_Reg_ah.
     constexpr uint32_t Encoded_Reg_ah = Traits::RegisterSet::Encoded_Reg_ah;
     return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
@@ -988,9 +807,10 @@
 
   // assembleAndEmitRex is used for determining which (if any) rex prefix
   // should be emitted for the current instruction. It allows different types
-  // for Reg and Rm because they could be of different types (e.g., in mov[sz]x
-  // instructions.) If Addr is not nullptr, then Rm is ignored, and Rex.B is
-  // determined by Addr instead. TyRm is still used to determine Addr's size.
+  // for Reg and Rm because they could be of different types (e.g., in
+  // mov[sz]x instructions.) If Addr is not nullptr, then Rm is ignored, and
+  // Rex.B is determined by Addr instead. TyRm is still used to determine
+  // Addr's size.
   template <typename RegType, typename RmType, typename T = Traits>
   typename std::enable_if<T::Is64Bit, void>::type
   assembleAndEmitRex(const Type TyReg, const RegType Reg, const Type TyRm,
@@ -1021,8 +841,8 @@
   assembleAndEmitRex(const Type, const RegType, const Type, const RmType,
                      const typename T::Address * = nullptr) {}
 
-  // emitRexRB is used for emitting a Rex prefix instructions with two explicit
-  // register operands in its mod-rm byte.
+  // emitRexRB is used for emitting a Rex prefix for instructions with two
+  // explicit register operands in their mod-rm byte.
   template <typename RegType, typename RmType>
   void emitRexRB(const Type Ty, const RegType Reg, const RmType Rm) {
     assembleAndEmitRex(Ty, Reg, Ty, Rm);
@@ -1044,53 +864,52 @@
   // emitRex is used for emitting a Rex prefix for an address and a GPR. The
   // address may contain zero, one, or two registers.
   template <typename RegType>
-  void emitRex(const Type Ty, const typename Traits::Address &Addr,
-               const RegType Reg) {
+  void emitRex(const Type Ty, const Address &Addr, const RegType Reg) {
     assembleAndEmitRex(Ty, Reg, Ty, RexRegIrrelevant, &Addr);
   }
 
   template <typename RegType>
-  void emitRex(const Type AddrTy, const typename Traits::Address &Addr,
-               const Type TyReg, const RegType Reg) {
+  void emitRex(const Type AddrTy, const Address &Addr, const Type TyReg,
+               const RegType Reg) {
     assembleAndEmitRex(TyReg, Reg, AddrTy, RexRegIrrelevant, &Addr);
   }
 };
 
-template <class Machine>
-inline void AssemblerX86Base<Machine>::emitUint8(uint8_t value) {
+template <typename TraitsType>
+inline void AssemblerX86Base<TraitsType>::emitUint8(uint8_t value) {
   Buffer.emit<uint8_t>(value);
 }
 
-template <class Machine>
-inline void AssemblerX86Base<Machine>::emitInt16(int16_t value) {
+template <typename TraitsType>
+inline void AssemblerX86Base<TraitsType>::emitInt16(int16_t value) {
   Buffer.emit<int16_t>(value);
 }
 
-template <class Machine>
-inline void AssemblerX86Base<Machine>::emitInt32(int32_t value) {
+template <typename TraitsType>
+inline void AssemblerX86Base<TraitsType>::emitInt32(int32_t value) {
   Buffer.emit<int32_t>(value);
 }
 
-template <class Machine>
-inline void AssemblerX86Base<Machine>::emitRegisterOperand(int reg, int rm) {
+template <typename TraitsType>
+inline void AssemblerX86Base<TraitsType>::emitRegisterOperand(int reg, int rm) {
   assert(reg >= 0 && reg < 8);
   assert(rm >= 0 && rm < 8);
   Buffer.emit<uint8_t>(0xC0 + (reg << 3) + rm);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename RegType, typename RmType>
-inline void AssemblerX86Base<Machine>::emitXmmRegisterOperand(RegType reg,
-                                                              RmType rm) {
+inline void AssemblerX86Base<TraitsType>::emitXmmRegisterOperand(RegType reg,
+                                                                 RmType rm) {
   emitRegisterOperand(gprEncoding(reg), gprEncoding(rm));
 }
 
-template <class Machine>
-inline void AssemblerX86Base<Machine>::emitOperandSizeOverride() {
+template <typename TraitsType>
+inline void AssemblerX86Base<TraitsType>::emitOperandSizeOverride() {
   emitUint8(0x66);
 }
 
-} // end of namespace X86Internal
+} // end of namespace X86NAMESPACE
 
 } // end of namespace Ice
 
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index a32d97c..040fed8 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -27,10 +27,10 @@
 #include "IceOperand.h"
 
 namespace Ice {
-namespace X86Internal {
+namespace X86NAMESPACE {
 
-template <class Machine>
-AssemblerX86Base<Machine>::~AssemblerX86Base<Machine>() {
+template <typename TraitsType>
+AssemblerX86Base<TraitsType>::~AssemblerX86Base() {
   if (BuildDefs::asserts()) {
     for (const Label *Label : CfgNodeLabels) {
       Label->finalCheck();
@@ -41,7 +41,8 @@
   }
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::alignFunction() {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::alignFunction() {
   const SizeT Align = 1 << getBundleAlignLog2Bytes();
   SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
   constexpr SizeT HltSize = 1;
@@ -51,9 +52,10 @@
   }
 }
 
-template <class Machine>
-Label *AssemblerX86Base<Machine>::getOrCreateLabel(SizeT Number,
-                                                   LabelVector &Labels) {
+template <typename TraitsType>
+typename AssemblerX86Base<TraitsType>::Label *
+AssemblerX86Base<TraitsType>::getOrCreateLabel(SizeT Number,
+                                               LabelVector &Labels) {
   Label *L = nullptr;
   if (Number == Labels.size()) {
     L = new (this->allocate<Label>()) Label();
@@ -71,46 +73,48 @@
   return L;
 }
 
-template <class Machine>
-Ice::Label *AssemblerX86Base<Machine>::getCfgNodeLabel(SizeT NodeNumber) {
+template <typename TraitsType>
+Ice::Label *AssemblerX86Base<TraitsType>::getCfgNodeLabel(SizeT NodeNumber) {
   assert(NodeNumber < CfgNodeLabels.size());
   return CfgNodeLabels[NodeNumber];
 }
 
-template <class Machine>
-Label *AssemblerX86Base<Machine>::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
+template <typename TraitsType>
+typename AssemblerX86Base<TraitsType>::Label *
+AssemblerX86Base<TraitsType>::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
   return getOrCreateLabel(NodeNumber, CfgNodeLabels);
 }
 
-template <class Machine>
-Label *AssemblerX86Base<Machine>::getOrCreateLocalLabel(SizeT Number) {
+template <typename TraitsType>
+typename AssemblerX86Base<TraitsType>::Label *
+AssemblerX86Base<TraitsType>::getOrCreateLocalLabel(SizeT Number) {
   return getOrCreateLabel(Number, LocalLabels);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bindCfgNodeLabel(const CfgNode *Node) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bindCfgNodeLabel(const CfgNode *Node) {
   assert(!getPreliminary());
   Label *L = getOrCreateCfgNodeLabel(Node->getIndex());
   this->bind(L);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bindLocalLabel(SizeT Number) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bindLocalLabel(SizeT Number) {
   Label *L = getOrCreateLocalLabel(Number);
   if (!getPreliminary())
     this->bind(L);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::call(typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::call(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeIrrelevant, reg);
   emitUint8(0xFF);
   emitRegisterOperand(2, gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::call(const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::call(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
@@ -118,8 +122,8 @@
   emitOperand(2, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::call(const ConstantRelocatable *label) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::call(const ConstantRelocatable *label) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   intptr_t call_start = Buffer.getPosition();
   emitUint8(0xE8);
@@ -129,8 +133,8 @@
   (void)call_start;
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::call(const Immediate &abs_address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::call(const Immediate &abs_address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   intptr_t call_start = Buffer.getPosition();
   emitUint8(0xE8);
@@ -140,15 +144,15 @@
   (void)call_start;
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pushl(typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pushl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeIrrelevant, reg);
   emitUint8(0x50 + gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::popl(typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::popl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // Any type that would not force a REX prefix to be emitted can be provided
   // here.
@@ -156,8 +160,8 @@
   emitUint8(0x58 + gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::popl(const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::popl(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
@@ -165,23 +169,22 @@
   emitOperand(0, address);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename, typename>
-void AssemblerX86Base<Machine>::pushal() {
+void AssemblerX86Base<TraitsType>::pushal() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x60);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename, typename>
-void AssemblerX86Base<Machine>::popal() {
+void AssemblerX86Base<TraitsType>::popal() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x61);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::setcc(typename Traits::Cond::BrCond condition,
-                                      typename Traits::ByteRegister dst) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::setcc(BrCond condition, ByteRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(IceType_i8, dst);
   emitUint8(0x0F);
@@ -189,9 +192,9 @@
   emitUint8(0xC0 + gprEncoding(dst));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::setcc(typename Traits::Cond::BrCond condition,
-                                      const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::setcc(BrCond condition,
+                                         const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
@@ -200,9 +203,9 @@
   emitOperand(0, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mov(Type Ty, typename Traits::GPRRegister dst,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
+                                       const Immediate &imm) {
   assert(Ty != IceType_i64 && "i64 not supported yet.");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -219,9 +222,9 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mov(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -234,9 +237,9 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mov(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
+                                       const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -250,10 +253,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mov(Type Ty,
-                                    const typename Traits::Address &dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
+                                       GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -267,10 +269,9 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mov(Type Ty,
-                                    const typename Traits::Address &dst,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
+                                       const Immediate &imm) {
   assert(Ty != IceType_i64 && "i64 not supported yet.");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -288,11 +289,10 @@
   }
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T>
 typename std::enable_if<T::Is64Bit, void>::type
-AssemblerX86Base<Machine>::movabs(const typename Traits::GPRRegister Dst,
-                                  uint64_t Imm64) {
+AssemblerX86Base<TraitsType>::movabs(const GPRRegister Dst, uint64_t Imm64) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   const bool NeedsRexW = (Imm64 & ~0xFFFFFFFFull) != 0;
   const Type RexType = NeedsRexW ? RexTypeForceRexW : RexTypeIrrelevant;
@@ -306,10 +306,9 @@
     emitInt32((Imm64 >> 32) & 0xFFFFFFFF);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movzx(Type SrcTy,
-                                      typename Traits::GPRRegister dst,
-                                      typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
+                                         GPRRegister src) {
   if (Traits::Is64Bit && SrcTy == IceType_i32) {
     // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
     // operand to 64-bit.
@@ -326,10 +325,9 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movzx(Type SrcTy,
-                                      typename Traits::GPRRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
+                                         const Address &src) {
   if (Traits::Is64Bit && SrcTy == IceType_i32) {
     // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
     // operand to 64-bit.
@@ -347,10 +345,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movsx(Type SrcTy,
-                                      typename Traits::GPRRegister dst,
-                                      typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
+                                         GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   bool ByteSized = isByteSizedType(SrcTy);
   emitRexRB(RexTypeForceRexW, dst, SrcTy, src);
@@ -364,10 +361,9 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movsx(Type SrcTy,
-                                      typename Traits::GPRRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   bool ByteSized = isByteSizedType(SrcTy);
   emitAddrSizeOverridePrefix();
@@ -382,9 +378,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::lea(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::lea(Type Ty, GPRRegister dst,
+                                       const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -395,11 +391,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmov(Type Ty,
-                                     typename Traits::Cond::BrCond cond,
-                                     typename Traits::GPRRegister dst,
-                                     typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
+                                        GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -411,11 +405,9 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmov(Type Ty,
-                                     typename Traits::Cond::BrCond cond,
-                                     typename Traits::GPRRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -428,15 +420,15 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::rep_movsb() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::rep_movsb() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitUint8(0xA4);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movss(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -446,10 +438,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movss(Type Ty,
-                                      const typename Traits::Address &dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movss(Type Ty, const Address &dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -459,9 +450,9 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movss(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, src, dst);
@@ -470,10 +461,9 @@
   emitXmmRegisterOperand(src, dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movd(Type SrcTy,
-                                     typename Traits::XmmRegister dst,
-                                     typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
+                                        GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(SrcTy, dst, src);
@@ -482,10 +472,9 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movd(Type SrcTy,
-                                     typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -495,10 +484,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movd(Type DestTy,
-                                     typename Traits::GPRRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movd(Type DestTy, GPRRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(DestTy, src, dst);
@@ -507,10 +495,9 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movd(Type DestTy,
-                                     const typename Traits::Address &dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movd(Type DestTy, const Address &dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -520,9 +507,8 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movq(typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -531,9 +517,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movq(const typename Traits::Address &dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movq(const Address &dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -543,9 +528,8 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movq(typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitAddrSizeOverridePrefix();
@@ -555,9 +539,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::addss(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -566,9 +550,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::addss(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -578,9 +562,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::subss(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -589,9 +573,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::subss(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -601,9 +585,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mulss(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -612,9 +596,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mulss(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -624,9 +608,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::divss(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -635,9 +619,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::divss(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -647,35 +631,36 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fld(Type Ty, const typename T::Address &src) {
+void AssemblerX86Base<TraitsType>::fld(Type Ty,
+                                       const typename T::Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
   emitOperand(0, src);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fstp(Type Ty, const typename T::Address &dst) {
+void AssemblerX86Base<TraitsType>::fstp(Type Ty,
+                                        const typename T::Address &dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
   emitOperand(3, dst);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fstp(typename T::X87STRegister st) {
+void AssemblerX86Base<TraitsType>::fstp(typename T::X87STRegister st) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xDD);
   emitUint8(0xD8 + st);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movaps(typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movaps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -683,9 +668,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movups(typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -693,9 +677,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movups(typename Traits::XmmRegister dst,
-                                       const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
@@ -704,9 +687,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movups(const typename Traits::Address &dst,
-                                       typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movups(const Address &dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, dst, src);
@@ -715,9 +697,9 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::padd(Type Ty, typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -732,9 +714,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::padd(Type Ty, typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -750,10 +732,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pand(Type /* Ty */,
-                                     typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -762,10 +743,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pand(Type /* Ty */,
-                                     typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -775,10 +755,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pandn(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -787,10 +766,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pandn(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -800,9 +778,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pmull(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -817,9 +795,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pmull(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -835,10 +813,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pmuludq(Type /* Ty */,
-                                        typename Traits::XmmRegister dst,
-                                        typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
+                                           XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -847,10 +824,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pmuludq(Type /* Ty */,
-                                        typename Traits::XmmRegister dst,
-                                        const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
+                                           const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -860,10 +836,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::por(Type /* Ty */,
-                                    typename Traits::XmmRegister dst,
-                                    typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
+                                       XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -872,10 +847,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::por(Type /* Ty */,
-                                    typename Traits::XmmRegister dst,
-                                    const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
+                                       const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -885,9 +859,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psub(Type Ty, typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -902,9 +876,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psub(Type Ty, typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -920,10 +894,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pxor(Type /* Ty */,
-                                     typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -932,10 +905,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pxor(Type /* Ty */,
-                                     typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -945,9 +917,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psll(Type Ty, typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -961,9 +933,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psll(Type Ty, typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -978,9 +950,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psll(Type Ty, typename Traits::XmmRegister dst,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
+                                        const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_int8());
   emitUint8(0x66);
@@ -996,9 +968,9 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psra(Type Ty, typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1012,9 +984,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psra(Type Ty, typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -1029,9 +1001,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psra(Type Ty, typename Traits::XmmRegister dst,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
+                                        const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_int8());
   emitUint8(0x66);
@@ -1047,9 +1019,9 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psrl(Type Ty, typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1065,9 +1037,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psrl(Type Ty, typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -1084,9 +1056,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::psrl(Type Ty, typename Traits::XmmRegister dst,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
+                                        const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_int8());
   emitUint8(0x66);
@@ -1107,10 +1079,9 @@
 // {add,sub,mul,div}ps are given a Ty parameter for consistency with
 // {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc.,
 // we can use the Ty parameter to decide on adding a 0x66 prefix.
-template <class Machine>
-void AssemblerX86Base<Machine>::addps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1118,10 +1089,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::addps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1130,10 +1100,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::subps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1141,10 +1110,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::subps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1153,10 +1121,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::divps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1164,10 +1131,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::divps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1176,10 +1142,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mulps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1187,10 +1152,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mulps(Type /* Ty */,
-                                      typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1199,9 +1163,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::minps(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1211,9 +1175,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::minps(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1224,9 +1188,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::minss(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1235,9 +1199,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::minss(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -1247,9 +1211,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::maxps(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1259,9 +1223,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::maxps(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1272,9 +1236,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::maxss(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1283,9 +1247,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::maxss(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -1295,10 +1259,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::andnps(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
+                                          XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1308,10 +1271,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::andnps(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
+                                          const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1322,9 +1284,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::andps(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1334,9 +1296,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::andps(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1347,9 +1309,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::orps(Type Ty, typename Traits::XmmRegister dst,
-                                     typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
+                                        XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1359,9 +1321,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::orps(Type Ty, typename Traits::XmmRegister dst,
-                                     const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
+                                        const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1372,10 +1334,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::blendvps(Type /* Ty */,
-                                         typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
+                                            XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1385,10 +1346,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::blendvps(Type /* Ty */,
-                                         typename Traits::XmmRegister dst,
-                                         const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
+                                            const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -1399,10 +1359,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pblendvb(Type /* Ty */,
-                                         typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
+                                            XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1412,10 +1371,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pblendvb(Type /* Ty */,
-                                         typename Traits::XmmRegister dst,
-                                         const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
+                                            const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -1426,10 +1384,10 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmpps(
-    Type Ty, typename Traits::XmmRegister dst, typename Traits::XmmRegister src,
-    typename Traits::Cond::CmppsCond CmpCondition) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
+                                         XmmRegister src,
+                                         CmppsCond CmpCondition) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -1440,11 +1398,10 @@
   emitUint8(CmpCondition);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmpps(
-    Type Ty, typename Traits::XmmRegister dst,
-    const typename Traits::Address &src,
-    typename Traits::Cond::CmppsCond CmpCondition) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
+                                         const Address &src,
+                                         CmppsCond CmpCondition) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -1456,8 +1413,8 @@
   emitUint8(CmpCondition);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sqrtps(typename Traits::XmmRegister dst) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sqrtps(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, dst);
   emitUint8(0x0F);
@@ -1465,8 +1422,8 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::rsqrtps(typename Traits::XmmRegister dst) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::rsqrtps(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, dst);
   emitUint8(0x0F);
@@ -1474,8 +1431,8 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::reciprocalps(typename Traits::XmmRegister dst) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::reciprocalps(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, dst);
   emitUint8(0x0F);
@@ -1483,9 +1440,8 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movhlps(typename Traits::XmmRegister dst,
-                                        typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movhlps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1493,9 +1449,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movlhps(typename Traits::XmmRegister dst,
-                                        typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movlhps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1503,9 +1458,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::unpcklps(typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::unpcklps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1513,9 +1467,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::unpckhps(typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::unpckhps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1523,9 +1476,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::unpcklpd(typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::unpcklpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1534,9 +1486,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::unpckhpd(typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::unpckhpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1545,10 +1496,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::set1ps(typename Traits::XmmRegister dst,
-                                       typename Traits::GPRRegister tmp1,
-                                       const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::set1ps(XmmRegister dst, GPRRegister tmp1,
+                                          const Immediate &imm) {
   // Load 32-bit immediate value into tmp1.
   mov(IceType_i32, tmp1, imm);
   // Move value from tmp1 into dst.
@@ -1557,11 +1507,10 @@
   shufps(RexTypeIrrelevant, dst, dst, Immediate(0x0));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pshufd(Type /* Ty */,
-                                       typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src,
-                                       const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
+                                          XmmRegister src,
+                                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1572,11 +1521,10 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pshufd(Type /* Ty */,
-                                       typename Traits::XmmRegister dst,
-                                       const typename Traits::Address &src,
-                                       const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
+                                          const Address &src,
+                                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -1588,11 +1536,10 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shufps(Type /* Ty */,
-                                       typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src,
-                                       const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
+                                          XmmRegister src,
+                                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1602,11 +1549,10 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shufps(Type /* Ty */,
-                                       typename Traits::XmmRegister dst,
-                                       const typename Traits::Address &src,
-                                       const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
+                                          const Address &src,
+                                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1617,8 +1563,8 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sqrtpd(typename Traits::XmmRegister dst) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sqrtpd(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, dst);
@@ -1627,10 +1573,9 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvtdq2ps(Type /* Ignore */,
-                                         typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                                            XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1638,10 +1583,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvtdq2ps(Type /* Ignore */,
-                                         typename Traits::XmmRegister dst,
-                                         const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                                            const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1650,10 +1594,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvttps2dq(Type /* Ignore */,
-                                          typename Traits::XmmRegister dst,
-                                          typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                                             XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1662,10 +1605,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvttps2dq(Type /* Ignore */,
-                                          typename Traits::XmmRegister dst,
-                                          const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                                             const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitAddrSizeOverridePrefix();
@@ -1675,11 +1617,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
-                                         typename Traits::XmmRegister dst,
-                                         Type SrcTy,
-                                         typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
+                                            Type SrcTy, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
   emitRexRB(SrcTy, dst, src);
@@ -1688,11 +1628,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
-                                         typename Traits::XmmRegister dst,
-                                         Type SrcTy,
-                                         const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
+                                            Type SrcTy, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -1702,10 +1640,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvtfloat2float(
-    Type SrcTy, typename Traits::XmmRegister dst,
-    typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                                  XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // ss2sd or sd2ss
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
@@ -1715,10 +1652,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvtfloat2float(
-    Type SrcTy, typename Traits::XmmRegister dst,
-    const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                                  const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -1728,11 +1664,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvttss2si(Type DestTy,
-                                          typename Traits::GPRRegister dst,
-                                          Type SrcTy,
-                                          typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
+                                             Type SrcTy, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitRexRB(DestTy, dst, src);
@@ -1741,11 +1675,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cvttss2si(Type DestTy,
-                                          typename Traits::GPRRegister dst,
-                                          Type SrcTy,
-                                          const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
+                                             Type SrcTy, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -1755,9 +1687,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::ucomiss(Type Ty, typename Traits::XmmRegister a,
-                                        typename Traits::XmmRegister b) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
+                                           XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -1767,9 +1699,9 @@
   emitXmmRegisterOperand(a, b);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::ucomiss(Type Ty, typename Traits::XmmRegister a,
-                                        const typename Traits::Address &b) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
+                                           const Address &b) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -1780,9 +1712,8 @@
   emitOperand(gprEncoding(a), b);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movmskpd(typename Traits::GPRRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movmskpd(GPRRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1791,9 +1722,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::movmskps(typename Traits::GPRRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::movmskps(GPRRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1801,10 +1731,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sqrtss(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sqrtss(Type Ty, XmmRegister dst,
+                                          const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitAddrSizeOverridePrefix();
@@ -1814,10 +1743,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sqrtss(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sqrtss(Type Ty, XmmRegister dst,
+                                          XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1826,9 +1754,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::xorps(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
+                                         const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1839,9 +1767,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::xorps(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
+                                         XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1851,11 +1779,10 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::insertps(Type Ty,
-                                         typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src,
-                                         const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
+                                            XmmRegister src,
+                                            const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   assert(isVectorFloatingType(Ty));
@@ -1869,11 +1796,10 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::insertps(Type Ty,
-                                         typename Traits::XmmRegister dst,
-                                         const typename Traits::Address &src,
-                                         const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
+                                            const Address &src,
+                                            const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   assert(isVectorFloatingType(Ty));
@@ -1888,10 +1814,10 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pinsr(Type Ty, typename Traits::XmmRegister dst,
-                                      typename Traits::GPRRegister src,
-                                      const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
+                                         GPRRegister src,
+                                         const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   emitUint8(0x66);
@@ -1907,10 +1833,10 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pinsr(Type Ty, typename Traits::XmmRegister dst,
-                                      const typename Traits::Address &src,
-                                      const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
+                                         const Address &src,
+                                         const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   emitUint8(0x66);
@@ -1927,10 +1853,10 @@
   emitUint8(imm.value());
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pextr(Type Ty, typename Traits::GPRRegister dst,
-                                      typename Traits::XmmRegister src,
-                                      const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pextr(Type Ty, GPRRegister dst,
+                                         XmmRegister src,
+                                         const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   if (Ty == IceType_i16) {
@@ -1953,9 +1879,8 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pmovsxdq(typename Traits::XmmRegister dst,
-                                         typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pmovsxdq(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1965,10 +1890,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pcmpeq(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
+                                          XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1983,10 +1907,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pcmpeq(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
+                                          const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -2002,10 +1925,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pcmpgt(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       typename Traits::XmmRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
+                                          XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -2020,10 +1942,9 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::pcmpgt(Type Ty,
-                                       typename Traits::XmmRegister dst,
-                                       const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
+                                          const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitAddrSizeOverridePrefix();
@@ -2039,10 +1960,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::roundsd(typename Traits::XmmRegister dst,
-                                        typename Traits::XmmRegister src,
-                                        RoundingMode mode) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::roundsd(XmmRegister dst, XmmRegister src,
+                                           RoundingMode mode) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -2054,90 +1974,88 @@
   emitUint8(static_cast<uint8_t>(mode) | 0x8);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fnstcw(const typename T::Address &dst) {
+void AssemblerX86Base<TraitsType>::fnstcw(const typename T::Address &dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(0xD9);
   emitOperand(7, dst);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fldcw(const typename T::Address &src) {
+void AssemblerX86Base<TraitsType>::fldcw(const typename T::Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(0xD9);
   emitOperand(5, src);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fistpl(const typename T::Address &dst) {
+void AssemblerX86Base<TraitsType>::fistpl(const typename T::Address &dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(0xDF);
   emitOperand(7, dst);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fistps(const typename T::Address &dst) {
+void AssemblerX86Base<TraitsType>::fistps(const typename T::Address &dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(0xDB);
   emitOperand(3, dst);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::fildl(const typename T::Address &src) {
+void AssemblerX86Base<TraitsType>::fildl(const typename T::Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(0xDF);
   emitOperand(5, src);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T, typename>
-void AssemblerX86Base<Machine>::filds(const typename T::Address &src) {
+void AssemblerX86Base<TraitsType>::filds(const typename T::Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitUint8(0xDB);
   emitOperand(0, src);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename, typename>
-void AssemblerX86Base<Machine>::fincstp() {
+void AssemblerX86Base<TraitsType>::fincstp() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xD9);
   emitUint8(0xF7);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<Machine>::arith_int(Type Ty,
-                                          typename Traits::GPRRegister reg,
-                                          const Immediate &imm) {
+void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
+                                             const Immediate &imm) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
   emitRexB(Ty, reg);
   if (isByteSizedType(Ty)) {
-    emitComplexI8(Tag, typename Traits::Operand(reg), imm);
+    emitComplexI8(Tag, Operand(reg), imm);
   } else {
-    emitComplex(Ty, Tag, typename Traits::Operand(reg), imm);
+    emitComplex(Ty, Tag, Operand(reg), imm);
   }
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<Machine>::arith_int(Type Ty,
-                                          typename Traits::GPRRegister reg0,
-                                          typename Traits::GPRRegister reg1) {
+void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg0,
+                                             GPRRegister reg1) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2150,11 +2068,10 @@
   emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<Machine>::arith_int(
-    Type Ty, typename Traits::GPRRegister reg,
-    const typename Traits::Address &address) {
+void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
+                                             const Address &address) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2168,11 +2085,10 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<Machine>::arith_int(
-    Type Ty, const typename Traits::Address &address,
-    typename Traits::GPRRegister reg) {
+void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
+                                             GPRRegister reg) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2186,10 +2102,10 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<Machine>::arith_int(
-    Type Ty, const typename Traits::Address &address, const Immediate &imm) {
+void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
+                                             const Immediate &imm) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2203,41 +2119,39 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmp(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   arith_int<7>(Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmp(Type Ty, typename Traits::GPRRegister reg0,
-                                    typename Traits::GPRRegister reg1) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg0,
+                                       GPRRegister reg1) {
   arith_int<7>(Ty, reg0, reg1);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmp(Type Ty, typename Traits::GPRRegister reg,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
+                                       const Address &address) {
   arith_int<7>(Ty, reg, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmp(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
+                                       GPRRegister reg) {
   arith_int<7>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmp(Type Ty,
-                                    const typename Traits::Address &address,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
+                                       const Immediate &imm) {
   arith_int<7>(Ty, address, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::test(Type Ty, typename Traits::GPRRegister reg1,
-                                     typename Traits::GPRRegister reg2) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg1,
+                                        GPRRegister reg2) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2249,10 +2163,9 @@
   emitRegisterOperand(gprEncoding(reg1), gprEncoding(reg2));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::test(Type Ty,
-                                     const typename Traits::Address &addr,
-                                     typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
+                                        GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2265,9 +2178,9 @@
   emitOperand(gprEncoding(reg), addr);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::test(Type Ty, typename Traits::GPRRegister reg,
-                                     const Immediate &immediate) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg,
+                                        const Immediate &immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // For registers that have a byte variant (EAX, EBX, ECX, and EDX) we only
   // test the byte register to keep the encoding short. This is legal even if
@@ -2300,10 +2213,9 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::test(Type Ty,
-                                     const typename Traits::Address &addr,
-                                     const Immediate &immediate) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
+                                        const Immediate &immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // If the immediate is short, we only test the byte addr to keep the encoding
   // short.
@@ -2325,258 +2237,244 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::And(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   arith_int<4>(Ty, dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::And(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
+                                       const Address &address) {
   arith_int<4>(Ty, dst, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::And(Type Ty, typename Traits::GPRRegister dst,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
+                                       const Immediate &imm) {
   arith_int<4>(Ty, dst, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::And(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
+                                       GPRRegister reg) {
   arith_int<4>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::And(Type Ty,
-                                    const typename Traits::Address &address,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
+                                       const Immediate &imm) {
   arith_int<4>(Ty, address, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Or(Type Ty, typename Traits::GPRRegister dst,
-                                   typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
+                                      GPRRegister src) {
   arith_int<1>(Ty, dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Or(Type Ty, typename Traits::GPRRegister dst,
-                                   const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
+                                      const Address &address) {
   arith_int<1>(Ty, dst, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Or(Type Ty, typename Traits::GPRRegister dst,
-                                   const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
+                                      const Immediate &imm) {
   arith_int<1>(Ty, dst, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Or(Type Ty,
-                                   const typename Traits::Address &address,
-                                   typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
+                                      GPRRegister reg) {
   arith_int<1>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Or(Type Ty,
-                                   const typename Traits::Address &address,
-                                   const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
+                                      const Immediate &imm) {
   arith_int<1>(Ty, address, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Xor(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   arith_int<6>(Ty, dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Xor(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
+                                       const Address &address) {
   arith_int<6>(Ty, dst, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Xor(Type Ty, typename Traits::GPRRegister dst,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
+                                       const Immediate &imm) {
   arith_int<6>(Ty, dst, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Xor(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
+                                       GPRRegister reg) {
   arith_int<6>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::Xor(Type Ty,
-                                    const typename Traits::Address &address,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
+                                       const Immediate &imm) {
   arith_int<6>(Ty, address, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::add(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   arith_int<0>(Ty, dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::add(Type Ty, typename Traits::GPRRegister reg,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
+                                       const Address &address) {
   arith_int<0>(Ty, reg, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::add(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   arith_int<0>(Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::add(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
+                                       GPRRegister reg) {
   arith_int<0>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::add(Type Ty,
-                                    const typename Traits::Address &address,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
+                                       const Immediate &imm) {
   arith_int<0>(Ty, address, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::adc(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   arith_int<2>(Ty, dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::adc(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
+                                       const Address &address) {
   arith_int<2>(Ty, dst, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::adc(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   arith_int<2>(Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::adc(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
+                                       GPRRegister reg) {
   arith_int<2>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::adc(Type Ty,
-                                    const typename Traits::Address &address,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
+                                       const Immediate &imm) {
   arith_int<2>(Ty, address, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sub(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   arith_int<5>(Ty, dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sub(Type Ty, typename Traits::GPRRegister reg,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
+                                       const Address &address) {
   arith_int<5>(Ty, reg, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sub(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   arith_int<5>(Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sub(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
+                                       GPRRegister reg) {
   arith_int<5>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sub(Type Ty,
-                                    const typename Traits::Address &address,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
+                                       const Immediate &imm) {
   arith_int<5>(Ty, address, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sbb(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   arith_int<3>(Ty, dst, src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sbb(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
+                                       const Address &address) {
   arith_int<3>(Ty, dst, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sbb(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   arith_int<3>(Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sbb(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
+                                       GPRRegister reg) {
   arith_int<3>(Ty, address, reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sbb(Type Ty,
-                                    const typename Traits::Address &address,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
+                                       const Immediate &imm) {
   arith_int<3>(Ty, address, imm);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::cbw() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::cbw() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitOperandSizeOverride();
   emitUint8(0x98);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::cwd() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::cwd() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitOperandSizeOverride();
   emitUint8(0x99);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::cdq() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::cdq() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x99);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T>
 typename std::enable_if<T::Is64Bit, void>::type
-AssemblerX86Base<Machine>::cqo() {
+AssemblerX86Base<TraitsType>::cqo() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeForceRexW, RexRegIrrelevant);
   emitUint8(0x99);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::div(Type Ty, typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::div(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2588,9 +2486,8 @@
   emitRegisterOperand(6, gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::div(Type Ty,
-                                    const typename Traits::Address &addr) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::div(Type Ty, const Address &addr) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2603,9 +2500,8 @@
   emitOperand(6, addr);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::idiv(Type Ty,
-                                     typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::idiv(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2617,9 +2513,8 @@
   emitRegisterOperand(7, gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::idiv(Type Ty,
-                                     const typename Traits::Address &addr) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::idiv(Type Ty, const Address &addr) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2632,9 +2527,9 @@
   emitOperand(7, addr);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
-                                     typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
+                                        GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -2646,9 +2541,9 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister reg,
-                                     const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
+                                        const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -2661,9 +2556,9 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister reg,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
+                                        const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -2680,9 +2575,8 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::imul(Type Ty,
-                                     typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2694,9 +2588,8 @@
   emitRegisterOperand(5, gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::imul(Type Ty,
-                                     const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::imul(Type Ty, const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2709,10 +2602,9 @@
   emitOperand(5, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
-                                     typename Traits::GPRRegister src,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
+                                        GPRRegister src, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -2729,10 +2621,10 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
-                                     const typename Traits::Address &address,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
+                                        const Address &address,
+                                        const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -2750,8 +2642,8 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mul(Type Ty, typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mul(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2763,9 +2655,8 @@
   emitRegisterOperand(4, gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::mul(Type Ty,
-                                    const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::mul(Type Ty, const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2778,15 +2669,15 @@
   emitOperand(4, address);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename, typename>
-void AssemblerX86Base<Machine>::incl(typename Traits::GPRRegister reg) {
+void AssemblerX86Base<TraitsType>::incl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x40 + reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::incl(const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::incl(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(IceType_i32, address, RexRegIrrelevant);
@@ -2794,15 +2685,15 @@
   emitOperand(0, address);
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename, typename>
-void AssemblerX86Base<Machine>::decl(typename Traits::GPRRegister reg) {
+void AssemblerX86Base<TraitsType>::decl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x48 + reg);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::decl(const typename Traits::Address &address) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::decl(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitAddrSizeOverridePrefix();
   emitRex(IceType_i32, address, RexRegIrrelevant);
@@ -2810,89 +2701,81 @@
   emitOperand(1, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::rol(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   emitGenericShift(0, Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::rol(Type Ty,
-                                    typename Traits::GPRRegister operand,
-                                    typename Traits::GPRRegister shifter) {
-  emitGenericShift(0, Ty, typename Traits::Operand(operand), shifter);
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister operand,
+                                       GPRRegister shifter) {
+  emitGenericShift(0, Ty, Operand(operand), shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::rol(Type Ty,
-                                    const typename Traits::Address &operand,
-                                    typename Traits::GPRRegister shifter) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::rol(Type Ty, const Address &operand,
+                                       GPRRegister shifter) {
   emitGenericShift(0, Ty, operand, shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shl(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   emitGenericShift(4, Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shl(Type Ty,
-                                    typename Traits::GPRRegister operand,
-                                    typename Traits::GPRRegister shifter) {
-  emitGenericShift(4, Ty, typename Traits::Operand(operand), shifter);
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister operand,
+                                       GPRRegister shifter) {
+  emitGenericShift(4, Ty, Operand(operand), shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shl(Type Ty,
-                                    const typename Traits::Address &operand,
-                                    typename Traits::GPRRegister shifter) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shl(Type Ty, const Address &operand,
+                                       GPRRegister shifter) {
   emitGenericShift(4, Ty, operand, shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shr(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   emitGenericShift(5, Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shr(Type Ty,
-                                    typename Traits::GPRRegister operand,
-                                    typename Traits::GPRRegister shifter) {
-  emitGenericShift(5, Ty, typename Traits::Operand(operand), shifter);
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister operand,
+                                       GPRRegister shifter) {
+  emitGenericShift(5, Ty, Operand(operand), shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shr(Type Ty,
-                                    const typename Traits::Address &operand,
-                                    typename Traits::GPRRegister shifter) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shr(Type Ty, const Address &operand,
+                                       GPRRegister shifter) {
   emitGenericShift(5, Ty, operand, shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sar(Type Ty, typename Traits::GPRRegister reg,
-                                    const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister reg,
+                                       const Immediate &imm) {
   emitGenericShift(7, Ty, reg, imm);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sar(Type Ty,
-                                    typename Traits::GPRRegister operand,
-                                    typename Traits::GPRRegister shifter) {
-  emitGenericShift(7, Ty, typename Traits::Operand(operand), shifter);
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister operand,
+                                       GPRRegister shifter) {
+  emitGenericShift(7, Ty, Operand(operand), shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::sar(Type Ty,
-                                    const typename Traits::Address &address,
-                                    typename Traits::GPRRegister shifter) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::sar(Type Ty, const Address &address,
+                                       GPRRegister shifter) {
   emitGenericShift(7, Ty, address, shifter);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shld(Type Ty, typename Traits::GPRRegister dst,
-                                     typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
+                                        GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -2903,10 +2786,9 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shld(Type Ty, typename Traits::GPRRegister dst,
-                                     typename Traits::GPRRegister src,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
+                                        GPRRegister src, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   assert(imm.is_int8());
@@ -2919,10 +2801,9 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shld(Type Ty,
-                                     const typename Traits::Address &operand,
-                                     typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shld(Type Ty, const Address &operand,
+                                        GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -2934,9 +2815,9 @@
   emitOperand(gprEncoding(src), operand);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shrd(Type Ty, typename Traits::GPRRegister dst,
-                                     typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
+                                        GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -2947,10 +2828,9 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shrd(Type Ty, typename Traits::GPRRegister dst,
-                                     typename Traits::GPRRegister src,
-                                     const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
+                                        GPRRegister src, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   assert(imm.is_int8());
@@ -2963,10 +2843,9 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::shrd(Type Ty,
-                                     const typename Traits::Address &dst,
-                                     typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::shrd(Type Ty, const Address &dst,
+                                        GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -2978,8 +2857,8 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::neg(Type Ty, typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::neg(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2991,9 +2870,8 @@
   emitRegisterOperand(3, gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::neg(Type Ty,
-                                    const typename Traits::Address &addr) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::neg(Type Ty, const Address &addr) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3006,17 +2884,16 @@
   emitOperand(3, addr);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::notl(typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::notl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(IceType_i32, reg);
   emitUint8(0xF7);
   emitUint8(0xD0 | gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bswap(Type Ty,
-                                      typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bswap(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
   emitRexB(Ty, reg);
@@ -3024,9 +2901,9 @@
   emitUint8(0xC8 | gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bsf(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3038,9 +2915,9 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bsf(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
+                                       const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3053,9 +2930,9 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bsr(Type Ty, typename Traits::GPRRegister dst,
-                                    typename Traits::GPRRegister src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
+                                       GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3067,9 +2944,9 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bsr(Type Ty, typename Traits::GPRRegister dst,
-                                    const typename Traits::Address &src) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
+                                       const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3082,9 +2959,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::bt(typename Traits::GPRRegister base,
-                                   typename Traits::GPRRegister offset) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bt(GPRRegister base, GPRRegister offset) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(IceType_i32, offset, base);
   emitUint8(0x0F);
@@ -3092,13 +2968,13 @@
   emitRegisterOperand(gprEncoding(offset), gprEncoding(base));
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::ret() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::ret() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xC3);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::ret(const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::ret(const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xC2);
   assert(imm.is_uint16());
@@ -3106,7 +2982,8 @@
   emitUint8((imm.value() >> 8) & 0xFF);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::nop(int size) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::nop(int size) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // There are nops up to size 15, but for now just provide up to size 8.
   assert(0 < size && size <= MAX_NOP_SIZE);
@@ -3168,25 +3045,25 @@
   }
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::int3() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::int3() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xCC);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::hlt() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::hlt() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF4);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::ud2() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::ud2() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
   emitUint8(0x0B);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::j(typename Traits::Cond::BrCond condition,
-                                  Label *label, bool near) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::j(BrCond condition, Label *label,
+                                     bool near) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (label->isBound()) {
     static const int kShortSize = 2;
@@ -3218,9 +3095,9 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::j(typename Traits::Cond::BrCond condition,
-                                  const ConstantRelocatable *label) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::j(BrCond condition,
+                                     const ConstantRelocatable *label) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
   emitUint8(0x80 + condition);
@@ -3228,16 +3105,16 @@
   emitInt32(-4);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::jmp(typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::jmp(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeIrrelevant, reg);
   emitUint8(0xFF);
   emitRegisterOperand(4, gprEncoding(reg));
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::jmp(Label *label, bool near) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::jmp(Label *label, bool near) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (label->isBound()) {
     static const int kShortSize = 2;
@@ -3260,31 +3137,29 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::jmp(const ConstantRelocatable *label) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::jmp(const ConstantRelocatable *label) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xE9);
   emitFixup(this->createFixup(Traits::PcRelFixup, label));
   emitInt32(-4);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::mfence() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::mfence() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
   emitUint8(0xAE);
   emitUint8(0xF0);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::lock() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::lock() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF0);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmpxchg(Type Ty,
-                                        const typename Traits::Address &address,
-                                        typename Traits::GPRRegister reg,
-                                        bool Locked) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmpxchg(Type Ty, const Address &address,
+                                           GPRRegister reg, bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3300,9 +3175,9 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::cmpxchg8b(
-    const typename Traits::Address &address, bool Locked) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::cmpxchg8b(const Address &address,
+                                             bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Locked)
     emitUint8(0xF0);
@@ -3313,11 +3188,9 @@
   emitOperand(1, address);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::xadd(Type Ty,
-                                     const typename Traits::Address &addr,
-                                     typename Traits::GPRRegister reg,
-                                     bool Locked) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::xadd(Type Ty, const Address &addr,
+                                        GPRRegister reg, bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3333,9 +3206,9 @@
   emitOperand(gprEncoding(reg), addr);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::xchg(Type Ty, typename Traits::GPRRegister reg0,
-                                     typename Traits::GPRRegister reg1) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::xchg(Type Ty, GPRRegister reg0,
+                                        GPRRegister reg1) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3356,10 +3229,9 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::xchg(Type Ty,
-                                     const typename Traits::Address &addr,
-                                     typename Traits::GPRRegister reg) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::xchg(Type Ty, const Address &addr,
+                                        GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3372,14 +3244,13 @@
   emitOperand(gprEncoding(reg), addr);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::iaca_start() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_start() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
   emitUint8(0x0B);
 
   // mov $111, ebx
-  constexpr typename Traits::GPRRegister dst =
-      Traits::GPRRegister::Encoded_Reg_ebx;
+  constexpr GPRRegister dst = Traits::GPRRegister::Encoded_Reg_ebx;
   constexpr Type Ty = IceType_i32;
   emitRexB(Ty, dst);
   emitUint8(0xB8 + gprEncoding(dst));
@@ -3390,12 +3261,11 @@
   emitUint8(0x90);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::iaca_end() {
+template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_end() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
 
   // mov $222, ebx
-  constexpr typename Traits::GPRRegister dst =
-      Traits::GPRRegister::Encoded_Reg_ebx;
+  constexpr GPRRegister dst = Traits::GPRRegister::Encoded_Reg_ebx;
   constexpr Type Ty = IceType_i32;
   emitRexB(Ty, dst);
   emitUint8(0xB8 + gprEncoding(dst));
@@ -3409,14 +3279,14 @@
   emitUint8(0x0B);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitSegmentOverride(uint8_t prefix) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitSegmentOverride(uint8_t prefix) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(prefix);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::align(intptr_t alignment, intptr_t offset) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::align(intptr_t alignment, intptr_t offset) {
   assert(llvm::isPowerOf2_32(alignment));
   intptr_t pos = offset + Buffer.getPosition();
   intptr_t mod = pos & (alignment - 1);
@@ -3434,7 +3304,8 @@
   assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0);
 }
 
-template <class Machine> void AssemblerX86Base<Machine>::bind(Label *label) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::bind(Label *label) {
   intptr_t bound = Buffer.size();
   assert(!label->isBound()); // Labels can only be bound once.
   while (label->isLinked()) {
@@ -3452,9 +3323,8 @@
   label->bindTo(bound);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitOperand(
-    int rm, const typename Traits::Operand &operand) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitOperand(int rm, const Operand &operand) {
   assert(rm >= 0 && rm < 8);
   const intptr_t length = operand.length_;
   assert(length > 0);
@@ -3479,8 +3349,9 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitImmediate(Type Ty, const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitImmediate(Type Ty,
+                                                 const Immediate &imm) {
   if (Ty == IceType_i16) {
     assert(!imm.fixup());
     emitInt16(imm.value());
@@ -3492,10 +3363,9 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitComplexI8(
-    int rm, const typename Traits::Operand &operand,
-    const Immediate &immediate) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitComplexI8(int rm, const Operand &operand,
+                                                 const Immediate &immediate) {
   assert(rm >= 0 && rm < 8);
   assert(immediate.is_int8());
   if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
@@ -3510,10 +3380,10 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitComplex(
-    Type Ty, int rm, const typename Traits::Operand &operand,
-    const Immediate &immediate) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitComplex(Type Ty, int rm,
+                                               const Operand &operand,
+                                               const Immediate &immediate) {
   assert(rm >= 0 && rm < 8);
   if (immediate.is_int8()) {
     // Use sign-extended 8-bit immediate.
@@ -3531,9 +3401,9 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitLabel(Label *label,
-                                          intptr_t instruction_size) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitLabel(Label *label,
+                                             intptr_t instruction_size) {
   if (label->isBound()) {
     intptr_t offset = label->getPosition() - Buffer.size();
     assert(offset <= 0);
@@ -3543,25 +3413,26 @@
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitLabelLink(Label *Label) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitLabelLink(Label *Label) {
   assert(!Label->isBound());
   intptr_t Position = Buffer.size();
   emitInt32(Label->Position);
   Label->linkTo(*this, Position);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitNearLabelLink(Label *Label) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitNearLabelLink(Label *Label) {
   assert(!Label->isBound());
   intptr_t Position = Buffer.size();
   emitUint8(0);
   Label->nearLinkTo(*this, Position);
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitGenericShift(
-    int rm, Type Ty, typename Traits::GPRRegister reg, const Immediate &imm) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
+                                                    GPRRegister reg,
+                                                    const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // We don't assert that imm fits into 8 bits; instead, it gets masked below.
   // Note that we don't mask it further (e.g. to 5 bits) because we want the
@@ -3572,18 +3443,18 @@
   emitRexB(Ty, reg);
   if (imm.value() == 1) {
     emitUint8(isByteSizedArithType(Ty) ? 0xD0 : 0xD1);
-    emitOperand(rm, typename Traits::Operand(reg));
+    emitOperand(rm, Operand(reg));
   } else {
     emitUint8(isByteSizedArithType(Ty) ? 0xC0 : 0xC1);
-    emitOperand(rm, typename Traits::Operand(reg));
+    emitOperand(rm, Operand(reg));
     emitUint8(imm.value() & 0xFF);
   }
 }
 
-template <class Machine>
-void AssemblerX86Base<Machine>::emitGenericShift(
-    int rm, Type Ty, const typename Traits::Operand &operand,
-    typename Traits::GPRRegister shifter) {
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
+                                                    const Operand &operand,
+                                                    GPRRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(shifter == Traits::Encoded_Reg_Counter);
   (void)shifter;
@@ -3594,5 +3465,5 @@
   emitOperand(rm, operand);
 }
 
-} // end of namespace X86Internal
+} // end of namespace X86NAMESPACE
 } // end of namespace Ice
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index a09c5fc..4919600 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -24,6 +24,7 @@
 #include "IceTargetLoweringARM32.h"
 
 namespace Ice {
+namespace ARM32 {
 
 namespace {
 
@@ -2151,4 +2152,5 @@
 template class InstARM32CmpLike<InstARM32::Cmp>;
 template class InstARM32CmpLike<InstARM32::Tst>;
 
+} // end of namespace ARM32
 } // end of namespace Ice
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index a2aba57..75f48a7 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -25,6 +25,7 @@
 #include "IceOperand.h"
 
 namespace Ice {
+namespace ARM32 {
 
 class TargetARM32;
 
@@ -1376,6 +1377,7 @@
 template <> void InstARM32Movw::emit(const Cfg *Func) const;
 template <> void InstARM32Movt::emit(const Cfg *Func) const;
 
+} // end of namespace ARM32
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICEINSTARM32_H
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp
index 13faf28..b4c1a87 100644
--- a/src/IceInstMIPS32.cpp
+++ b/src/IceInstMIPS32.cpp
@@ -23,6 +23,7 @@
 #include <limits>
 
 namespace Ice {
+namespace MIPS32 {
 
 bool OperandMIPS32Mem::canHoldOffset(Type Ty, bool SignExt, int32_t Offset) {
   (void)SignExt;
@@ -274,4 +275,5 @@
   getSrc(0)->emit(Func);
 }
 
+} // end of namespace MIPS32
 } // end of namespace Ice
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h
index acd7bfd..aa707ae 100644
--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -25,6 +25,7 @@
 #include "IceOperand.h"
 
 namespace Ice {
+namespace MIPS32 {
 
 class TargetMIPS32;
 
@@ -395,6 +396,7 @@
   Variable *DestHi = nullptr;
 };
 
+} // end of namespace MIPS32
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICEINSTMIPS32_H
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 28fa07b..bf26f84 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -28,10 +28,10 @@
 
 namespace Ice {
 
-namespace X86Internal {
+namespace X8632 {
 
-const MachineTraits<TargetX8632>::InstBrAttributesType
-    MachineTraits<TargetX8632>::InstBrAttributes[] = {
+const TargetX8632Traits::InstBrAttributesType
+    TargetX8632Traits::InstBrAttributes[] = {
 #define X(val, encode, opp, dump, emit)                                        \
   { X8632::Traits::Cond::opp, dump, emit }                                     \
   ,
@@ -39,8 +39,8 @@
 #undef X
 };
 
-const MachineTraits<TargetX8632>::InstCmppsAttributesType
-    MachineTraits<TargetX8632>::InstCmppsAttributes[] = {
+const TargetX8632Traits::InstCmppsAttributesType
+    TargetX8632Traits::InstCmppsAttributes[] = {
 #define X(val, emit)                                                           \
   { emit }                                                                     \
   ,
@@ -48,8 +48,8 @@
 #undef X
 };
 
-const MachineTraits<TargetX8632>::TypeAttributesType
-    MachineTraits<TargetX8632>::TypeAttributes[] = {
+const TargetX8632Traits::TypeAttributesType
+    TargetX8632Traits::TypeAttributes[] = {
 #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)             \
   { cvt, sdss, pdps, spsd, pack, width, fld }                                  \
   ,
@@ -57,27 +57,28 @@
 #undef X
 };
 
-const char *MachineTraits<TargetX8632>::InstSegmentRegNames[] = {
+const char *TargetX8632Traits::InstSegmentRegNames[] = {
 #define X(val, name, prefix) name,
     SEG_REGX8632_TABLE
 #undef X
 };
 
-uint8_t MachineTraits<TargetX8632>::InstSegmentPrefixes[] = {
+uint8_t TargetX8632Traits::InstSegmentPrefixes[] = {
 #define X(val, name, prefix) prefix,
     SEG_REGX8632_TABLE
 #undef X
 };
 
-void MachineTraits<TargetX8632>::X86Operand::dump(const Cfg *,
-                                                  Ostream &Str) const {
+void TargetX8632Traits::X86Operand::dump(const Cfg *, Ostream &Str) const {
   if (BuildDefs::dump())
     Str << "<OperandX8632>";
 }
 
-MachineTraits<TargetX8632>::X86OperandMem::X86OperandMem(
-    Cfg *Func, Type Ty, Variable *Base, Constant *Offset, Variable *Index,
-    uint16_t Shift, SegmentRegisters SegmentReg)
+TargetX8632Traits::X86OperandMem::X86OperandMem(Cfg *Func, Type Ty,
+                                                Variable *Base,
+                                                Constant *Offset,
+                                                Variable *Index, uint16_t Shift,
+                                                SegmentRegisters SegmentReg)
     : X86Operand(kMem, Ty), Base(Base), Offset(Offset), Index(Index),
       Shift(Shift), SegmentReg(SegmentReg), Randomized(false) {
   assert(Shift <= 3);
@@ -99,8 +100,9 @@
 }
 
 namespace {
-static int32_t GetRematerializableOffset(Variable *Var,
-                                         const Ice::TargetX8632 *Target) {
+static int32_t
+GetRematerializableOffset(Variable *Var,
+                          const Ice::X8632::TargetX8632 *Target) {
   int32_t Disp = Var->getStackOffset();
   SizeT RegNum = static_cast<SizeT>(Var->getRegNum());
   if (RegNum == Target->getFrameReg()) {
@@ -112,10 +114,11 @@
 }
 } // end of anonymous namespace
 
-void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
+void TargetX8632Traits::X86OperandMem::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
-  const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
+  const auto *Target =
+      static_cast<const ::Ice::X8632::TargetX8632 *>(Func->getTarget());
   // If the base is rematerializable, we need to replace it with the correct
   // physical register (esp or ebp), and update the Offset.
   int32_t Disp = 0;
@@ -166,8 +169,8 @@
   }
 }
 
-void MachineTraits<TargetX8632>::X86OperandMem::dump(const Cfg *Func,
-                                                     Ostream &Str) const {
+void TargetX8632Traits::X86OperandMem::dump(const Cfg *Func,
+                                            Ostream &Str) const {
   if (!BuildDefs::dump())
     return;
   if (SegmentReg != DefaultSegment) {
@@ -177,7 +180,8 @@
   bool Dumped = false;
   Str << "[";
   int32_t Disp = 0;
-  const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
+  const auto *Target =
+      static_cast<const ::Ice::X8632::TargetX8632 *>(Func->getTarget());
   if (getBase() && getBase()->isRematerializable()) {
     Disp += GetRematerializableOffset(getBase(), Target);
   }
@@ -230,20 +234,20 @@
   Str << "]";
 }
 
-void MachineTraits<TargetX8632>::X86OperandMem::emitSegmentOverride(
-    MachineTraits<TargetX8632>::Assembler *Asm) const {
+void TargetX8632Traits::X86OperandMem::emitSegmentOverride(
+    TargetX8632Traits::Assembler *Asm) const {
   if (SegmentReg != DefaultSegment) {
     assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
     Asm->emitSegmentOverride(X8632::Traits::InstSegmentPrefixes[SegmentReg]);
   }
 }
 
-MachineTraits<TargetX8632>::Address
-MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
-    MachineTraits<TargetX8632>::Assembler *Asm,
+TargetX8632Traits::Address TargetX8632Traits::X86OperandMem::toAsmAddress(
+    TargetX8632Traits::Assembler *Asm,
     const Ice::TargetLowering *TargetLowering) const {
   int32_t Disp = 0;
-  const auto *Target = static_cast<const Ice::TargetX8632 *>(TargetLowering);
+  const auto *Target =
+      static_cast<const ::Ice::X8632::TargetX8632 *>(TargetLowering);
   if (getBase() && getBase()->isRematerializable()) {
     Disp += GetRematerializableOffset(getBase(), Target);
   }
@@ -284,8 +288,8 @@
   }
 }
 
-MachineTraits<TargetX8632>::Address
-MachineTraits<TargetX8632>::VariableSplit::toAsmAddress(const Cfg *Func) const {
+TargetX8632Traits::Address
+TargetX8632Traits::VariableSplit::toAsmAddress(const Cfg *Func) const {
   assert(!Var->hasReg());
   const ::Ice::TargetLowering *Target = Func->getTarget();
   int32_t Offset = Var->getStackOffset() + getOffset();
@@ -293,7 +297,7 @@
                                 Offset, AssemblerFixup::NoFixup);
 }
 
-void MachineTraits<TargetX8632>::VariableSplit::emit(const Cfg *Func) const {
+void TargetX8632Traits::VariableSplit::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -307,8 +311,8 @@
   Str << "(%" << Target->getRegName(Target->getFrameOrStackReg(), Ty) << ")";
 }
 
-void MachineTraits<TargetX8632>::VariableSplit::dump(const Cfg *Func,
-                                                     Ostream &Str) const {
+void TargetX8632Traits::VariableSplit::dump(const Cfg *Func,
+                                            Ostream &Str) const {
   if (!BuildDefs::dump())
     return;
   switch (Part) {
@@ -327,7 +331,7 @@
   Str << ")";
 }
 
-} // namespace X86Internal
+} // namespace X8632
 } // end of namespace Ice
 
-X86INSTS_DEFINE_STATIC_DATA(TargetX8632)
+X86INSTS_DEFINE_STATIC_DATA(X8632, X8632::Traits)
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 00d18a0..1007368 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -28,7 +28,9 @@
 
 #include "IceDefs.h"
 #include "IceInst.h"
+#define X86NAMESPACE X8632
 #include "IceInstX86Base.h"
+#undef X86NAMESPACE
 #include "IceOperand.h"
 #include "IceTargetLoweringX8632Traits.h"
 
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp
index c682d9e..e7130ed 100644
--- a/src/IceInstX8664.cpp
+++ b/src/IceInstX8664.cpp
@@ -29,10 +29,10 @@
 
 namespace Ice {
 
-namespace X86Internal {
+namespace X8664 {
 
-const MachineTraits<TargetX8664>::InstBrAttributesType
-    MachineTraits<TargetX8664>::InstBrAttributes[] = {
+const TargetX8664Traits::InstBrAttributesType
+    TargetX8664Traits::InstBrAttributes[] = {
 #define X(val, encode, opp, dump, emit)                                        \
   { X8664::Traits::Cond::opp, dump, emit }                                     \
   ,
@@ -40,8 +40,8 @@
 #undef X
 };
 
-const MachineTraits<TargetX8664>::InstCmppsAttributesType
-    MachineTraits<TargetX8664>::InstCmppsAttributes[] = {
+const TargetX8664Traits::InstCmppsAttributesType
+    TargetX8664Traits::InstCmppsAttributes[] = {
 #define X(val, emit)                                                           \
   { emit }                                                                     \
   ,
@@ -49,8 +49,8 @@
 #undef X
 };
 
-const MachineTraits<TargetX8664>::TypeAttributesType
-    MachineTraits<TargetX8664>::TypeAttributes[] = {
+const TargetX8664Traits::TypeAttributesType
+    TargetX8664Traits::TypeAttributes[] = {
 #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)             \
   { cvt, sdss, pdps, spsd, pack, width, fld }                                  \
   ,
@@ -58,17 +58,15 @@
 #undef X
 };
 
-void MachineTraits<TargetX8664>::X86Operand::dump(const Cfg *,
-                                                  Ostream &Str) const {
+void TargetX8664Traits::X86Operand::dump(const Cfg *, Ostream &Str) const {
   if (BuildDefs::dump())
     Str << "<OperandX8664>";
 }
 
-MachineTraits<TargetX8664>::X86OperandMem::X86OperandMem(Cfg *Func, Type Ty,
-                                                         Variable *Base,
-                                                         Constant *Offset,
-                                                         Variable *Index,
-                                                         uint16_t Shift)
+TargetX8664Traits::X86OperandMem::X86OperandMem(Cfg *Func, Type Ty,
+                                                Variable *Base,
+                                                Constant *Offset,
+                                                Variable *Index, uint16_t Shift)
     : X86Operand(kMem, Ty), Base(Base), Offset(Offset), Index(Index),
       Shift(Shift) {
   assert(Shift <= 3);
@@ -90,8 +88,9 @@
 }
 
 namespace {
-static int32_t getRematerializableOffset(Variable *Var,
-                                         const Ice::TargetX8664 *Target) {
+static int32_t
+getRematerializableOffset(Variable *Var,
+                          const ::Ice::X8664::TargetX8664 *Target) {
   int32_t Disp = Var->getStackOffset();
   SizeT RegNum = static_cast<SizeT>(Var->getRegNum());
   if (RegNum == Target->getFrameReg()) {
@@ -103,10 +102,11 @@
 }
 } // end of anonymous namespace
 
-void MachineTraits<TargetX8664>::X86OperandMem::emit(const Cfg *Func) const {
+void TargetX8664Traits::X86OperandMem::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
-  const auto *Target = static_cast<const Ice::TargetX8664 *>(Func->getTarget());
+  const auto *Target =
+      static_cast<const ::Ice::X8664::TargetX8664 *>(Func->getTarget());
   // If the base is rematerializable, we need to replace it with the correct
   // physical register (stack or base pointer), and update the Offset.
   int32_t Disp = 0;
@@ -164,14 +164,15 @@
   }
 }
 
-void MachineTraits<TargetX8664>::X86OperandMem::dump(const Cfg *Func,
-                                                     Ostream &Str) const {
+void TargetX8664Traits::X86OperandMem::dump(const Cfg *Func,
+                                            Ostream &Str) const {
   if (!BuildDefs::dump())
     return;
   bool Dumped = false;
   Str << "[";
   int32_t Disp = 0;
-  const auto *Target = static_cast<const Ice::TargetX8664 *>(Func->getTarget());
+  const auto *Target =
+      static_cast<const ::Ice::X8664::TargetX8664 *>(Func->getTarget());
   if (getBase() && getBase()->isRematerializable()) {
     Disp += getRematerializableOffset(getBase(), Target);
   }
@@ -223,11 +224,11 @@
   Str << "]";
 }
 
-MachineTraits<TargetX8664>::Address
-MachineTraits<TargetX8664>::X86OperandMem::toAsmAddress(
-    MachineTraits<TargetX8664>::Assembler *Asm,
+TargetX8664Traits::Address TargetX8664Traits::X86OperandMem::toAsmAddress(
+    TargetX8664Traits::Assembler *Asm,
     const Ice::TargetLowering *TargetLowering) const {
-  const auto *Target = static_cast<const Ice::TargetX8664 *>(TargetLowering);
+  const auto *Target =
+      static_cast<const ::Ice::X8664::TargetX8664 *>(TargetLowering);
   int32_t Disp = 0;
   if (getBase() && getBase()->isRematerializable()) {
     Disp += getRematerializableOffset(getBase(), Target);
@@ -266,8 +267,8 @@
   }
 }
 
-MachineTraits<TargetX8664>::Address
-MachineTraits<TargetX8664>::VariableSplit::toAsmAddress(const Cfg *Func) const {
+TargetX8664Traits::Address
+TargetX8664Traits::VariableSplit::toAsmAddress(const Cfg *Func) const {
   assert(!Var->hasReg());
   const ::Ice::TargetLowering *Target = Func->getTarget();
   int32_t Offset = Var->getStackOffset() + getOffset();
@@ -275,7 +276,7 @@
                                 Offset, AssemblerFixup::NoFixup);
 }
 
-void MachineTraits<TargetX8664>::VariableSplit::emit(const Cfg *Func) const {
+void TargetX8664Traits::VariableSplit::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -289,8 +290,8 @@
   Str << "(%" << Target->getRegName(Target->getFrameOrStackReg(), Ty) << ")";
 }
 
-void MachineTraits<TargetX8664>::VariableSplit::dump(const Cfg *Func,
-                                                     Ostream &Str) const {
+void TargetX8664Traits::VariableSplit::dump(const Cfg *Func,
+                                            Ostream &Str) const {
   if (!BuildDefs::dump())
     return;
   switch (Part) {
@@ -309,7 +310,7 @@
   Str << ")";
 }
 
-} // namespace X86Internal
+} // end of namespace X8664
 } // end of namespace Ice
 
-X86INSTS_DEFINE_STATIC_DATA(TargetX8664)
+X86INSTS_DEFINE_STATIC_DATA(X8664, X8664::Traits)
diff --git a/src/IceInstX8664.h b/src/IceInstX8664.h
index e71255e..cdf986c 100644
--- a/src/IceInstX8664.h
+++ b/src/IceInstX8664.h
@@ -26,7 +26,9 @@
 
 #include "IceDefs.h"
 #include "IceInst.h"
+#define X86NAMESPACE X8664 // namespace opened by IceInstX86Base.h
 #include "IceInstX86Base.h"
+#undef X86NAMESPACE // not visible to the includes that follow
 #include "IceOperand.h"
 #include "IceTargetLoweringX8664Traits.h"
 
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index de7b405..8cefabb 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -25,3423 +25,3458 @@
 
 namespace Ice {
 
-namespace X86Internal {
+#ifndef X86NAMESPACE
+#error "You must define the X86 Target namespace."
+#endif // X86NAMESPACE
 
-template <class Machine> struct MachineTraits;
+namespace X86NAMESPACE {
 
-template <class Machine> class InstX86Base : public InstTarget {
-  InstX86Base<Machine>() = delete;
-  InstX86Base<Machine>(const InstX86Base &) = delete;
-  InstX86Base &operator=(const InstX86Base &) = delete;
+template <typename TraitsType> struct InstImpl {
+  using Traits = TraitsType;
+  using Assembler = typename Traits::Assembler;
+  using AssemblerLabel = typename Assembler::Label;
+  using AssemblerImmediate = typename Assembler::Immediate;
+  using TargetLowering = typename Traits::TargetLowering;
+  using Address = typename Traits::Address;
+  using X86Operand = typename Traits::X86Operand;
+  using X86OperandMem = typename Traits::X86OperandMem;
+  using VariableSplit = typename Traits::VariableSplit;
 
-public:
-  using Traits = MachineTraits<Machine>;
-  using X86TargetLowering = typename Traits::TargetLowering;
+  using GPRRegister = typename Traits::RegisterSet::GPRRegister;
+  using RegisterSet = typename Traits::RegisterSet;
+  using XmmRegister = typename Traits::RegisterSet::XmmRegister;
 
-  enum InstKindX86 {
-    k__Start = Inst::Target,
-    Adc,
-    AdcRMW,
-    Add,
-    AddRMW,
-    Addps,
-    Addss,
-    And,
-    Andnps,
-    Andps,
-    AndRMW,
-    Blendvps,
-    Br,
-    Bsf,
-    Bsr,
-    Bswap,
-    Call,
-    Cbwdq,
-    Cmov,
-    Cmpps,
-    Cmpxchg,
-    Cmpxchg8b,
-    Cvt,
-    Div,
-    Divps,
-    Divss,
-    FakeRMW,
-    Fld,
-    Fstp,
-    Icmp,
-    Idiv,
-    Imul,
-    ImulImm,
-    Insertps,
-    Jmp,
-    Label,
-    Lea,
-    Load,
-    Mfence,
-    Minss,
-    Maxss,
-    Mov,
-    Movd,
-    Movp,
-    Movq,
-    MovssRegs,
-    Movsx,
-    Movzx,
-    Mul,
-    Mulps,
-    Mulss,
-    Neg,
-    Nop,
-    Or,
-    Orps,
-    OrRMW,
-    Padd,
-    Pand,
-    Pandn,
-    Pblendvb,
-    Pcmpeq,
-    Pcmpgt,
-    Pextr,
-    Pinsr,
-    Pmull,
-    Pmuludq,
-    Pop,
-    Por,
-    Pshufd,
-    Psll,
-    Psra,
-    Psrl,
-    Psub,
-    Push,
-    Pxor,
-    Ret,
-    Rol,
-    Sar,
-    Sbb,
-    SbbRMW,
-    Setcc,
-    Shl,
-    Shld,
-    Shr,
-    Shrd,
-    Shufps,
-    Sqrtss,
-    Store,
-    StoreP,
-    StoreQ,
-    Sub,
-    SubRMW,
-    Subps,
-    Subss,
-    Test,
-    Ucomiss,
-    UD2,
-    Xadd,
-    Xchg,
-    Xor,
-    Xorps,
-    XorRMW,
+  using Cond = typename Traits::Cond;
+  using BrCond = typename Traits::Cond::BrCond;
+  using CmppsCond = typename Traits::Cond::CmppsCond;
 
-    /// Intel Architecture Code Analyzer markers. These are not executable so
-    /// must only be used for analysis.
-    IacaStart,
-    IacaEnd
-  };
+  template <typename SReg_t, typename DReg_t>
+  using CastEmitterRegOp =
+      typename Traits::Assembler::template CastEmitterRegOp<SReg_t, DReg_t>;
+  template <typename SReg_t, typename DReg_t>
+  using ThreeOpImmEmitter =
+      typename Traits::Assembler::template ThreeOpImmEmitter<SReg_t, DReg_t>;
+  using GPREmitterAddrOp = typename Traits::Assembler::GPREmitterAddrOp;
+  using GPREmitterRegOp = typename Traits::Assembler::GPREmitterRegOp;
+  using GPREmitterShiftD = typename Traits::Assembler::GPREmitterShiftD;
+  using GPREmitterShiftOp = typename Traits::Assembler::GPREmitterShiftOp;
+  using GPREmitterOneOp = typename Traits::Assembler::GPREmitterOneOp;
+  using XmmEmitterRegOp = typename Traits::Assembler::XmmEmitterRegOp;
+  using XmmEmitterShiftOp = typename Traits::Assembler::XmmEmitterShiftOp;
+  using XmmEmitterMovOps = typename Traits::Assembler::XmmEmitterMovOps;
 
-  enum SseSuffix { None, Packed, Scalar, Integral };
+  class InstX86Base : public InstTarget {
+    InstX86Base() = delete;
+    InstX86Base(const InstX86Base &) = delete;
+    InstX86Base &operator=(const InstX86Base &) = delete;
 
-  static const char *getWidthString(Type Ty);
-  static const char *getFldString(Type Ty);
-  static typename Traits::Cond::BrCond
-  getOppositeCondition(typename Traits::Cond::BrCond Cond);
-  void dump(const Cfg *Func) const override;
+  public:
+    enum InstKindX86 {
+      k__Start = Inst::Target,
+      Adc,
+      AdcRMW,
+      Add,
+      AddRMW,
+      Addps,
+      Addss,
+      And,
+      Andnps,
+      Andps,
+      AndRMW,
+      Blendvps,
+      Br,
+      Bsf,
+      Bsr,
+      Bswap,
+      Call,
+      Cbwdq,
+      Cmov,
+      Cmpps,
+      Cmpxchg,
+      Cmpxchg8b,
+      Cvt,
+      Div,
+      Divps,
+      Divss,
+      FakeRMW,
+      Fld,
+      Fstp,
+      Icmp,
+      Idiv,
+      Imul,
+      ImulImm,
+      Insertps,
+      Jmp,
+      Label,
+      Lea,
+      Load,
+      Mfence,
+      Minss,
+      Maxss,
+      Mov,
+      Movd,
+      Movp,
+      Movq,
+      MovssRegs,
+      Movsx,
+      Movzx,
+      Mul,
+      Mulps,
+      Mulss,
+      Neg,
+      Nop,
+      Or,
+      Orps,
+      OrRMW,
+      Padd,
+      Pand,
+      Pandn,
+      Pblendvb,
+      Pcmpeq,
+      Pcmpgt,
+      Pextr,
+      Pinsr,
+      Pmull,
+      Pmuludq,
+      Pop,
+      Por,
+      Pshufd,
+      Psll,
+      Psra,
+      Psrl,
+      Psub,
+      Push,
+      Pxor,
+      Ret,
+      Rol,
+      Sar,
+      Sbb,
+      SbbRMW,
+      Setcc,
+      Shl,
+      Shld,
+      Shr,
+      Shrd,
+      Shufps,
+      Sqrtss,
+      Store,
+      StoreP,
+      StoreQ,
+      Sub,
+      SubRMW,
+      Subps,
+      Subss,
+      Test,
+      Ucomiss,
+      UD2,
+      Xadd,
+      Xchg,
+      Xor,
+      Xorps,
+      XorRMW,
 
-  // Shared emit routines for common forms of instructions.
-  void emitTwoAddress(const Cfg *Func, const char *Opcode,
-                      const char *Suffix = "") const;
+      /// Intel Architecture Code Analyzer markers. These are not executable so
+      /// must only be used for analysis.
+      IacaStart,
+      IacaEnd
+    };
 
-  static void
-  emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
-                  const Operand *Src,
-                  const typename Traits::Assembler::GPREmitterShiftOp &Emitter);
+    enum SseSuffix { None, Packed, Scalar, Integral };
 
-  static X86TargetLowering *getTarget(const Cfg *Func) {
-    return static_cast<X86TargetLowering *>(Func->getTarget());
-  }
+    static const char *getWidthString(Type Ty);
+    static const char *getFldString(Type Ty);
+    static BrCond getOppositeCondition(BrCond Cond);
+    void dump(const Cfg *Func) const override;
 
-protected:
-  InstX86Base<Machine>(Cfg *Func, InstKindX86 Kind, SizeT Maxsrcs,
-                       Variable *Dest)
-      : InstTarget(Func, static_cast<InstKind>(Kind), Maxsrcs, Dest) {}
+    // Shared emit routines for common forms of instructions.
+    void emitTwoAddress(const Cfg *Func, const char *Opcode,
+                        const char *Suffix = "") const;
 
-  static bool isClassof(const Inst *Inst, InstKindX86 MyKind) {
-    return Inst->getKind() == static_cast<InstKind>(MyKind);
-  }
-  // Most instructions that operate on vector arguments require vector memory
-  // operands to be fully aligned (16-byte alignment for PNaCl vector types).
-  // The stack frame layout and call ABI ensure proper alignment for stack
-  // operands, but memory operands (originating from load/store bitcode
-  // instructions) only have element-size alignment guarantees. This function
-  // validates that none of the operands is a memory operand of vector type,
-  // calling report_fatal_error() if one is found. This function should be
-  // called during emission, and maybe also in the ctor (as long as that fits
-  // the lowering style).
-  void validateVectorAddrMode() const {
-    if (this->getDest())
-      this->validateVectorAddrModeOpnd(this->getDest());
-    for (SizeT i = 0; i < this->getSrcSize(); ++i) {
-      this->validateVectorAddrModeOpnd(this->getSrc(i));
+    static TargetLowering *getTarget(const Cfg *Func) {
+      return static_cast<TargetLowering *>(Func->getTarget());
     }
-  }
 
-private:
-  static void validateVectorAddrModeOpnd(const Operand *Opnd) {
-    if (llvm::isa<typename InstX86Base<Machine>::Traits::X86OperandMem>(Opnd) &&
-        isVectorType(Opnd->getType())) {
-      llvm::report_fatal_error("Possible misaligned vector memory operation");
+  protected:
+    InstX86Base(Cfg *Func, InstKindX86 Kind, SizeT Maxsrcs, Variable *Dest)
+        : InstTarget(Func, static_cast<InstKind>(Kind), Maxsrcs, Dest) {}
+
+    static bool isClassof(const Inst *Inst, InstKindX86 MyKind) {
+      return Inst->getKind() == static_cast<InstKind>(MyKind);
     }
-  }
-};
-
-/// InstX86FakeRMW represents a non-atomic read-modify-write operation on a
-/// memory location. An InstX86FakeRMW is a "fake" instruction in that it still
-/// needs to be lowered to some actual RMW instruction.
-///
-/// If A is some memory address, D is some data value to apply, and OP is an
-/// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
-template <class Machine>
-class InstX86FakeRMW final : public InstX86Base<Machine> {
-  InstX86FakeRMW() = delete;
-  InstX86FakeRMW(const InstX86FakeRMW &) = delete;
-  InstX86FakeRMW &operator=(const InstX86FakeRMW &) = delete;
-
-public:
-  static InstX86FakeRMW *create(Cfg *Func, Operand *Data, Operand *Addr,
-                                Variable *Beacon, InstArithmetic::OpKind Op,
-                                uint32_t Align = 1) {
-    // TODO(stichnot): Stop ignoring alignment specification.
-    (void)Align;
-    return new (Func->allocate<InstX86FakeRMW>())
-        InstX86FakeRMW(Func, Data, Addr, Op, Beacon);
-  }
-  Operand *getAddr() const { return this->getSrc(1); }
-  Operand *getData() const { return this->getSrc(0); }
-  InstArithmetic::OpKind getOp() const { return Op; }
-  Variable *getBeacon() const { return llvm::cast<Variable>(this->getSrc(2)); }
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::FakeRMW);
-  }
-
-private:
-  InstArithmetic::OpKind Op;
-  InstX86FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
-                 InstArithmetic::OpKind Op, Variable *Beacon);
-};
-
-/// InstX86Label represents an intra-block label that is the target of an
-/// intra-block branch. The offset between the label and the branch must be fit
-/// into one byte (considered "near"). These are used for lowering i1
-/// calculations, Select instructions, and 64-bit compares on a 32-bit
-/// architecture, without basic block splitting. Basic block splitting is not so
-/// desirable for several reasons, one of which is the impact on decisions based
-/// on whether a variable's live range spans multiple basic blocks.
-///
-/// Intra-block control flow must be used with caution. Consider the sequence
-/// for "c = (a >= b ? x : y)".
-///     cmp a, b
-///     br lt, L1
-///     mov c, x
-///     jmp L2
-///   L1:
-///     mov c, y
-///   L2:
-///
-/// Labels L1 and L2 are intra-block labels. Without knowledge of the
-/// intra-block control flow, liveness analysis will determine the "mov c, x"
-/// instruction to be dead. One way to prevent this is to insert a "FakeUse(c)"
-/// instruction anywhere between the two "mov c, ..." instructions, e.g.:
-///
-///     cmp a, b
-///     br lt, L1
-///     mov c, x
-///     jmp L2
-///     FakeUse(c)
-///   L1:
-///     mov c, y
-///   L2:
-///
-/// The down-side is that "mov c, x" can never be dead-code eliminated even if
-/// there are no uses of c. As unlikely as this situation is, it may be
-/// prevented by running dead code elimination before lowering.
-template <class Machine>
-class InstX86Label final : public InstX86Base<Machine> {
-  InstX86Label() = delete;
-  InstX86Label(const InstX86Label &) = delete;
-  InstX86Label &operator=(const InstX86Label &) = delete;
-
-public:
-  static InstX86Label *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::TargetLowering *Target) {
-    return new (Func->allocate<InstX86Label>()) InstX86Label(Func, Target);
-  }
-  uint32_t getEmitInstCount() const override { return 0; }
-  IceString getName(const Cfg *Func) const;
-  SizeT getNumber() const { return Number; }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-
-private:
-  InstX86Label(Cfg *Func,
-               typename InstX86Base<Machine>::Traits::TargetLowering *Target);
-
-  SizeT Number; // used for unique label generation.
-};
-
-/// Conditional and unconditional branch instruction.
-template <class Machine> class InstX86Br final : public InstX86Base<Machine> {
-  InstX86Br() = delete;
-  InstX86Br(const InstX86Br &) = delete;
-  InstX86Br &operator=(const InstX86Br &) = delete;
-
-public:
-  enum Mode { Near, Far };
-
-  /// Create a conditional branch to a node.
-  static InstX86Br *
-  create(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
-         Mode Kind) {
-    assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
-    constexpr InstX86Label<Machine> *NoLabel = nullptr;
-    return new (Func->allocate<InstX86Br>())
-        InstX86Br(Func, TargetTrue, TargetFalse, NoLabel, Condition, Kind);
-  }
-  /// Create an unconditional branch to a node.
-  static InstX86Br *create(Cfg *Func, CfgNode *Target, Mode Kind) {
-    constexpr CfgNode *NoCondTarget = nullptr;
-    constexpr InstX86Label<Machine> *NoLabel = nullptr;
-    return new (Func->allocate<InstX86Br>())
-        InstX86Br(Func, NoCondTarget, Target, NoLabel,
-                  InstX86Base<Machine>::Traits::Cond::Br_None, Kind);
-  }
-  /// Create a non-terminator conditional branch to a node, with a fallthrough
-  /// to the next instruction in the current node. This is used for switch
-  /// lowering.
-  static InstX86Br *
-  create(Cfg *Func, CfgNode *Target,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
-         Mode Kind) {
-    assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
-    constexpr CfgNode *NoUncondTarget = nullptr;
-    constexpr InstX86Label<Machine> *NoLabel = nullptr;
-    return new (Func->allocate<InstX86Br>())
-        InstX86Br(Func, Target, NoUncondTarget, NoLabel, Condition, Kind);
-  }
-  /// Create a conditional intra-block branch (or unconditional, if
-  /// Condition==Br_None) to a label in the current block.
-  static InstX86Br *
-  create(Cfg *Func, InstX86Label<Machine> *Label,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
-         Mode Kind) {
-    constexpr CfgNode *NoCondTarget = nullptr;
-    constexpr CfgNode *NoUncondTarget = nullptr;
-    return new (Func->allocate<InstX86Br>())
-        InstX86Br(Func, NoCondTarget, NoUncondTarget, Label, Condition, Kind);
-  }
-  const CfgNode *getTargetTrue() const { return TargetTrue; }
-  const CfgNode *getTargetFalse() const { return TargetFalse; }
-  bool isNear() const { return Kind == Near; }
-  bool optimizeBranch(const CfgNode *NextNode);
-  uint32_t getEmitInstCount() const override {
-    uint32_t Sum = 0;
-    if (Label)
-      ++Sum;
-    if (getTargetTrue())
-      ++Sum;
-    if (getTargetFalse())
-      ++Sum;
-    return Sum;
-  }
-  bool isUnconditionalBranch() const override {
-    return !Label && Condition == InstX86Base<Machine>::Traits::Cond::Br_None;
-  }
-  bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Br);
-  }
-
-private:
-  InstX86Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
-            const InstX86Label<Machine> *Label,
-            typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
-            Mode Kind);
-
-  typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
-  const CfgNode *TargetTrue;
-  const CfgNode *TargetFalse;
-  const InstX86Label<Machine> *Label; // Intra-block branch target
-  const Mode Kind;
-};
-
-/// Jump to a target outside this function, such as tailcall, nacljump, naclret,
-/// unreachable. This is different from a Branch instruction in that there is no
-/// intra-function control flow to represent.
-template <class Machine> class InstX86Jmp final : public InstX86Base<Machine> {
-  InstX86Jmp() = delete;
-  InstX86Jmp(const InstX86Jmp &) = delete;
-  InstX86Jmp &operator=(const InstX86Jmp &) = delete;
-
-public:
-  static InstX86Jmp *create(Cfg *Func, Operand *Target) {
-    return new (Func->allocate<InstX86Jmp>()) InstX86Jmp(Func, Target);
-  }
-  Operand *getJmpTarget() const { return this->getSrc(0); }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Jmp);
-  }
-
-private:
-  InstX86Jmp(Cfg *Func, Operand *Target);
-};
-
-/// Call instruction. Arguments should have already been pushed.
-template <class Machine> class InstX86Call final : public InstX86Base<Machine> {
-  InstX86Call() = delete;
-  InstX86Call(const InstX86Call &) = delete;
-  InstX86Call &operator=(const InstX86Call &) = delete;
-
-public:
-  static InstX86Call *create(Cfg *Func, Variable *Dest, Operand *CallTarget) {
-    return new (Func->allocate<InstX86Call>())
-        InstX86Call(Func, Dest, CallTarget);
-  }
-  Operand *getCallTarget() const { return this->getSrc(0); }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Call);
-  }
-
-private:
-  InstX86Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
-};
-
-/// Emit a one-operand (GPR) instruction.
-template <class Machine>
-void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Var,
-                    const typename InstX86Base<
-                        Machine>::Traits::Assembler::GPREmitterOneOp &Emitter);
-
-template <class Machine>
-void emitIASAsAddrOpTyGPR(
-    const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
-    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp
-        &Emitter);
-
-/// Instructions of the form x := op(x).
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseInplaceopGPR : public InstX86Base<Machine> {
-  InstX86BaseInplaceopGPR() = delete;
-  InstX86BaseInplaceopGPR(const InstX86BaseInplaceopGPR &) = delete;
-  InstX86BaseInplaceopGPR &operator=(const InstX86BaseInplaceopGPR &) = delete;
-
-public:
-  using Base = InstX86BaseInplaceopGPR<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(this->getSrcSize() == 1);
-    Str << "\t" << Opcode << "\t";
-    this->getSrc(0)->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    assert(this->getSrcSize() == 1);
-    const Variable *Var = this->getDest();
-    Type Ty = Var->getType();
-    emitIASOpTyGPR<Machine>(Func, Ty, Var, Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseInplaceopGPR(Cfg *Func, Operand *SrcDest)
-      : InstX86Base<Machine>(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
-    this->addSource(SrcDest);
-  }
-
-private:
-  static const char *Opcode;
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp
-      Emitter;
-};
-
-/// Emit a two-operand (GPR) instruction, where the dest operand is a Variable
-/// that's guaranteed to be a register.
-template <class Machine, bool VarCanBeByte = true, bool SrcCanBeByte = true>
-void emitIASRegOpTyGPR(
-    const Cfg *Func, Type Ty, const Variable *Dst, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
-        &Emitter);
-
-/// Instructions of the form x := op(y).
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseUnaryopGPR : public InstX86Base<Machine> {
-  InstX86BaseUnaryopGPR() = delete;
-  InstX86BaseUnaryopGPR(const InstX86BaseUnaryopGPR &) = delete;
-  InstX86BaseUnaryopGPR &operator=(const InstX86BaseUnaryopGPR &) = delete;
-
-public:
-  using Base = InstX86BaseUnaryopGPR<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(this->getSrcSize() == 1);
-    Type SrcTy = this->getSrc(0)->getType();
-    Type DestTy = this->getDest()->getType();
-    Str << "\t" << Opcode << this->getWidthString(SrcTy);
-    // Movsx and movzx need both the source and dest type width letter to
-    // define the operation. The other unary operations have the same source
-    // and dest type and as a result need only one letter.
-    if (SrcTy != DestTy)
-      Str << this->getWidthString(DestTy);
-    Str << "\t";
-    this->getSrc(0)->emit(Func);
-    Str << ", ";
-    this->getDest()->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    assert(this->getSrcSize() == 1);
-    const Variable *Var = this->getDest();
-    Type Ty = Var->getType();
-    const Operand *Src = this->getSrc(0);
-    emitIASRegOpTyGPR<Machine>(Func, Ty, Var, Src, Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getSrc(0)->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseUnaryopGPR(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86Base<Machine>(Func, K, 1, Dest) {
-    this->addSource(Src);
-  }
-
-  static const char *Opcode;
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
-      Emitter;
-};
-
-template <class Machine>
-void emitIASRegOpTyXMM(
-    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-        &Emitter);
-
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseUnaryopXmm : public InstX86Base<Machine> {
-  InstX86BaseUnaryopXmm() = delete;
-  InstX86BaseUnaryopXmm(const InstX86BaseUnaryopXmm &) = delete;
-  InstX86BaseUnaryopXmm &operator=(const InstX86BaseUnaryopXmm &) = delete;
-
-public:
-  using Base = InstX86BaseUnaryopXmm<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(this->getSrcSize() == 1);
-    Str << "\t" << Opcode << "\t";
-    this->getSrc(0)->emit(Func);
-    Str << ", ";
-    this->getDest()->emit(Func);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = this->getDest()->getType();
-    assert(this->getSrcSize() == 1);
-    emitIASRegOpTyXMM<Machine>(Func, Ty, this->getDest(), this->getSrc(0),
-                               Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseUnaryopXmm(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86Base<Machine>(Func, K, 1, Dest) {
-    this->addSource(Src);
-  }
-
-  static const char *Opcode;
-  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-      Emitter;
-};
-
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseBinopGPRShift : public InstX86Base<Machine> {
-  InstX86BaseBinopGPRShift() = delete;
-  InstX86BaseBinopGPRShift(const InstX86BaseBinopGPRShift &) = delete;
-  InstX86BaseBinopGPRShift &
-  operator=(const InstX86BaseBinopGPRShift &) = delete;
-
-public:
-  using Base = InstX86BaseBinopGPRShift<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    this->emitTwoAddress(Func, Opcode);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = this->getDest()->getType();
-    assert(this->getSrcSize() == 2);
-    this->emitIASGPRShift(Func, Ty, this->getDest(), this->getSrc(1), Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseBinopGPRShift(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86Base<Machine>(Func, K, 2, Dest) {
-    this->addSource(Dest);
-    this->addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::GPREmitterShiftOp Emitter;
-};
-
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseBinopGPR : public InstX86Base<Machine> {
-  InstX86BaseBinopGPR() = delete;
-  InstX86BaseBinopGPR(const InstX86BaseBinopGPR &) = delete;
-  InstX86BaseBinopGPR &operator=(const InstX86BaseBinopGPR &) = delete;
-
-public:
-  using Base = InstX86BaseBinopGPR<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    this->emitTwoAddress(Func, Opcode);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = this->getDest()->getType();
-    assert(this->getSrcSize() == 2);
-    emitIASRegOpTyGPR<Machine>(Func, Ty, this->getDest(), this->getSrc(1),
-                               Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseBinopGPR(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86Base<Machine>(Func, K, 2, Dest) {
-    this->addSource(Dest);
-    this->addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
-      Emitter;
-};
-
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseBinopRMW : public InstX86Base<Machine> {
-  InstX86BaseBinopRMW() = delete;
-  InstX86BaseBinopRMW(const InstX86BaseBinopRMW &) = delete;
-  InstX86BaseBinopRMW &operator=(const InstX86BaseBinopRMW &) = delete;
-
-public:
-  using Base = InstX86BaseBinopRMW<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    this->emitTwoAddress(Func, Opcode);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    Type Ty = this->getSrc(0)->getType();
-    assert(this->getSrcSize() == 2);
-    emitIASAsAddrOpTyGPR<Machine>(Func, Ty, this->getSrc(0), this->getSrc(1),
-                                  Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    Str << Opcode << "." << this->getSrc(0)->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseBinopRMW(
-      Cfg *Func, typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-      Operand *Src1)
-      : InstX86Base<Machine>(Func, K, 2, nullptr) {
-    this->addSource(DestSrc0);
-    this->addSource(Src1);
-  }
-
-  static const char *Opcode;
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::GPREmitterAddrOp Emitter;
-};
-
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K,
-          bool NeedsElementType,
-          typename InstX86Base<Machine>::SseSuffix Suffix>
-class InstX86BaseBinopXmm : public InstX86Base<Machine> {
-  InstX86BaseBinopXmm() = delete;
-  InstX86BaseBinopXmm(const InstX86BaseBinopXmm &) = delete;
-  InstX86BaseBinopXmm &operator=(const InstX86BaseBinopXmm &) = delete;
-
-public:
-  using Base = InstX86BaseBinopXmm<Machine, K, NeedsElementType, Suffix>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    this->validateVectorAddrMode();
-    switch (Suffix) {
-    case InstX86Base<Machine>::SseSuffix::None:
-      this->emitTwoAddress(Func, Opcode);
-      break;
-    case InstX86Base<Machine>::SseSuffix::Packed: {
-      const Type DestTy = this->getDest()->getType();
-      this->emitTwoAddress(
-          Func, this->Opcode,
-          InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PdPsString);
-    } break;
-    case InstX86Base<Machine>::SseSuffix::Scalar: {
-      const Type DestTy = this->getDest()->getType();
-      this->emitTwoAddress(
-          Func, this->Opcode,
-          InstX86Base<Machine>::Traits::TypeAttributes[DestTy].SdSsString);
-    } break;
-    case InstX86Base<Machine>::SseSuffix::Integral: {
-      const Type DestTy = this->getDest()->getType();
-      this->emitTwoAddress(
-          Func, this->Opcode,
-          InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PackString);
-    } break;
-    }
-  }
-  void emitIAS(const Cfg *Func) const override {
-    this->validateVectorAddrMode();
-    Type Ty = this->getDest()->getType();
-    if (NeedsElementType)
-      Ty = typeElementType(Ty);
-    assert(this->getSrcSize() == 2);
-    emitIASRegOpTyXMM<Machine>(Func, Ty, this->getDest(), this->getSrc(1),
-                               Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseBinopXmm(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86Base<Machine>(Func, K, 2, Dest) {
-    this->addSource(Dest);
-    this->addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-      Emitter;
-};
-
-template <class Machine>
-void emitIASXmmShift(
-    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp
-        &Emitter);
-
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K,
-          bool AllowAllTypes = false>
-class InstX86BaseBinopXmmShift : public InstX86Base<Machine> {
-  InstX86BaseBinopXmmShift() = delete;
-  InstX86BaseBinopXmmShift(const InstX86BaseBinopXmmShift &) = delete;
-  InstX86BaseBinopXmmShift &
-  operator=(const InstX86BaseBinopXmmShift &) = delete;
-
-public:
-  using Base = InstX86BaseBinopXmmShift<Machine, K, AllowAllTypes>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    this->validateVectorAddrMode();
-    // Shift operations are always integral, and hence always need a suffix.
-    const Type DestTy = this->getDest()->getType();
-    this->emitTwoAddress(
-        Func, this->Opcode,
-        InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PackString);
-  }
-  void emitIAS(const Cfg *Func) const override {
-    this->validateVectorAddrMode();
-    Type Ty = this->getDest()->getType();
-    assert(AllowAllTypes || isVectorType(Ty));
-    Type ElementTy = typeElementType(Ty);
-    assert(this->getSrcSize() == 2);
-    emitIASXmmShift<Machine>(Func, ElementTy, this->getDest(), this->getSrc(1),
-                             Emitter);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseBinopXmmShift(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86Base<Machine>(Func, K, 2, Dest) {
-    this->addSource(Dest);
-    this->addSource(Source);
-  }
-
-  static const char *Opcode;
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::XmmEmitterShiftOp Emitter;
-};
-
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseTernop : public InstX86Base<Machine> {
-  InstX86BaseTernop() = delete;
-  InstX86BaseTernop(const InstX86BaseTernop &) = delete;
-  InstX86BaseTernop &operator=(const InstX86BaseTernop &) = delete;
-
-public:
-  using Base = InstX86BaseTernop<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(this->getSrcSize() == 3);
-    Str << "\t" << Opcode << "\t";
-    this->getSrc(2)->emit(Func);
-    Str << ", ";
-    this->getSrc(1)->emit(Func);
-    Str << ", ";
-    this->getDest()->emit(Func);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseTernop(Cfg *Func, Variable *Dest, Operand *Source1,
-                    Operand *Source2)
-      : InstX86Base<Machine>(Func, K, 3, Dest) {
-    this->addSource(Dest);
-    this->addSource(Source1);
-    this->addSource(Source2);
-  }
-
-  static const char *Opcode;
-};
-
-// Instructions of the form x := y op z
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseThreeAddressop : public InstX86Base<Machine> {
-  InstX86BaseThreeAddressop() = delete;
-  InstX86BaseThreeAddressop(const InstX86BaseThreeAddressop &) = delete;
-  InstX86BaseThreeAddressop &
-  operator=(const InstX86BaseThreeAddressop &) = delete;
-
-public:
-  using Base = InstX86BaseThreeAddressop<Machine, K>;
-
-  void emit(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(this->getSrcSize() == 2);
-    Str << "\t" << Opcode << "\t";
-    this->getSrc(1)->emit(Func);
-    Str << ", ";
-    this->getSrc(0)->emit(Func);
-    Str << ", ";
-    this->getDest()->emit(Func);
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    this->dumpDest(Func);
-    Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
-
-protected:
-  InstX86BaseThreeAddressop(Cfg *Func, Variable *Dest, Operand *Source0,
-                            Operand *Source1)
-      : InstX86Base<Machine>(Func, K, 2, Dest) {
-    this->addSource(Source0);
-    this->addSource(Source1);
-  }
-
-  static const char *Opcode;
-};
-
-/// Base class for assignment instructions
-template <class Machine, typename InstX86Base<Machine>::InstKindX86 K>
-class InstX86BaseMovlike : public InstX86Base<Machine> {
-  InstX86BaseMovlike() = delete;
-  InstX86BaseMovlike(const InstX86BaseMovlike &) = delete;
-  InstX86BaseMovlike &operator=(const InstX86BaseMovlike &) = delete;
-
-public:
-  using Base = InstX86BaseMovlike<Machine, K>;
-
-  bool isRedundantAssign() const override {
-    if (const auto *SrcVar = llvm::dyn_cast<const Variable>(this->getSrc(0))) {
-      if (SrcVar->hasReg() && this->Dest->hasReg()) {
-        // An assignment between physical registers is considered redundant if
-        // they have the same base register and the same encoding. E.g.:
-        //   mov cl, ecx ==> redundant
-        //   mov ch, ecx ==> not redundant due to different encodings
-        //   mov ch, ebp ==> not redundant due to different base registers
-        //   mov ecx, ecx ==> redundant, and dangerous in x86-64. i64 zexting
-        //                    is handled by Inst86Zext.
-        const int32_t SrcReg = SrcVar->getRegNum();
-        const int32_t DestReg = this->Dest->getRegNum();
-        return (InstX86Base<Machine>::Traits::getEncoding(SrcReg) ==
-                InstX86Base<Machine>::Traits::getEncoding(DestReg)) &&
-               (InstX86Base<Machine>::Traits::getBaseReg(SrcReg) ==
-                InstX86Base<Machine>::Traits::getBaseReg(DestReg));
+    // Most instructions that operate on vector arguments require vector memory
+    // operands to be fully aligned (16-byte alignment for PNaCl vector types).
+    // The stack frame layout and call ABI ensure proper alignment for stack
+    // operands, but memory operands (originating from load/store bitcode
+    // instructions) only have element-size alignment guarantees. This function
+    // validates that none of the operands is a memory operand of vector type,
+    // calling report_fatal_error() if one is found. This function should be
+    // called during emission, and maybe also in the ctor (as long as that fits
+    // the lowering style).
+    void validateVectorAddrMode() const {
+      if (this->getDest())
+        this->validateVectorAddrModeOpnd(this->getDest());
+      for (SizeT i = 0; i < this->getSrcSize(); ++i) {
+        this->validateVectorAddrModeOpnd(this->getSrc(i));
       }
     }
-    return checkForRedundantAssign(this->getDest(), this->getSrc(0));
-  }
-  bool isVarAssign() const override {
-    return llvm::isa<Variable>(this->getSrc(0));
-  }
-  void dump(const Cfg *Func) const override {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Func->getContext()->getStrDump();
-    Str << Opcode << "." << this->getDest()->getType() << " ";
-    this->dumpDest(Func);
-    Str << ", ";
-    this->dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::K);
-  }
 
-protected:
-  InstX86BaseMovlike(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86Base<Machine>(Func, K, 1, Dest) {
-    this->addSource(Source);
-    // For an integer assignment, make sure it's either a same-type assignment
-    // or a truncation.
-    assert(!isScalarIntegerType(Dest->getType()) ||
-           (typeWidthInBytes(Dest->getType()) <=
-            typeWidthInBytes(Source->getType())));
-  }
+  private:
+    static void validateVectorAddrModeOpnd(const Operand *Opnd) {
+      if (llvm::isa<X86OperandMem>(Opnd) && isVectorType(Opnd->getType())) {
+        llvm::report_fatal_error("Possible misaligned vector memory operation");
+      }
+    }
+  };
 
-  static const char *Opcode;
-};
+  /// InstX86FakeRMW represents a non-atomic read-modify-write operation on a
+  /// memory location. An InstX86FakeRMW is a "fake" instruction in that it
+  /// still needs to be lowered to some actual RMW instruction.
+  ///
+  /// If A is some memory address, D is some data value to apply, and OP is an
+  /// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
+  class InstX86FakeRMW final : public InstX86Base {
+    InstX86FakeRMW() = delete;
+    InstX86FakeRMW(const InstX86FakeRMW &) = delete;
+    InstX86FakeRMW &operator=(const InstX86FakeRMW &) = delete;
 
-template <class Machine>
-class InstX86Bswap
-    : public InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Bswap> {
-public:
-  static InstX86Bswap *create(Cfg *Func, Operand *SrcDest) {
-    return new (Func->allocate<InstX86Bswap>()) InstX86Bswap(Func, SrcDest);
-  }
+  public:
+    static InstX86FakeRMW *create(Cfg *Func, Operand *Data, Operand *Addr,
+                                  Variable *Beacon, InstArithmetic::OpKind Op,
+                                  uint32_t Align = 1) {
+      // TODO(stichnot): Stop ignoring alignment specification.
+      (void)Align;
+      return new (Func->allocate<InstX86FakeRMW>())
+          InstX86FakeRMW(Func, Data, Addr, Op, Beacon);
+    }
+    Operand *getAddr() const { return this->getSrc(1); }
+    Operand *getData() const { return this->getSrc(0); }
+    InstArithmetic::OpKind getOp() const { return Op; }
+    Variable *getBeacon() const {
+      return llvm::cast<Variable>(this->getSrc(2));
+    }
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::FakeRMW);
+    }
 
-private:
-  InstX86Bswap(Cfg *Func, Operand *SrcDest)
-      : InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Bswap>(Func,
-                                                                      SrcDest) {
-  }
-};
+  private:
+    InstArithmetic::OpKind Op;
+    InstX86FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
+                   InstArithmetic::OpKind Op, Variable *Beacon);
+  };
 
-template <class Machine>
-class InstX86Neg
-    : public InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Neg> {
-public:
-  static InstX86Neg *create(Cfg *Func, Operand *SrcDest) {
-    return new (Func->allocate<InstX86Neg>()) InstX86Neg(Func, SrcDest);
-  }
+  /// InstX86Label represents an intra-block label that is the target of an
+  /// intra-block branch. The offset between the label and the branch must fit
+  /// into one byte (considered "near"). These are used for lowering i1
+  /// calculations, Select instructions, and 64-bit compares on a 32-bit
+  /// architecture, without basic block splitting. Basic block splitting is not
+  /// so desirable for several reasons, one of which is the impact on decisions
+  /// based on whether a variable's live range spans multiple basic blocks.
+  ///
+  /// Intra-block control flow must be used with caution. Consider the sequence
+  /// for "c = (a >= b ? x : y)".
+  ///     cmp a, b
+  ///     br lt, L1
+  ///     mov c, x
+  ///     jmp L2
+  ///   L1:
+  ///     mov c, y
+  ///   L2:
+  ///
+  /// Labels L1 and L2 are intra-block labels. Without knowledge of the
+  /// intra-block control flow, liveness analysis will determine the "mov c, x"
+  /// instruction to be dead. One way to prevent this is to insert a
+  /// "FakeUse(c)" instruction anywhere between the two "mov c, ..."
+  /// instructions, e.g.:
+  ///
+  ///     cmp a, b
+  ///     br lt, L1
+  ///     mov c, x
+  ///     jmp L2
+  ///     FakeUse(c)
+  ///   L1:
+  ///     mov c, y
+  ///   L2:
+  ///
+  /// The down-side is that "mov c, x" can never be dead-code eliminated even if
+  /// there are no uses of c. As unlikely as this situation is, it may be
+  /// prevented by running dead code elimination before lowering.
+  class InstX86Label final : public InstX86Base {
+    InstX86Label() = delete;
+    InstX86Label(const InstX86Label &) = delete;
+    InstX86Label &operator=(const InstX86Label &) = delete;
 
-private:
-  InstX86Neg(Cfg *Func, Operand *SrcDest)
-      : InstX86BaseInplaceopGPR<Machine, InstX86Base<Machine>::Neg>(Func,
-                                                                    SrcDest) {}
-};
+  public:
+    static InstX86Label *create(Cfg *Func, TargetLowering *Target) {
+      return new (Func->allocate<InstX86Label>()) InstX86Label(Func, Target);
+    }
+    uint32_t getEmitInstCount() const override { return 0; }
+    IceString getName(const Cfg *Func) const;
+    SizeT getNumber() const { return Number; }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
 
-template <class Machine>
-class InstX86Bsf
-    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsf> {
-public:
-  static InstX86Bsf *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX86Bsf>()) InstX86Bsf(Func, Dest, Src);
-  }
+  private:
+    InstX86Label(Cfg *Func, TargetLowering *Target);
 
-private:
-  InstX86Bsf(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsf>(Func, Dest,
-                                                                  Src) {}
-};
+    SizeT Number; // used for unique label generation.
+  };
 
-template <class Machine>
-class InstX86Bsr
-    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsr> {
-public:
-  static InstX86Bsr *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX86Bsr>()) InstX86Bsr(Func, Dest, Src);
-  }
+  /// Conditional and unconditional branch instruction.
+  class InstX86Br final : public InstX86Base {
+    InstX86Br() = delete;
+    InstX86Br(const InstX86Br &) = delete;
+    InstX86Br &operator=(const InstX86Br &) = delete;
 
-private:
-  InstX86Bsr(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Bsr>(Func, Dest,
-                                                                  Src) {}
-};
+  public:
+    enum Mode { Near, Far };
 
-template <class Machine>
-class InstX86Lea
-    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Lea> {
-public:
-  static InstX86Lea *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX86Lea>()) InstX86Lea(Func, Dest, Src);
-  }
+    /// Create a conditional branch to a node.
+    static InstX86Br *create(Cfg *Func, CfgNode *TargetTrue,
+                             CfgNode *TargetFalse, BrCond Condition,
+                             Mode Kind) {
+      assert(Condition != Cond::Br_None);
+      constexpr InstX86Label *NoLabel = nullptr;
+      return new (Func->allocate<InstX86Br>())
+          InstX86Br(Func, TargetTrue, TargetFalse, NoLabel, Condition, Kind);
+    }
+    /// Create an unconditional branch to a node.
+    static InstX86Br *create(Cfg *Func, CfgNode *Target, Mode Kind) {
+      constexpr CfgNode *NoCondTarget = nullptr;
+      constexpr InstX86Label *NoLabel = nullptr;
+      return new (Func->allocate<InstX86Br>())
+          InstX86Br(Func, NoCondTarget, Target, NoLabel, Cond::Br_None, Kind);
+    }
+    /// Create a non-terminator conditional branch to a node, with a fallthrough
+    /// to the next instruction in the current node. This is used for switch
+    /// lowering.
+    static InstX86Br *create(Cfg *Func, CfgNode *Target, BrCond Condition,
+                             Mode Kind) {
+      assert(Condition != Cond::Br_None);
+      constexpr CfgNode *NoUncondTarget = nullptr;
+      constexpr InstX86Label *NoLabel = nullptr;
+      return new (Func->allocate<InstX86Br>())
+          InstX86Br(Func, Target, NoUncondTarget, NoLabel, Condition, Kind);
+    }
+    /// Create a conditional intra-block branch (or unconditional, if
+    /// Condition==Br_None) to a label in the current block.
+    static InstX86Br *create(Cfg *Func, InstX86Label *Label, BrCond Condition,
+                             Mode Kind) {
+      constexpr CfgNode *NoCondTarget = nullptr;
+      constexpr CfgNode *NoUncondTarget = nullptr;
+      return new (Func->allocate<InstX86Br>())
+          InstX86Br(Func, NoCondTarget, NoUncondTarget, Label, Condition, Kind);
+    }
+    const CfgNode *getTargetTrue() const { return TargetTrue; }
+    const CfgNode *getTargetFalse() const { return TargetFalse; }
+    bool isNear() const { return Kind == Near; }
+    bool optimizeBranch(const CfgNode *NextNode);
+    uint32_t getEmitInstCount() const override {
+      uint32_t Sum = 0;
+      if (Label)
+        ++Sum;
+      if (getTargetTrue())
+        ++Sum;
+      if (getTargetFalse())
+        ++Sum;
+      return Sum;
+    }
+    bool isUnconditionalBranch() const override {
+      return !Label && Condition == Cond::Br_None;
+    }
+    bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Br);
+    }
 
-  void emit(const Cfg *Func) const override;
+  private:
+    InstX86Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
+              const InstX86Label *Label, BrCond Condition, Mode Kind);
 
-private:
-  InstX86Lea(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Lea>(Func, Dest,
-                                                                  Src) {}
-};
+    BrCond Condition;
+    const CfgNode *TargetTrue;
+    const CfgNode *TargetFalse;
+    const InstX86Label *Label; // Intra-block branch target
+    const Mode Kind;
+  };
 
-// Cbwdq instruction - wrapper for cbw, cwd, and cdq
-template <class Machine>
-class InstX86Cbwdq
-    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Cbwdq> {
-public:
-  static InstX86Cbwdq *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX86Cbwdq>()) InstX86Cbwdq(Func, Dest, Src);
-  }
+  /// Jump to a target outside this function, such as tailcall, nacljump,
+  /// naclret, unreachable. This is different from a Branch instruction in that
+  /// there is no intra-function control flow to represent.
+  class InstX86Jmp final : public InstX86Base {
+    InstX86Jmp() = delete;
+    InstX86Jmp(const InstX86Jmp &) = delete;
+    InstX86Jmp &operator=(const InstX86Jmp &) = delete;
 
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
+  public:
+    static InstX86Jmp *create(Cfg *Func, Operand *Target) {
+      return new (Func->allocate<InstX86Jmp>()) InstX86Jmp(Func, Target);
+    }
+    Operand *getJmpTarget() const { return this->getSrc(0); }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Jmp);
+    }
 
-private:
-  InstX86Cbwdq(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Cbwdq>(Func, Dest,
-                                                                    Src) {}
-};
+  private:
+    InstX86Jmp(Cfg *Func, Operand *Target);
+  };
 
-template <class Machine>
-class InstX86Movsx
-    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx> {
-public:
-  static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    assert(typeWidthInBytes(Dest->getType()) >
-           typeWidthInBytes(Src->getType()));
-    return new (Func->allocate<InstX86Movsx>()) InstX86Movsx(Func, Dest, Src);
-  }
+  /// Call instruction. Arguments should have already been pushed.
+  class InstX86Call final : public InstX86Base {
+    InstX86Call() = delete;
+    InstX86Call(const InstX86Call &) = delete;
+    InstX86Call &operator=(const InstX86Call &) = delete;
 
-  void emitIAS(const Cfg *Func) const override;
+  public:
+    static InstX86Call *create(Cfg *Func, Variable *Dest, Operand *CallTarget) {
+      return new (Func->allocate<InstX86Call>())
+          InstX86Call(Func, Dest, CallTarget);
+    }
+    Operand *getCallTarget() const { return this->getSrc(0); }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Call);
+    }
 
-private:
-  InstX86Movsx(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx>(Func, Dest,
-                                                                    Src) {}
-};
+  private:
+    InstX86Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
+  };
 
-template <class Machine>
-class InstX86Movzx
-    : public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx> {
-public:
-  static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    assert(typeWidthInBytes(Dest->getType()) >
-           typeWidthInBytes(Src->getType()));
-    return new (Func->allocate<InstX86Movzx>()) InstX86Movzx(Func, Dest, Src);
-  }
+  /// Emit a one-operand (GPR) instruction.
+  static void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Var,
+                             const GPREmitterOneOp &Emitter);
 
-  void emit(const Cfg *Func) const override;
+  static void emitIASAsAddrOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op0,
+                                   const Operand *Op1,
+                                   const GPREmitterAddrOp &Emitter);
 
-  void emitIAS(const Cfg *Func) const override;
+  static void emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
+                              const Operand *Src,
+                              const GPREmitterShiftOp &Emitter);
 
-private:
-  InstX86Movzx(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx>(Func, Dest,
-                                                                    Src) {}
-};
+  static void emitIASAddrOpTyGPR(const Cfg *Func, Type Ty, const Address &Addr,
+                                 const Operand *Src,
+                                 const GPREmitterAddrOp &Emitter);
 
-template <class Machine>
-class InstX86Movd
-    : public InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Movd> {
-public:
-  static InstX86Movd *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX86Movd>()) InstX86Movd(Func, Dest, Src);
-  }
+  static void emitIASRegOpTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
+                                const Operand *Src,
+                                const XmmEmitterRegOp &Emitter);
 
-  void emit(const Cfg *Func) const override;
+  static void emitIASGPRShiftDouble(const Cfg *Func, const Variable *Dest,
+                                    const Operand *Src1Op,
+                                    const Operand *Src2Op,
+                                    const GPREmitterShiftD &Emitter);
 
-  void emitIAS(const Cfg *Func) const override;
+  template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
+            SReg_t (*srcEnc)(int32_t)>
+  static void emitIASCastRegOp(const Cfg *Func, Type DestTy,
+                               const Variable *Dest, Type SrcTy,
+                               const Operand *Src,
+                               const CastEmitterRegOp<DReg_t, SReg_t> &Emitter);
 
-private:
-  InstX86Movd(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Movd>(Func, Dest,
-                                                                   Src) {}
-};
+  template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
+            SReg_t (*srcEnc)(int32_t)>
+  static void
+  emitIASThreeOpImmOps(const Cfg *Func, Type DispatchTy, const Variable *Dest,
+                       const Operand *Src0, const Operand *Src1,
+                       const ThreeOpImmEmitter<DReg_t, SReg_t> Emitter);
 
-template <class Machine>
-class InstX86Sqrtss
-    : public InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Sqrtss> {
-public:
-  static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) {
-    return new (Func->allocate<InstX86Sqrtss>()) InstX86Sqrtss(Func, Dest, Src);
-  }
+  static void emitIASMovlikeXMM(const Cfg *Func, const Variable *Dest,
+                                const Operand *Src,
+                                const XmmEmitterMovOps Emitter);
 
-  virtual void emit(const Cfg *Func) const override;
+  static void emitVariableBlendInst(const char *Opcode, const Inst *Inst,
+                                    const Cfg *Func);
 
-private:
-  InstX86Sqrtss(Cfg *Func, Variable *Dest, Operand *Src)
-      : InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Sqrtss>(Func, Dest,
-                                                                     Src) {}
-};
+  static void emitIASVariableBlendInst(const Inst *Inst, const Cfg *Func,
+                                       const XmmEmitterRegOp &Emitter);
 
-/// Move/assignment instruction - wrapper for mov/movss/movsd.
-template <class Machine>
-class InstX86Mov
-    : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Mov> {
-public:
-  static InstX86Mov *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    assert(!isScalarIntegerType(Dest->getType()) ||
-           (typeWidthInBytes(Dest->getType()) <=
-            typeWidthInBytes(Source->getType())));
-    return new (Func->allocate<InstX86Mov>()) InstX86Mov(Func, Dest, Source);
-  }
+  static void emitIASXmmShift(const Cfg *Func, Type Ty, const Variable *Var,
+                              const Operand *Src,
+                              const XmmEmitterShiftOp &Emitter);
 
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
+  /// Emit a two-operand (GPR) instruction, where the dest operand is a Variable
+  /// that's guaranteed to be a register.
+  template <bool VarCanBeByte = true, bool SrcCanBeByte = true>
+  static void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Dst,
+                                const Operand *Src,
+                                const GPREmitterRegOp &Emitter);
 
-private:
-  InstX86Mov(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseMovlike<Machine, InstX86Base<Machine>::Mov>(Func, Dest,
-                                                               Source) {}
-};
+  /// Instructions of the form x := op(x).
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseInplaceopGPR : public InstX86Base {
+    InstX86BaseInplaceopGPR() = delete;
+    InstX86BaseInplaceopGPR(const InstX86BaseInplaceopGPR &) = delete;
+    InstX86BaseInplaceopGPR &
+    operator=(const InstX86BaseInplaceopGPR &) = delete;
 
-/// Move packed - copy 128 bit values between XMM registers, or mem128 and XMM
-/// registers.
-template <class Machine>
-class InstX86Movp
-    : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movp> {
-public:
-  static InstX86Movp *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Movp>()) InstX86Movp(Func, Dest, Source);
-  }
+  public:
+    using Base = InstX86BaseInplaceopGPR<K>;
 
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrEmit();
+      assert(this->getSrcSize() == 1);
+      Str << "\t" << Opcode << "\t";
+      this->getSrc(0)->emit(Func);
+    }
+    void emitIAS(const Cfg *Func) const override {
+      assert(this->getSrcSize() == 1);
+      const Variable *Var = this->getDest();
+      Type Ty = Var->getType();
+      emitIASOpTyGPR(Func, Ty, Var, Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
 
-private:
-  InstX86Movp(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movp>(Func, Dest,
+  protected:
+    InstX86BaseInplaceopGPR(Cfg *Func, Operand *SrcDest)
+        : InstX86Base(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
+      this->addSource(SrcDest);
+    }
+
+  private:
+    static const char *Opcode;
+    static const GPREmitterOneOp Emitter;
+  };
+
+  /// Instructions of the form x := op(y).
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseUnaryopGPR : public InstX86Base {
+    InstX86BaseUnaryopGPR() = delete;
+    InstX86BaseUnaryopGPR(const InstX86BaseUnaryopGPR &) = delete;
+    InstX86BaseUnaryopGPR &operator=(const InstX86BaseUnaryopGPR &) = delete;
+
+  public:
+    using Base = InstX86BaseUnaryopGPR<K>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrEmit();
+      assert(this->getSrcSize() == 1);
+      Type SrcTy = this->getSrc(0)->getType();
+      Type DestTy = this->getDest()->getType();
+      Str << "\t" << Opcode << this->getWidthString(SrcTy);
+      // Movsx and movzx need both the source and dest type width letter to
+      // define the operation. The other unary operations have the same source
+      // and dest type and as a result need only one letter.
+      if (SrcTy != DestTy)
+        Str << this->getWidthString(DestTy);
+      Str << "\t";
+      this->getSrc(0)->emit(Func);
+      Str << ", ";
+      this->getDest()->emit(Func);
+    }
+    void emitIAS(const Cfg *Func) const override {
+      assert(this->getSrcSize() == 1);
+      const Variable *Var = this->getDest();
+      Type Ty = Var->getType();
+      const Operand *Src = this->getSrc(0);
+      emitIASRegOpTyGPR(Func, Ty, Var, Src, Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getSrc(0)->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseUnaryopGPR(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86Base(Func, K, 1, Dest) {
+      this->addSource(Src);
+    }
+
+    static const char *Opcode;
+    static const GPREmitterRegOp Emitter;
+  };
+
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseUnaryopXmm : public InstX86Base {
+    InstX86BaseUnaryopXmm() = delete;
+    InstX86BaseUnaryopXmm(const InstX86BaseUnaryopXmm &) = delete;
+    InstX86BaseUnaryopXmm &operator=(const InstX86BaseUnaryopXmm &) = delete;
+
+  public:
+    using Base = InstX86BaseUnaryopXmm<K>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrEmit();
+      assert(this->getSrcSize() == 1);
+      Str << "\t" << Opcode << "\t";
+      this->getSrc(0)->emit(Func);
+      Str << ", ";
+      this->getDest()->emit(Func);
+    }
+    void emitIAS(const Cfg *Func) const override {
+      Type Ty = this->getDest()->getType();
+      assert(this->getSrcSize() == 1);
+      emitIASRegOpTyXMM(Func, Ty, this->getDest(), this->getSrc(0), Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseUnaryopXmm(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86Base(Func, K, 1, Dest) {
+      this->addSource(Src);
+    }
+
+    static const char *Opcode;
+    static const XmmEmitterRegOp Emitter;
+  };
+
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseBinopGPRShift : public InstX86Base {
+    InstX86BaseBinopGPRShift() = delete;
+    InstX86BaseBinopGPRShift(const InstX86BaseBinopGPRShift &) = delete;
+    InstX86BaseBinopGPRShift &
+    operator=(const InstX86BaseBinopGPRShift &) = delete;
+
+  public:
+    using Base = InstX86BaseBinopGPRShift<K>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      this->emitTwoAddress(Func, Opcode);
+    }
+    void emitIAS(const Cfg *Func) const override {
+      Type Ty = this->getDest()->getType();
+      assert(this->getSrcSize() == 2);
+      emitIASGPRShift(Func, Ty, this->getDest(), this->getSrc(1), Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseBinopGPRShift(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86Base(Func, K, 2, Dest) {
+      this->addSource(Dest);
+      this->addSource(Source);
+    }
+
+    static const char *Opcode;
+    static const GPREmitterShiftOp Emitter;
+  };
+
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseBinopGPR : public InstX86Base {
+    InstX86BaseBinopGPR() = delete;
+    InstX86BaseBinopGPR(const InstX86BaseBinopGPR &) = delete;
+    InstX86BaseBinopGPR &operator=(const InstX86BaseBinopGPR &) = delete;
+
+  public:
+    using Base = InstX86BaseBinopGPR<K>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      this->emitTwoAddress(Func, Opcode);
+    }
+    void emitIAS(const Cfg *Func) const override {
+      Type Ty = this->getDest()->getType();
+      assert(this->getSrcSize() == 2);
+      emitIASRegOpTyGPR(Func, Ty, this->getDest(), this->getSrc(1), Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseBinopGPR(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86Base(Func, K, 2, Dest) {
+      this->addSource(Dest);
+      this->addSource(Source);
+    }
+
+    static const char *Opcode;
+    static const GPREmitterRegOp Emitter;
+  };
+
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseBinopRMW : public InstX86Base {
+    InstX86BaseBinopRMW() = delete;
+    InstX86BaseBinopRMW(const InstX86BaseBinopRMW &) = delete;
+    InstX86BaseBinopRMW &operator=(const InstX86BaseBinopRMW &) = delete;
+
+  public:
+    using Base = InstX86BaseBinopRMW<K>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      this->emitTwoAddress(Func, Opcode);
+    }
+    void emitIAS(const Cfg *Func) const override {
+      Type Ty = this->getSrc(0)->getType();
+      assert(this->getSrcSize() == 2);
+      emitIASAsAddrOpTyGPR(Func, Ty, this->getSrc(0), this->getSrc(1), Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      Str << Opcode << "." << this->getSrc(0)->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseBinopRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86Base(Func, K, 2, nullptr) {
+      this->addSource(DestSrc0);
+      this->addSource(Src1);
+    }
+
+    static const char *Opcode;
+    static const GPREmitterAddrOp Emitter;
+  };
+
+  template <typename InstX86Base::InstKindX86 K, bool NeedsElementType,
+            typename InstX86Base::SseSuffix Suffix>
+  class InstX86BaseBinopXmm : public InstX86Base {
+    InstX86BaseBinopXmm() = delete;
+    InstX86BaseBinopXmm(const InstX86BaseBinopXmm &) = delete;
+    InstX86BaseBinopXmm &operator=(const InstX86BaseBinopXmm &) = delete;
+
+  public:
+    using Base = InstX86BaseBinopXmm<K, NeedsElementType, Suffix>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      this->validateVectorAddrMode();
+      switch (Suffix) {
+      case InstX86Base::SseSuffix::None:
+        this->emitTwoAddress(Func, Opcode);
+        break;
+      case InstX86Base::SseSuffix::Packed: {
+        const Type DestTy = this->getDest()->getType();
+        this->emitTwoAddress(Func, this->Opcode,
+                             Traits::TypeAttributes[DestTy].PdPsString);
+      } break;
+      case InstX86Base::SseSuffix::Scalar: {
+        const Type DestTy = this->getDest()->getType();
+        this->emitTwoAddress(Func, this->Opcode,
+                             Traits::TypeAttributes[DestTy].SdSsString);
+      } break;
+      case InstX86Base::SseSuffix::Integral: {
+        const Type DestTy = this->getDest()->getType();
+        this->emitTwoAddress(Func, this->Opcode,
+                             Traits::TypeAttributes[DestTy].PackString);
+      } break;
+      }
+    }
+    void emitIAS(const Cfg *Func) const override {
+      this->validateVectorAddrMode();
+      Type Ty = this->getDest()->getType();
+      if (NeedsElementType)
+        Ty = typeElementType(Ty);
+      assert(this->getSrcSize() == 2);
+      emitIASRegOpTyXMM(Func, Ty, this->getDest(), this->getSrc(1), Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseBinopXmm(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86Base(Func, K, 2, Dest) {
+      this->addSource(Dest);
+      this->addSource(Source);
+    }
+
+    static const char *Opcode;
+    static const XmmEmitterRegOp Emitter;
+  };
+
+  template <typename InstX86Base::InstKindX86 K, bool AllowAllTypes = false>
+  class InstX86BaseBinopXmmShift : public InstX86Base {
+    InstX86BaseBinopXmmShift() = delete;
+    InstX86BaseBinopXmmShift(const InstX86BaseBinopXmmShift &) = delete;
+    InstX86BaseBinopXmmShift &
+    operator=(const InstX86BaseBinopXmmShift &) = delete;
+
+  public:
+    using Base = InstX86BaseBinopXmmShift<K, AllowAllTypes>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      this->validateVectorAddrMode();
+      // Shift operations are always integral, and hence always need a suffix.
+      const Type DestTy = this->getDest()->getType();
+      this->emitTwoAddress(Func, this->Opcode,
+                           Traits::TypeAttributes[DestTy].PackString);
+    }
+    void emitIAS(const Cfg *Func) const override {
+      this->validateVectorAddrMode();
+      Type Ty = this->getDest()->getType();
+      assert(AllowAllTypes || isVectorType(Ty));
+      Type ElementTy = typeElementType(Ty);
+      assert(this->getSrcSize() == 2);
+      emitIASXmmShift(Func, ElementTy, this->getDest(), this->getSrc(1),
+                      Emitter);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseBinopXmmShift(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86Base(Func, K, 2, Dest) {
+      this->addSource(Dest);
+      this->addSource(Source);
+    }
+
+    static const char *Opcode;
+    static const XmmEmitterShiftOp Emitter;
+  };
+
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseTernop : public InstX86Base {
+    InstX86BaseTernop() = delete;
+    InstX86BaseTernop(const InstX86BaseTernop &) = delete;
+    InstX86BaseTernop &operator=(const InstX86BaseTernop &) = delete;
+
+  public:
+    using Base = InstX86BaseTernop<K>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrEmit();
+      assert(this->getSrcSize() == 3);
+      Str << "\t" << Opcode << "\t";
+      this->getSrc(2)->emit(Func);
+      Str << ", ";
+      this->getSrc(1)->emit(Func);
+      Str << ", ";
+      this->getDest()->emit(Func);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseTernop(Cfg *Func, Variable *Dest, Operand *Source1,
+                      Operand *Source2)
+        : InstX86Base(Func, K, 3, Dest) {
+      this->addSource(Dest);
+      this->addSource(Source1);
+      this->addSource(Source2);
+    }
+
+    static const char *Opcode;
+  };
+
+  /// Instructions of the form x := y op z.
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseThreeAddressop : public InstX86Base {
+    InstX86BaseThreeAddressop() = delete;
+    InstX86BaseThreeAddressop(const InstX86BaseThreeAddressop &) = delete;
+    InstX86BaseThreeAddressop &
+    operator=(const InstX86BaseThreeAddressop &) = delete;
+
+  public:
+    using Base = InstX86BaseThreeAddressop<K>;
+
+    void emit(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrEmit();
+      assert(this->getSrcSize() == 2);
+      Str << "\t" << Opcode << "\t";
+      this->getSrc(1)->emit(Func);
+      Str << ", ";
+      this->getSrc(0)->emit(Func);
+      Str << ", ";
+      this->getDest()->emit(Func);
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      this->dumpDest(Func);
+      Str << " = " << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseThreeAddressop(Cfg *Func, Variable *Dest, Operand *Source0,
+                              Operand *Source1)
+        : InstX86Base(Func, K, 2, Dest) {
+      this->addSource(Source0);
+      this->addSource(Source1);
+    }
+
+    static const char *Opcode;
+  };
+
+  /// Base class for assignment instructions.
+  template <typename InstX86Base::InstKindX86 K>
+  class InstX86BaseMovlike : public InstX86Base {
+    InstX86BaseMovlike() = delete;
+    InstX86BaseMovlike(const InstX86BaseMovlike &) = delete;
+    InstX86BaseMovlike &operator=(const InstX86BaseMovlike &) = delete;
+
+  public:
+    using Base = InstX86BaseMovlike<K>;
+
+    bool isRedundantAssign() const override {
+      if (const auto *SrcVar =
+              llvm::dyn_cast<const Variable>(this->getSrc(0))) {
+        if (SrcVar->hasReg() && this->Dest->hasReg()) {
+          // An assignment between physical registers is considered redundant if
+          // they have the same base register and the same encoding. E.g.:
+          //   mov cl, ecx ==> redundant
+          //   mov ch, ecx ==> not redundant due to different encodings
+          //   mov ch, ebp ==> not redundant due to different base registers
+          //   mov ecx, ecx ==> redundant, and dangerous in x86-64. i64 zexting
+          //                    is handled by InstX86Movzx.
+          const int32_t SrcReg = SrcVar->getRegNum();
+          const int32_t DestReg = this->Dest->getRegNum();
+          return (Traits::getEncoding(SrcReg) ==
+                  Traits::getEncoding(DestReg)) &&
+                 (Traits::getBaseReg(SrcReg) == Traits::getBaseReg(DestReg));
+        }
+      }
+      return checkForRedundantAssign(this->getDest(), this->getSrc(0));
+    }
+    bool isVarAssign() const override {
+      return llvm::isa<Variable>(this->getSrc(0));
+    }
+    void dump(const Cfg *Func) const override {
+      if (!BuildDefs::dump())
+        return;
+      Ostream &Str = Func->getContext()->getStrDump();
+      Str << Opcode << "." << this->getDest()->getType() << " ";
+      this->dumpDest(Func);
+      Str << ", ";
+      this->dumpSources(Func);
+    }
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::K);
+    }
+
+  protected:
+    InstX86BaseMovlike(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86Base(Func, K, 1, Dest) {
+      this->addSource(Source);
+      // For an integer assignment, make sure it's either a same-type assignment
+      // or a truncation.
+      assert(!isScalarIntegerType(Dest->getType()) ||
+             (typeWidthInBytes(Dest->getType()) <=
+              typeWidthInBytes(Source->getType())));
+    }
+
+    static const char *Opcode; // Mnemonic text streamed by dump().
+  };
+
+  class InstX86Bswap : public InstX86BaseInplaceopGPR<InstX86Base::Bswap> {
+  public:
+    static InstX86Bswap *create(Cfg *Func, Operand *SrcDest) {
+      return new (Func->allocate<InstX86Bswap>()) InstX86Bswap(Func, SrcDest);
+    }
+
+  private:
+    InstX86Bswap(Cfg *Func, Operand *SrcDest)
+        : InstX86BaseInplaceopGPR<InstX86Base::Bswap>(Func, SrcDest) {}
+  };
+
+  class InstX86Neg : public InstX86BaseInplaceopGPR<InstX86Base::Neg> {
+  public:
+    static InstX86Neg *create(Cfg *Func, Operand *SrcDest) {
+      return new (Func->allocate<InstX86Neg>()) InstX86Neg(Func, SrcDest);
+    }
+
+  private:
+    InstX86Neg(Cfg *Func, Operand *SrcDest)
+        : InstX86BaseInplaceopGPR<InstX86Base::Neg>(Func, SrcDest) {}
+  };
+
+  class InstX86Bsf : public InstX86BaseUnaryopGPR<InstX86Base::Bsf> {
+  public:
+    static InstX86Bsf *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      return new (Func->allocate<InstX86Bsf>()) InstX86Bsf(Func, Dest, Src);
+    }
+
+  private:
+    InstX86Bsf(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopGPR<InstX86Base::Bsf>(Func, Dest, Src) {}
+  };
+
+  class InstX86Bsr : public InstX86BaseUnaryopGPR<InstX86Base::Bsr> {
+  public:
+    static InstX86Bsr *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      return new (Func->allocate<InstX86Bsr>()) InstX86Bsr(Func, Dest, Src);
+    }
+
+  private:
+    InstX86Bsr(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopGPR<InstX86Base::Bsr>(Func, Dest, Src) {}
+  };
+
+  class InstX86Lea : public InstX86BaseUnaryopGPR<InstX86Base::Lea> {
+  public:
+    static InstX86Lea *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      return new (Func->allocate<InstX86Lea>()) InstX86Lea(Func, Dest, Src);
+    }
+
+    void emit(const Cfg *Func) const override;
+
+  private:
+    InstX86Lea(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopGPR<InstX86Base::Lea>(Func, Dest, Src) {}
+  };
+
+  /// Cbwdq instruction - wrapper for cbw, cwd, and cdq.
+  class InstX86Cbwdq : public InstX86BaseUnaryopGPR<InstX86Base::Cbwdq> {
+  public:
+    static InstX86Cbwdq *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      return new (Func->allocate<InstX86Cbwdq>()) InstX86Cbwdq(Func, Dest, Src);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Cbwdq(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopGPR<InstX86Base::Cbwdq>(Func, Dest, Src) {}
+  };
+
+  class InstX86Movsx : public InstX86BaseUnaryopGPR<InstX86Base::Movsx> {
+  public:
+    static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      assert(typeWidthInBytes(Dest->getType()) >
+             typeWidthInBytes(Src->getType()));
+      return new (Func->allocate<InstX86Movsx>()) InstX86Movsx(Func, Dest, Src);
+    }
+
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Movsx(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopGPR<InstX86Base::Movsx>(Func, Dest, Src) {}
+  };
+
+  class InstX86Movzx : public InstX86BaseUnaryopGPR<InstX86Base::Movzx> {
+  public:
+    static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      assert(typeWidthInBytes(Dest->getType()) >
+             typeWidthInBytes(Src->getType()));
+      return new (Func->allocate<InstX86Movzx>()) InstX86Movzx(Func, Dest, Src);
+    }
+
+    void emit(const Cfg *Func) const override;
+
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Movzx(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopGPR<InstX86Base::Movzx>(Func, Dest, Src) {}
+  };
+
+  class InstX86Movd : public InstX86BaseUnaryopXmm<InstX86Base::Movd> {
+  public:
+    static InstX86Movd *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      return new (Func->allocate<InstX86Movd>()) InstX86Movd(Func, Dest, Src);
+    }
+
+    void emit(const Cfg *Func) const override;
+
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Movd(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopXmm<InstX86Base::Movd>(Func, Dest, Src) {}
+  };
+
+  class InstX86Sqrtss : public InstX86BaseUnaryopXmm<InstX86Base::Sqrtss> {
+  public:
+    static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) {
+      return new (Func->allocate<InstX86Sqrtss>())
+          InstX86Sqrtss(Func, Dest, Src);
+    }
+
+    void emit(const Cfg *Func) const override; // Replaces the base emit().
+
+  private:
+    InstX86Sqrtss(Cfg *Func, Variable *Dest, Operand *Src)
+        : InstX86BaseUnaryopXmm<InstX86Base::Sqrtss>(Func, Dest, Src) {}
+  };
+
+  /// Move/assignment instruction - wrapper for mov/movss/movsd.
+  class InstX86Mov : public InstX86BaseMovlike<InstX86Base::Mov> {
+  public:
+    static InstX86Mov *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(!isScalarIntegerType(Dest->getType()) ||
+             (typeWidthInBytes(Dest->getType()) <=
+              typeWidthInBytes(Source->getType())));
+      return new (Func->allocate<InstX86Mov>()) InstX86Mov(Func, Dest, Source);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Mov(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseMovlike<InstX86Base::Mov>(Func, Dest, Source) {}
+  };
+
+  /// Move packed - copy 128 bit values between XMM registers, or mem128 and XMM
+  /// registers.
+  class InstX86Movp : public InstX86BaseMovlike<InstX86Base::Movp> {
+  public:
+    static InstX86Movp *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Movp>())
+          InstX86Movp(Func, Dest, Source);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Movp(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseMovlike<InstX86Base::Movp>(Func, Dest, Source) {}
+  };
+
+  /// Movq - copy between XMM registers, or mem64 and XMM registers.
+  class InstX86Movq : public InstX86BaseMovlike<InstX86Base::Movq> {
+  public:
+    static InstX86Movq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Movq>())
+          InstX86Movq(Func, Dest, Source);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Movq(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseMovlike<InstX86Base::Movq>(Func, Dest, Source) {}
+  };
+
+  class InstX86Add : public InstX86BaseBinopGPR<InstX86Base::Add> {
+  public:
+    static InstX86Add *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Add>()) InstX86Add(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Add(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::Add>(Func, Dest, Source) {}
+  };
+
+  class InstX86AddRMW : public InstX86BaseBinopRMW<InstX86Base::AddRMW> {
+  public:
+    static InstX86AddRMW *create(Cfg *Func, X86OperandMem *DestSrc0,
+                                 Operand *Src1) {
+      return new (Func->allocate<InstX86AddRMW>())
+          InstX86AddRMW(Func, DestSrc0, Src1);
+    }
+
+  private:
+    InstX86AddRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86BaseBinopRMW<InstX86Base::AddRMW>(Func, DestSrc0, Src1) {}
+  };
+
+  class InstX86Addps
+      : public InstX86BaseBinopXmm<InstX86Base::Addps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Addps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Addps>())
+          InstX86Addps(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Addps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Addps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Adc : public InstX86BaseBinopGPR<InstX86Base::Adc> {
+  public:
+    static InstX86Adc *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Adc>()) InstX86Adc(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Adc(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::Adc>(Func, Dest, Source) {}
+  };
+
+  class InstX86AdcRMW : public InstX86BaseBinopRMW<InstX86Base::AdcRMW> {
+  public:
+    static InstX86AdcRMW *create(Cfg *Func, X86OperandMem *DestSrc0,
+                                 Operand *Src1) {
+      return new (Func->allocate<InstX86AdcRMW>())
+          InstX86AdcRMW(Func, DestSrc0, Src1);
+    }
+
+  private:
+    InstX86AdcRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86BaseBinopRMW<InstX86Base::AdcRMW>(Func, DestSrc0, Src1) {}
+  };
+
+  class InstX86Addss
+      : public InstX86BaseBinopXmm<InstX86Base::Addss, false,
+                                   InstX86Base::SseSuffix::Scalar> {
+  public:
+    static InstX86Addss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Addss>())
+          InstX86Addss(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Addss(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Addss, false,
+                              InstX86Base::SseSuffix::Scalar>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Padd
+      : public InstX86BaseBinopXmm<InstX86Base::Padd, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Padd *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Padd>())
+          InstX86Padd(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Padd(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Padd, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
                                                                 Source) {}
-};
+  };
 
-/// Movq - copy between XMM registers, or mem64 and XMM registers.
-template <class Machine>
-class InstX86Movq
-    : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movq> {
-public:
-  static InstX86Movq *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Movq>()) InstX86Movq(Func, Dest, Source);
-  }
+  class InstX86Sub : public InstX86BaseBinopGPR<InstX86Base::Sub> {
+  public:
+    static InstX86Sub *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Sub>()) InstX86Sub(Func, Dest, Source);
+    }
 
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
+  private:
+    InstX86Sub(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::Sub>(Func, Dest, Source) {}
+  };
 
-private:
-  InstX86Movq(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movq>(Func, Dest,
+  class InstX86SubRMW : public InstX86BaseBinopRMW<InstX86Base::SubRMW> {
+  public:
+    static InstX86SubRMW *create(Cfg *Func, X86OperandMem *DestSrc0,
+                                 Operand *Src1) {
+      return new (Func->allocate<InstX86SubRMW>())
+          InstX86SubRMW(Func, DestSrc0, Src1);
+    }
+
+  private:
+    InstX86SubRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86BaseBinopRMW<InstX86Base::SubRMW>(Func, DestSrc0, Src1) {}
+  };
+
+  class InstX86Subps
+      : public InstX86BaseBinopXmm<InstX86Base::Subps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Subps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Subps>())
+          InstX86Subps(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Subps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Subps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Subss
+      : public InstX86BaseBinopXmm<InstX86Base::Subss, false,
+                                   InstX86Base::SseSuffix::Scalar> {
+  public:
+    static InstX86Subss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Subss>())
+          InstX86Subss(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Subss(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Subss, false,
+                              InstX86Base::SseSuffix::Scalar>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Sbb : public InstX86BaseBinopGPR<InstX86Base::Sbb> {
+  public:
+    static InstX86Sbb *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Sbb>()) InstX86Sbb(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Sbb(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::Sbb>(Func, Dest, Source) {}
+  };
+
+  class InstX86SbbRMW : public InstX86BaseBinopRMW<InstX86Base::SbbRMW> {
+  public:
+    static InstX86SbbRMW *create(Cfg *Func, X86OperandMem *DestSrc0,
+                                 Operand *Src1) {
+      return new (Func->allocate<InstX86SbbRMW>())
+          InstX86SbbRMW(Func, DestSrc0, Src1);
+    }
+
+  private:
+    InstX86SbbRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86BaseBinopRMW<InstX86Base::SbbRMW>(Func, DestSrc0, Src1) {}
+  };
+
+  class InstX86Psub
+      : public InstX86BaseBinopXmm<InstX86Base::Psub, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Psub *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Psub>())
+          InstX86Psub(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Psub(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Psub, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
                                                                 Source) {}
-};
+  };
 
-template <class Machine>
-class InstX86Add
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Add> {
-public:
-  static InstX86Add *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Add>()) InstX86Add(Func, Dest, Source);
-  }
+  class InstX86And : public InstX86BaseBinopGPR<InstX86Base::And> {
+  public:
+    static InstX86And *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86And>()) InstX86And(Func, Dest, Source);
+    }
 
-private:
-  InstX86Add(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Add>(Func, Dest,
+  private:
+    InstX86And(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::And>(Func, Dest, Source) {}
+  };
+
+  class InstX86Andnps
+      : public InstX86BaseBinopXmm<InstX86Base::Andnps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Andnps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Andnps>())
+          InstX86Andnps(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Andnps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Andnps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Andps
+      : public InstX86BaseBinopXmm<InstX86Base::Andps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Andps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Andps>())
+          InstX86Andps(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Andps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Andps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86AndRMW : public InstX86BaseBinopRMW<InstX86Base::AndRMW> {
+  public:
+    static InstX86AndRMW *create(Cfg *Func, X86OperandMem *DestSrc0,
+                                 Operand *Src1) {
+      return new (Func->allocate<InstX86AndRMW>())
+          InstX86AndRMW(Func, DestSrc0, Src1);
+    }
+
+  private:
+    InstX86AndRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86BaseBinopRMW<InstX86Base::AndRMW>(Func, DestSrc0, Src1) {}
+  };
+
+  class InstX86Pand : public InstX86BaseBinopXmm<InstX86Base::Pand, false,
+                                                 InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Pand *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Pand>())
+          InstX86Pand(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pand(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Pand, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
+
+  class InstX86Pandn
+      : public InstX86BaseBinopXmm<InstX86Base::Pandn, false,
+                                   InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Pandn *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Pandn>())
+          InstX86Pandn(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pandn(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Pandn, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
+
+  class InstX86Maxss
+      : public InstX86BaseBinopXmm<InstX86Base::Maxss, true,
+                                   InstX86Base::SseSuffix::Scalar> {
+  public:
+    static InstX86Maxss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Maxss>())
+          InstX86Maxss(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Maxss(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Maxss, true,
+                              InstX86Base::SseSuffix::Scalar>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Minss
+      : public InstX86BaseBinopXmm<InstX86Base::Minss, true,
+                                   InstX86Base::SseSuffix::Scalar> {
+  public:
+    static InstX86Minss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Minss>())
+          InstX86Minss(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Minss(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Minss, true,
+                              InstX86Base::SseSuffix::Scalar>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Or : public InstX86BaseBinopGPR<InstX86Base::Or> {
+  public:
+    static InstX86Or *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Or>()) InstX86Or(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Or(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::Or>(Func, Dest, Source) {}
+  };
+
+  class InstX86Orps
+      : public InstX86BaseBinopXmm<InstX86Base::Orps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Orps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Orps>())
+          InstX86Orps(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Orps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Orps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86OrRMW : public InstX86BaseBinopRMW<InstX86Base::OrRMW> {
+  public:
+    static InstX86OrRMW *create(Cfg *Func, X86OperandMem *DestSrc0,
+                                Operand *Src1) {
+      return new (Func->allocate<InstX86OrRMW>())
+          InstX86OrRMW(Func, DestSrc0, Src1);
+    }
+
+  private:
+    InstX86OrRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86BaseBinopRMW<InstX86Base::OrRMW>(Func, DestSrc0, Src1) {}
+  };
+
+  class InstX86Por : public InstX86BaseBinopXmm<InstX86Base::Por, false,
+                                                InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Por *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Por>()) InstX86Por(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Por(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Por, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
+
+  class InstX86Xor : public InstX86BaseBinopGPR<InstX86Base::Xor> {
+  public:
+    static InstX86Xor *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Xor>()) InstX86Xor(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Xor(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::Xor>(Func, Dest, Source) {}
+  };
+
+  class InstX86Xorps
+      : public InstX86BaseBinopXmm<InstX86Base::Xorps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Xorps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Xorps>())
+          InstX86Xorps(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Xorps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Xorps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86XorRMW : public InstX86BaseBinopRMW<InstX86Base::XorRMW> {
+  public:
+    static InstX86XorRMW *create(Cfg *Func, X86OperandMem *DestSrc0,
+                                 Operand *Src1) {
+      return new (Func->allocate<InstX86XorRMW>())
+          InstX86XorRMW(Func, DestSrc0, Src1);
+    }
+
+  private:
+    InstX86XorRMW(Cfg *Func, X86OperandMem *DestSrc0, Operand *Src1)
+        : InstX86BaseBinopRMW<InstX86Base::XorRMW>(Func, DestSrc0, Src1) {}
+  };
+
+  class InstX86Pxor : public InstX86BaseBinopXmm<InstX86Base::Pxor, false,
+                                                 InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Pxor *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Pxor>())
+          InstX86Pxor(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pxor(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Pxor, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
+
+  class InstX86Imul : public InstX86BaseBinopGPR<InstX86Base::Imul> {
+  public:
+    static InstX86Imul *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Imul>())
+          InstX86Imul(Func, Dest, Source);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Imul(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPR<InstX86Base::Imul>(Func, Dest, Source) {}
+  };
+
+  class InstX86ImulImm
+      : public InstX86BaseThreeAddressop<InstX86Base::ImulImm> {
+  public:
+    static InstX86ImulImm *create(Cfg *Func, Variable *Dest, Operand *Source0,
+                                  Operand *Source1) {
+      return new (Func->allocate<InstX86ImulImm>())
+          InstX86ImulImm(Func, Dest, Source0, Source1);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86ImulImm(Cfg *Func, Variable *Dest, Operand *Source0,
+                   Operand *Source1)
+        : InstX86BaseThreeAddressop<InstX86Base::ImulImm>(Func, Dest, Source0,
+                                                          Source1) {}
+  };
+
+  class InstX86Mulps
+      : public InstX86BaseBinopXmm<InstX86Base::Mulps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Mulps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Mulps>())
+          InstX86Mulps(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Mulps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Mulps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Mulss
+      : public InstX86BaseBinopXmm<InstX86Base::Mulss, false,
+                                   InstX86Base::SseSuffix::Scalar> {
+  public:
+    static InstX86Mulss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Mulss>())
+          InstX86Mulss(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Mulss(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Mulss, false,
+                              InstX86Base::SseSuffix::Scalar>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Pmull
+      : public InstX86BaseBinopXmm<InstX86Base::Pmull, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Pmull *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      bool TypesAreValid =
+          Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
+      auto *Target = InstX86Base::getTarget(Func);
+      bool InstructionSetIsValid =
+          Dest->getType() == IceType_v8i16 ||
+          Target->getInstructionSet() >= Traits::SSE4_1;
+      (void)TypesAreValid;
+      (void)InstructionSetIsValid;
+      assert(TypesAreValid);
+      assert(InstructionSetIsValid);
+      return new (Func->allocate<InstX86Pmull>())
+          InstX86Pmull(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pmull(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Pmull, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
                                                                 Source) {}
-};
+  };
 
-template <class Machine>
-class InstX86AddRMW
-    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AddRMW> {
-public:
-  static InstX86AddRMW *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-         Operand *Src1) {
-    return new (Func->allocate<InstX86AddRMW>())
-        InstX86AddRMW(Func, DestSrc0, Src1);
-  }
+  class InstX86Pmuludq
+      : public InstX86BaseBinopXmm<InstX86Base::Pmuludq, false,
+                                   InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86Pmuludq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(Dest->getType() == IceType_v4i32 &&
+             Source->getType() == IceType_v4i32);
+      return new (Func->allocate<InstX86Pmuludq>())
+          InstX86Pmuludq(Func, Dest, Source);
+    }
 
-private:
-  InstX86AddRMW(Cfg *Func,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-                Operand *Src1)
-      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AddRMW>(
-            Func, DestSrc0, Src1) {}
-};
+  private:
+    InstX86Pmuludq(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Pmuludq, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
 
-template <class Machine>
-class InstX86Addps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Addps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Addps>())
-        InstX86Addps(Func, Dest, Source);
-  }
+  class InstX86Divps
+      : public InstX86BaseBinopXmm<InstX86Base::Divps, true,
+                                   InstX86Base::SseSuffix::Packed> {
+  public:
+    static InstX86Divps *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Divps>())
+          InstX86Divps(Func, Dest, Source);
+    }
 
-private:
-  InstX86Addps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
+  private:
+    InstX86Divps(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Divps, true,
+                              InstX86Base::SseSuffix::Packed>(Func, Dest,
+                                                              Source) {}
+  };
 
-template <class Machine>
-class InstX86Adc
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Adc> {
-public:
-  static InstX86Adc *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Adc>()) InstX86Adc(Func, Dest, Source);
-  }
+  class InstX86Divss
+      : public InstX86BaseBinopXmm<InstX86Base::Divss, false,
+                                   InstX86Base::SseSuffix::Scalar> {
+  public:
+    static InstX86Divss *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Divss>())
+          InstX86Divss(Func, Dest, Source);
+    }
 
-private:
-  InstX86Adc(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Adc>(Func, Dest,
+  private:
+    InstX86Divss(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Divss, false,
+                              InstX86Base::SseSuffix::Scalar>(Func, Dest,
+                                                              Source) {}
+  };
+
+  class InstX86Rol : public InstX86BaseBinopGPRShift<InstX86Base::Rol> {
+  public:
+    static InstX86Rol *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Rol>()) InstX86Rol(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Rol(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPRShift<InstX86Base::Rol>(Func, Dest, Source) {}
+  };
+
+  class InstX86Shl : public InstX86BaseBinopGPRShift<InstX86Base::Shl> {
+  public:
+    static InstX86Shl *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Shl>()) InstX86Shl(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Shl(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPRShift<InstX86Base::Shl>(Func, Dest, Source) {}
+  };
+
+  class InstX86Psll : public InstX86BaseBinopXmmShift<InstX86Base::Psll> {
+  public:
+    static InstX86Psll *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(
+          Dest->getType() == IceType_v8i16 || Dest->getType() == IceType_v8i1 ||
+          Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v4i1);
+      return new (Func->allocate<InstX86Psll>())
+          InstX86Psll(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Psll(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmmShift<InstX86Base::Psll>(Func, Dest, Source) {}
+  };
+
+  class InstX86Psrl : public InstX86BaseBinopXmmShift<InstX86Base::Psrl, true> {
+  public:
+    static InstX86Psrl *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Psrl>())
+          InstX86Psrl(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Psrl(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmmShift<InstX86Base::Psrl, true>(Func, Dest,
+                                                            Source) {}
+  };
+
+  class InstX86Shr : public InstX86BaseBinopGPRShift<InstX86Base::Shr> {
+  public:
+    static InstX86Shr *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Shr>()) InstX86Shr(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Shr(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPRShift<InstX86Base::Shr>(Func, Dest, Source) {}
+  };
+
+  class InstX86Sar : public InstX86BaseBinopGPRShift<InstX86Base::Sar> {
+  public:
+    static InstX86Sar *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Sar>()) InstX86Sar(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Sar(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopGPRShift<InstX86Base::Sar>(Func, Dest, Source) {}
+  };
+
+  class InstX86Psra : public InstX86BaseBinopXmmShift<InstX86Base::Psra> {
+  public:
+    static InstX86Psra *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(
+          Dest->getType() == IceType_v8i16 || Dest->getType() == IceType_v8i1 ||
+          Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v4i1);
+      return new (Func->allocate<InstX86Psra>())
+          InstX86Psra(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Psra(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmmShift<InstX86Base::Psra>(Func, Dest, Source) {}
+  };
+
+  class InstX86Pcmpeq
+      : public InstX86BaseBinopXmm<InstX86Base::Pcmpeq, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Pcmpeq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Pcmpeq>())
+          InstX86Pcmpeq(Func, Dest, Source);
+    }
+
+  private:
+    InstX86Pcmpeq(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Pcmpeq, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
                                                                 Source) {}
-};
+  };
 
-template <class Machine>
-class InstX86AdcRMW
-    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AdcRMW> {
-public:
-  static InstX86AdcRMW *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-         Operand *Src1) {
-    return new (Func->allocate<InstX86AdcRMW>())
-        InstX86AdcRMW(Func, DestSrc0, Src1);
-  }
+  class InstX86Pcmpgt
+      : public InstX86BaseBinopXmm<InstX86Base::Pcmpgt, true,
+                                   InstX86Base::SseSuffix::Integral> {
+  public:
+    static InstX86Pcmpgt *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      return new (Func->allocate<InstX86Pcmpgt>())
+          InstX86Pcmpgt(Func, Dest, Source);
+    }
 
-private:
-  InstX86AdcRMW(Cfg *Func,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-                Operand *Src1)
-      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AdcRMW>(
-            Func, DestSrc0, Src1) {}
-};
-
-template <class Machine>
-class InstX86Addss
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addss, false,
-                                 InstX86Base<Machine>::SseSuffix::Scalar> {
-public:
-  static InstX86Addss *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Addss>())
-        InstX86Addss(Func, Dest, Source);
-  }
-
-private:
-  InstX86Addss(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Addss, false,
-                            InstX86Base<Machine>::SseSuffix::Scalar>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Padd
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Padd, true,
-                                 InstX86Base<Machine>::SseSuffix::Integral> {
-public:
-  static InstX86Padd *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Padd>()) InstX86Padd(Func, Dest, Source);
-  }
-
-private:
-  InstX86Padd(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Padd, true,
-                            InstX86Base<Machine>::SseSuffix::Integral>(
-            Func, Dest, Source) {}
-};
-
-template <class Machine>
-class InstX86Sub
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sub> {
-public:
-  static InstX86Sub *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Sub>()) InstX86Sub(Func, Dest, Source);
-  }
-
-private:
-  InstX86Sub(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sub>(Func, Dest,
+  private:
+    InstX86Pcmpgt(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::Pcmpgt, true,
+                              InstX86Base::SseSuffix::Integral>(Func, Dest,
                                                                 Source) {}
-};
+  };
 
-template <class Machine>
-class InstX86SubRMW
-    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SubRMW> {
-public:
-  static InstX86SubRMW *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-         Operand *Src1) {
-    return new (Func->allocate<InstX86SubRMW>())
-        InstX86SubRMW(Func, DestSrc0, Src1);
-  }
+  /// movss is only a binary operation when the source and dest operands are
+  /// both registers (the high bits of dest are left untouched). In other cases,
+  /// it behaves like a copy (mov-like) operation (and the high bits of dest are
+  /// cleared). InstX86MovssRegs will assert that both its source and dest
+  /// operands are registers, so the lowering code should use _mov instead of
+  /// _movss in cases where a copy operation is intended.
+  class InstX86MovssRegs
+      : public InstX86BaseBinopXmm<InstX86Base::MovssRegs, false,
+                                   InstX86Base::SseSuffix::None> {
+  public:
+    static InstX86MovssRegs *create(Cfg *Func, Variable *Dest,
+                                    Operand *Source) {
+      return new (Func->allocate<InstX86MovssRegs>())
+          InstX86MovssRegs(Func, Dest, Source);
+    }
 
-private:
-  InstX86SubRMW(Cfg *Func,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-                Operand *Src1)
-      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SubRMW>(
-            Func, DestSrc0, Src1) {}
-};
+    void emitIAS(const Cfg *Func) const override;
 
-template <class Machine>
-class InstX86Subps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Subps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Subps>())
-        InstX86Subps(Func, Dest, Source);
-  }
+  private:
+    InstX86MovssRegs(Cfg *Func, Variable *Dest, Operand *Source)
+        : InstX86BaseBinopXmm<InstX86Base::MovssRegs, false,
+                              InstX86Base::SseSuffix::None>(Func, Dest,
+                                                            Source) {}
+  };
 
-private:
-  InstX86Subps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Subss
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subss, false,
-                                 InstX86Base<Machine>::SseSuffix::Scalar> {
-public:
-  static InstX86Subss *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Subss>())
-        InstX86Subss(Func, Dest, Source);
-  }
-
-private:
-  InstX86Subss(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Subss, false,
-                            InstX86Base<Machine>::SseSuffix::Scalar>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Sbb
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sbb> {
-public:
-  static InstX86Sbb *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Sbb>()) InstX86Sbb(Func, Dest, Source);
-  }
-
-private:
-  InstX86Sbb(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Sbb>(Func, Dest,
-                                                                Source) {}
-};
-
-template <class Machine>
-class InstX86SbbRMW
-    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SbbRMW> {
-public:
-  static InstX86SbbRMW *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-         Operand *Src1) {
-    return new (Func->allocate<InstX86SbbRMW>())
-        InstX86SbbRMW(Func, DestSrc0, Src1);
-  }
-
-private:
-  InstX86SbbRMW(Cfg *Func,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-                Operand *Src1)
-      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::SbbRMW>(
-            Func, DestSrc0, Src1) {}
-};
-
-template <class Machine>
-class InstX86Psub
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Psub, true,
-                                 InstX86Base<Machine>::SseSuffix::Integral> {
-public:
-  static InstX86Psub *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Psub>()) InstX86Psub(Func, Dest, Source);
-  }
-
-private:
-  InstX86Psub(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Psub, true,
-                            InstX86Base<Machine>::SseSuffix::Integral>(
-            Func, Dest, Source) {}
-};
-
-template <class Machine>
-class InstX86And
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::And> {
-public:
-  static InstX86And *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86And>()) InstX86And(Func, Dest, Source);
-  }
-
-private:
-  InstX86And(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::And>(Func, Dest,
-                                                                Source) {}
-};
-
-template <class Machine>
-class InstX86Andnps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Andnps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Andnps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Andnps>())
-        InstX86Andnps(Func, Dest, Source);
-  }
-
-private:
-  InstX86Andnps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Andnps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Andps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Andps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Andps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Andps>())
-        InstX86Andps(Func, Dest, Source);
-  }
-
-private:
-  InstX86Andps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Andps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86AndRMW
-    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AndRMW> {
-public:
-  static InstX86AndRMW *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-         Operand *Src1) {
-    return new (Func->allocate<InstX86AndRMW>())
-        InstX86AndRMW(Func, DestSrc0, Src1);
-  }
-
-private:
-  InstX86AndRMW(Cfg *Func,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-                Operand *Src1)
-      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::AndRMW>(
-            Func, DestSrc0, Src1) {}
-};
-
-template <class Machine>
-class InstX86Pand
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pand, false,
-                                 InstX86Base<Machine>::SseSuffix::None> {
-public:
-  static InstX86Pand *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Pand>()) InstX86Pand(Func, Dest, Source);
-  }
-
-private:
-  InstX86Pand(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pand, false,
-                            InstX86Base<Machine>::SseSuffix::None>(Func, Dest,
-                                                                   Source) {}
-};
-
-template <class Machine>
-class InstX86Pandn
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pandn, false,
-                                 InstX86Base<Machine>::SseSuffix::None> {
-public:
-  static InstX86Pandn *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Pandn>())
-        InstX86Pandn(Func, Dest, Source);
-  }
-
-private:
-  InstX86Pandn(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pandn, false,
-                            InstX86Base<Machine>::SseSuffix::None>(Func, Dest,
-                                                                   Source) {}
-};
-
-template <class Machine>
-class InstX86Maxss
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Maxss, true,
-                                 InstX86Base<Machine>::SseSuffix::Scalar> {
-public:
-  static InstX86Maxss *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Maxss>())
-        InstX86Maxss(Func, Dest, Source);
-  }
-
-private:
-  InstX86Maxss(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Maxss, true,
-                            InstX86Base<Machine>::SseSuffix::Scalar>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Minss
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Minss, true,
-                                 InstX86Base<Machine>::SseSuffix::Scalar> {
-public:
-  static InstX86Minss *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Minss>())
-        InstX86Minss(Func, Dest, Source);
-  }
-
-private:
-  InstX86Minss(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Minss, true,
-                            InstX86Base<Machine>::SseSuffix::Scalar>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Or
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Or> {
-public:
-  static InstX86Or *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Or>()) InstX86Or(Func, Dest, Source);
-  }
-
-private:
-  InstX86Or(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Or>(Func, Dest,
-                                                               Source) {}
-};
-
-template <class Machine>
-class InstX86Orps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Orps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Orps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Orps>()) InstX86Orps(Func, Dest, Source);
-  }
-
-private:
-  InstX86Orps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Orps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86OrRMW
-    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::OrRMW> {
-public:
-  static InstX86OrRMW *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-         Operand *Src1) {
-    return new (Func->allocate<InstX86OrRMW>())
-        InstX86OrRMW(Func, DestSrc0, Src1);
-  }
-
-private:
-  InstX86OrRMW(Cfg *Func,
-               typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-               Operand *Src1)
-      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::OrRMW>(
-            Func, DestSrc0, Src1) {}
-};
-
-template <class Machine>
-class InstX86Por
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Por, false,
-                                 InstX86Base<Machine>::SseSuffix::None> {
-public:
-  static InstX86Por *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Por>()) InstX86Por(Func, Dest, Source);
-  }
-
-private:
-  InstX86Por(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Por, false,
-                            InstX86Base<Machine>::SseSuffix::None>(Func, Dest,
-                                                                   Source) {}
-};
-
-template <class Machine>
-class InstX86Xor
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Xor> {
-public:
-  static InstX86Xor *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Xor>()) InstX86Xor(Func, Dest, Source);
-  }
-
-private:
-  InstX86Xor(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Xor>(Func, Dest,
-                                                                Source) {}
-};
-
-template <class Machine>
-class InstX86Xorps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Xorps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Xorps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Xorps>())
-        InstX86Xorps(Func, Dest, Source);
-  }
-
-private:
-  InstX86Xorps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Xorps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86XorRMW
-    : public InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::XorRMW> {
-public:
-  static InstX86XorRMW *
-  create(Cfg *Func,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-         Operand *Src1) {
-    return new (Func->allocate<InstX86XorRMW>())
-        InstX86XorRMW(Func, DestSrc0, Src1);
-  }
-
-private:
-  InstX86XorRMW(Cfg *Func,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *DestSrc0,
-                Operand *Src1)
-      : InstX86BaseBinopRMW<Machine, InstX86Base<Machine>::XorRMW>(
-            Func, DestSrc0, Src1) {}
-};
-
-template <class Machine>
-class InstX86Pxor
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pxor, false,
-                                 InstX86Base<Machine>::SseSuffix::None> {
-public:
-  static InstX86Pxor *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Pxor>()) InstX86Pxor(Func, Dest, Source);
-  }
-
-private:
-  InstX86Pxor(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pxor, false,
-                            InstX86Base<Machine>::SseSuffix::None>(Func, Dest,
-                                                                   Source) {}
-};
-
-template <class Machine>
-class InstX86Imul
-    : public InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Imul> {
-public:
-  static InstX86Imul *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Imul>()) InstX86Imul(Func, Dest, Source);
-  }
-
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-
-private:
-  InstX86Imul(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPR<Machine, InstX86Base<Machine>::Imul>(Func, Dest,
-                                                                 Source) {}
-};
-
-template <class Machine>
-class InstX86ImulImm
-    : public InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::ImulImm> {
-public:
-  static InstX86ImulImm *create(Cfg *Func, Variable *Dest, Operand *Source0,
-                                Operand *Source1) {
-    return new (Func->allocate<InstX86ImulImm>())
-        InstX86ImulImm(Func, Dest, Source0, Source1);
-  }
-
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-
-private:
-  InstX86ImulImm(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
-      : InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::ImulImm>(
-            Func, Dest, Source0, Source1) {}
-};
-
-template <class Machine>
-class InstX86Mulps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Mulps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Mulps>())
-        InstX86Mulps(Func, Dest, Source);
-  }
-
-private:
-  InstX86Mulps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Mulss
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulss, false,
-                                 InstX86Base<Machine>::SseSuffix::Scalar> {
-public:
-  static InstX86Mulss *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Mulss>())
-        InstX86Mulss(Func, Dest, Source);
-  }
-
-private:
-  InstX86Mulss(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulss, false,
-                            InstX86Base<Machine>::SseSuffix::Scalar>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Pmull
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmull, true,
-                                 InstX86Base<Machine>::SseSuffix::Integral> {
-public:
-  static InstX86Pmull *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    bool TypesAreValid =
-        Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
-    auto *Target = InstX86Base<Machine>::getTarget(Func);
-    bool InstructionSetIsValid =
-        Dest->getType() == IceType_v8i16 ||
-        Target->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1;
-    (void)TypesAreValid;
-    (void)InstructionSetIsValid;
-    assert(TypesAreValid);
-    assert(InstructionSetIsValid);
-    return new (Func->allocate<InstX86Pmull>())
-        InstX86Pmull(Func, Dest, Source);
-  }
-
-private:
-  InstX86Pmull(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmull, true,
-                            InstX86Base<Machine>::SseSuffix::Integral>(
-            Func, Dest, Source) {}
-};
-
-template <class Machine>
-class InstX86Pmuludq
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmuludq, false,
-                                 InstX86Base<Machine>::SseSuffix::None> {
-public:
-  static InstX86Pmuludq *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    assert(Dest->getType() == IceType_v4i32 &&
-           Source->getType() == IceType_v4i32);
-    return new (Func->allocate<InstX86Pmuludq>())
-        InstX86Pmuludq(Func, Dest, Source);
-  }
-
-private:
-  InstX86Pmuludq(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pmuludq, false,
-                            InstX86Base<Machine>::SseSuffix::None>(Func, Dest,
-                                                                   Source) {}
-};
-
-template <class Machine>
-class InstX86Divps
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divps, true,
-                                 InstX86Base<Machine>::SseSuffix::Packed> {
-public:
-  static InstX86Divps *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Divps>())
-        InstX86Divps(Func, Dest, Source);
-  }
-
-private:
-  InstX86Divps(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divps, true,
-                            InstX86Base<Machine>::SseSuffix::Packed>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Divss
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divss, false,
-                                 InstX86Base<Machine>::SseSuffix::Scalar> {
-public:
-  static InstX86Divss *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Divss>())
-        InstX86Divss(Func, Dest, Source);
-  }
-
-private:
-  InstX86Divss(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Divss, false,
-                            InstX86Base<Machine>::SseSuffix::Scalar>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Rol
-    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Rol> {
-public:
-  static InstX86Rol *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Rol>()) InstX86Rol(Func, Dest, Source);
-  }
-
-private:
-  InstX86Rol(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Rol>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Shl
-    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shl> {
-public:
-  static InstX86Shl *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Shl>()) InstX86Shl(Func, Dest, Source);
-  }
-
-private:
-  InstX86Shl(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shl>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Psll
-    : public InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psll> {
-public:
-  static InstX86Psll *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    assert(Dest->getType() == IceType_v8i16 ||
-           Dest->getType() == IceType_v8i1 ||
-           Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v4i1);
-    return new (Func->allocate<InstX86Psll>()) InstX86Psll(Func, Dest, Source);
-  }
-
-private:
-  InstX86Psll(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psll>(
-            Func, Dest, Source) {}
-};
-
-template <class Machine>
-class InstX86Psrl
-    : public InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psrl,
-                                      true> {
-public:
-  static InstX86Psrl *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Psrl>()) InstX86Psrl(Func, Dest, Source);
-  }
-
-private:
-  InstX86Psrl(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psrl, true>(
-            Func, Dest, Source) {}
-};
-
-template <class Machine>
-class InstX86Shr
-    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shr> {
-public:
-  static InstX86Shr *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Shr>()) InstX86Shr(Func, Dest, Source);
-  }
-
-private:
-  InstX86Shr(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Shr>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Sar
-    : public InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Sar> {
-public:
-  static InstX86Sar *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Sar>()) InstX86Sar(Func, Dest, Source);
-  }
-
-private:
-  InstX86Sar(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopGPRShift<Machine, InstX86Base<Machine>::Sar>(Func, Dest,
-                                                                     Source) {}
-};
-
-template <class Machine>
-class InstX86Psra
-    : public InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psra> {
-public:
-  static InstX86Psra *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    assert(Dest->getType() == IceType_v8i16 ||
-           Dest->getType() == IceType_v8i1 ||
-           Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v4i1);
-    return new (Func->allocate<InstX86Psra>()) InstX86Psra(Func, Dest, Source);
-  }
-
-private:
-  InstX86Psra(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmmShift<Machine, InstX86Base<Machine>::Psra>(
-            Func, Dest, Source) {}
-};
-
-template <class Machine>
-class InstX86Pcmpeq
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpeq, true,
-                                 InstX86Base<Machine>::SseSuffix::Integral> {
-public:
-  static InstX86Pcmpeq *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Pcmpeq>())
-        InstX86Pcmpeq(Func, Dest, Source);
-  }
-
-private:
-  InstX86Pcmpeq(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpeq, true,
-                            InstX86Base<Machine>::SseSuffix::Integral>(
-            Func, Dest, Source) {}
-};
-
-template <class Machine>
-class InstX86Pcmpgt
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpgt, true,
-                                 InstX86Base<Machine>::SseSuffix::Integral> {
-public:
-  static InstX86Pcmpgt *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86Pcmpgt>())
-        InstX86Pcmpgt(Func, Dest, Source);
-  }
-
-private:
-  InstX86Pcmpgt(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Pcmpgt, true,
-                            InstX86Base<Machine>::SseSuffix::Integral>(
-            Func, Dest, Source) {}
-};
-
-/// movss is only a binary operation when the source and dest operands are both
-/// registers (the high bits of dest are left untouched). In other cases, it
-/// behaves like a copy (mov-like) operation (and the high bits of dest are
-/// cleared). InstX86Movss will assert that both its source and dest operands
-/// are registers, so the lowering code should use _mov instead of _movss in
-/// cases where a copy operation is intended.
-template <class Machine>
-class InstX86MovssRegs
-    : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::MovssRegs,
-                                 false, InstX86Base<Machine>::SseSuffix::None> {
-public:
-  static InstX86MovssRegs *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX86MovssRegs>())
-        InstX86MovssRegs(Func, Dest, Source);
-  }
-
-  void emitIAS(const Cfg *Func) const override;
-
-private:
-  InstX86MovssRegs(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::MovssRegs, false,
-                            InstX86Base<Machine>::SseSuffix::None>(Func, Dest,
-                                                                   Source) {}
-};
-
-template <class Machine>
-class InstX86Idiv
-    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Idiv> {
-public:
-  static InstX86Idiv *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                             Operand *Source2) {
-    return new (Func->allocate<InstX86Idiv>())
-        InstX86Idiv(Func, Dest, Source1, Source2);
-  }
-
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-
-private:
-  InstX86Idiv(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Idiv>(
-            Func, Dest, Source1, Source2) {}
-};
-
-template <class Machine>
-class InstX86Div
-    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Div> {
-public:
-  static InstX86Div *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                            Operand *Source2) {
-    return new (Func->allocate<InstX86Div>())
-        InstX86Div(Func, Dest, Source1, Source2);
-  }
-
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-
-private:
-  InstX86Div(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Div>(
-            Func, Dest, Source1, Source2) {}
-};
-
-template <class Machine>
-class InstX86Insertps
-    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Insertps> {
-public:
-  static InstX86Insertps *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                                 Operand *Source2) {
-    return new (Func->allocate<InstX86Insertps>())
-        InstX86Insertps(Func, Dest, Source1, Source2);
-  }
-
-  void emitIAS(const Cfg *Func) const override;
-
-private:
-  InstX86Insertps(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Insertps>(
-            Func, Dest, Source1, Source2) {}
-};
-
-template <class Machine>
-class InstX86Pinsr
-    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Pinsr> {
-public:
-  static InstX86Pinsr *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                              Operand *Source2) {
-    // pinsrb and pinsrd are SSE4.1 instructions.
-    assert(Dest->getType() == IceType_v8i16 ||
-           Dest->getType() == IceType_v8i1 ||
-           InstX86Base<Machine>::getTarget(Func)->getInstructionSet() >=
-               InstX86Base<Machine>::Traits::SSE4_1);
-    return new (Func->allocate<InstX86Pinsr>())
-        InstX86Pinsr(Func, Dest, Source1, Source2);
-  }
-
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-
-private:
-  InstX86Pinsr(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Pinsr>(
-            Func, Dest, Source1, Source2) {}
-};
-
-template <class Machine>
-class InstX86Shufps
-    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Shufps> {
-public:
-  static InstX86Shufps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+  class InstX86Idiv : public InstX86BaseTernop<InstX86Base::Idiv> {
+  public:
+    static InstX86Idiv *create(Cfg *Func, Variable *Dest, Operand *Source1,
                                Operand *Source2) {
-    return new (Func->allocate<InstX86Shufps>())
-        InstX86Shufps(Func, Dest, Source1, Source2);
-  }
+      return new (Func->allocate<InstX86Idiv>())
+          InstX86Idiv(Func, Dest, Source1, Source2);
+    }
 
-  void emitIAS(const Cfg *Func) const override;
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
 
-private:
-  InstX86Shufps(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Shufps>(
-            Func, Dest, Source1, Source2) {}
-};
+  private:
+    InstX86Idiv(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+        : InstX86BaseTernop<InstX86Base::Idiv>(Func, Dest, Source1, Source2) {}
+  };
 
-template <class Machine>
-class InstX86Blendvps
-    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Blendvps> {
-public:
-  static InstX86Blendvps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+  class InstX86Div : public InstX86BaseTernop<InstX86Base::Div> {
+  public:
+    static InstX86Div *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                              Operand *Source2) {
+      return new (Func->allocate<InstX86Div>())
+          InstX86Div(Func, Dest, Source1, Source2);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Div(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+        : InstX86BaseTernop<InstX86Base::Div>(Func, Dest, Source1, Source2) {}
+  };
+
+  class InstX86Insertps : public InstX86BaseTernop<InstX86Base::Insertps> {
+  public:
+    static InstX86Insertps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                   Operand *Source2) {
+      return new (Func->allocate<InstX86Insertps>())
+          InstX86Insertps(Func, Dest, Source1, Source2);
+    }
+
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Insertps(Cfg *Func, Variable *Dest, Operand *Source1,
+                    Operand *Source2)
+        : InstX86BaseTernop<InstX86Base::Insertps>(Func, Dest, Source1,
+                                                   Source2) {}
+  };
+
+  class InstX86Pinsr : public InstX86BaseTernop<InstX86Base::Pinsr> {
+  public:
+    static InstX86Pinsr *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                Operand *Source2) {
+      // pinsrb and pinsrd are SSE4.1 instructions.
+      assert(
+          Dest->getType() == IceType_v8i16 || Dest->getType() == IceType_v8i1 ||
+          InstX86Base::getTarget(Func)->getInstructionSet() >= Traits::SSE4_1);
+      return new (Func->allocate<InstX86Pinsr>())
+          InstX86Pinsr(Func, Dest, Source1, Source2);
+    }
+
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+
+  private:
+    InstX86Pinsr(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+        : InstX86BaseTernop<InstX86Base::Pinsr>(Func, Dest, Source1, Source2) {}
+  };
+
+  class InstX86Shufps : public InstX86BaseTernop<InstX86Base::Shufps> {
+  public:
+    static InstX86Shufps *create(Cfg *Func, Variable *Dest, Operand *Source1,
                                  Operand *Source2) {
-    assert(InstX86Base<Machine>::getTarget(Func)->getInstructionSet() >=
-           InstX86Base<Machine>::Traits::SSE4_1);
-    return new (Func->allocate<InstX86Blendvps>())
-        InstX86Blendvps(Func, Dest, Source1, Source2);
-  }
+      return new (Func->allocate<InstX86Shufps>())
+          InstX86Shufps(Func, Dest, Source1, Source2);
+    }
 
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Fund) const override;
+    void emitIAS(const Cfg *Func) const override;
 
-private:
-  InstX86Blendvps(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Blendvps>(
-            Func, Dest, Source1, Source2) {}
-};
+  private:
+    InstX86Shufps(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
+        : InstX86BaseTernop<InstX86Base::Shufps>(Func, Dest, Source1, Source2) {
+    }
+  };
 
-template <class Machine>
-class InstX86Pblendvb
-    : public InstX86BaseTernop<Machine, InstX86Base<Machine>::Pblendvb> {
-public:
-  static InstX86Pblendvb *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                                 Operand *Source2) {
-    assert(InstX86Base<Machine>::getTarget(Func)->getInstructionSet() >=
-           InstX86Base<Machine>::Traits::SSE4_1);
-    return new (Func->allocate<InstX86Pblendvb>())
-        InstX86Pblendvb(Func, Dest, Source1, Source2);
-  }
+  class InstX86Blendvps : public InstX86BaseTernop<InstX86Base::Blendvps> {
+  public:
+    static InstX86Blendvps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                   Operand *Source2) {
+      assert(InstX86Base::getTarget(Func)->getInstructionSet() >=
+             Traits::SSE4_1);
+      return new (Func->allocate<InstX86Blendvps>())
+          InstX86Blendvps(Func, Dest, Source1, Source2);
+    }
 
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
 
-private:
-  InstX86Pblendvb(Cfg *Func, Variable *Dest, Operand *Source1, Operand *Source2)
-      : InstX86BaseTernop<Machine, InstX86Base<Machine>::Pblendvb>(
-            Func, Dest, Source1, Source2) {}
-};
+  private:
+    InstX86Blendvps(Cfg *Func, Variable *Dest, Operand *Source1,
+                    Operand *Source2)
+        : InstX86BaseTernop<InstX86Base::Blendvps>(Func, Dest, Source1,
+                                                   Source2) {}
+  };
 
-template <class Machine>
-class InstX86Pextr
-    : public InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pextr> {
-public:
-  static InstX86Pextr *create(Cfg *Func, Variable *Dest, Operand *Source0,
-                              Operand *Source1) {
-    assert(Source0->getType() == IceType_v8i16 ||
-           Source0->getType() == IceType_v8i1 ||
-           InstX86Base<Machine>::getTarget(Func)->getInstructionSet() >=
-               InstX86Base<Machine>::Traits::SSE4_1);
-    return new (Func->allocate<InstX86Pextr>())
-        InstX86Pextr(Func, Dest, Source0, Source1);
-  }
+  class InstX86Pblendvb : public InstX86BaseTernop<InstX86Base::Pblendvb> {
+  public:
+    static InstX86Pblendvb *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                   Operand *Source2) {
+      assert(InstX86Base::getTarget(Func)->getInstructionSet() >=
+             Traits::SSE4_1);
+      return new (Func->allocate<InstX86Pblendvb>())
+          InstX86Pblendvb(Func, Dest, Source1, Source2);
+    }
 
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
 
-private:
-  InstX86Pextr(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
-      : InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pextr>(
-            Func, Dest, Source0, Source1) {}
-};
+  private:
+    InstX86Pblendvb(Cfg *Func, Variable *Dest, Operand *Source1,
+                    Operand *Source2)
+        : InstX86BaseTernop<InstX86Base::Pblendvb>(Func, Dest, Source1,
+                                                   Source2) {}
+  };
 
-template <class Machine>
-class InstX86Pshufd
-    : public InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pshufd> {
-public:
-  static InstX86Pshufd *create(Cfg *Func, Variable *Dest, Operand *Source0,
-                               Operand *Source1) {
-    return new (Func->allocate<InstX86Pshufd>())
-        InstX86Pshufd(Func, Dest, Source0, Source1);
-  }
+  class InstX86Pextr : public InstX86BaseThreeAddressop<InstX86Base::Pextr> {
+  public:
+    static InstX86Pextr *create(Cfg *Func, Variable *Dest, Operand *Source0,
+                                Operand *Source1) {
+      assert(Source0->getType() == IceType_v8i16 ||
+             Source0->getType() == IceType_v8i1 ||
+             InstX86Base::getTarget(Func)->getInstructionSet() >=
+                 Traits::SSE4_1);
+      return new (Func->allocate<InstX86Pextr>())
+          InstX86Pextr(Func, Dest, Source0, Source1);
+    }
 
-  void emitIAS(const Cfg *Func) const override;
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
 
-private:
-  InstX86Pshufd(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
-      : InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::Pshufd>(
-            Func, Dest, Source0, Source1) {}
-};
+  private:
+    InstX86Pextr(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
+        : InstX86BaseThreeAddressop<InstX86Base::Pextr>(Func, Dest, Source0,
+                                                        Source1) {}
+  };
 
-/// Base class for a lockable x86-32 instruction (emits a locked prefix).
-template <class Machine>
-class InstX86BaseLockable : public InstX86Base<Machine> {
-  InstX86BaseLockable() = delete;
-  InstX86BaseLockable(const InstX86BaseLockable &) = delete;
-  InstX86BaseLockable &operator=(const InstX86BaseLockable &) = delete;
+  class InstX86Pshufd : public InstX86BaseThreeAddressop<InstX86Base::Pshufd> {
+  public:
+    static InstX86Pshufd *create(Cfg *Func, Variable *Dest, Operand *Source0,
+                                 Operand *Source1) {
+      return new (Func->allocate<InstX86Pshufd>())
+          InstX86Pshufd(Func, Dest, Source0, Source1);
+    }
 
-protected:
-  bool Locked;
+    void emitIAS(const Cfg *Func) const override;
 
-  InstX86BaseLockable(Cfg *Func,
-                      typename InstX86Base<Machine>::InstKindX86 Kind,
-                      SizeT Maxsrcs, Variable *Dest, bool Locked)
-      : InstX86Base<Machine>(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
-    // Assume that such instructions are used for Atomics and be careful with
-    // optimizations.
-    this->HasSideEffects = Locked;
-  }
-};
+  private:
+    InstX86Pshufd(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
+        : InstX86BaseThreeAddressop<InstX86Base::Pshufd>(Func, Dest, Source0,
+                                                         Source1) {}
+  };
 
-/// Mul instruction - unsigned multiply.
-template <class Machine> class InstX86Mul final : public InstX86Base<Machine> {
-  InstX86Mul() = delete;
-  InstX86Mul(const InstX86Mul &) = delete;
-  InstX86Mul &operator=(const InstX86Mul &) = delete;
+  /// Base class for a lockable x86-32 instruction (emits a lock prefix).
+  class InstX86BaseLockable : public InstX86Base {
+    InstX86BaseLockable() = delete;
+    InstX86BaseLockable(const InstX86BaseLockable &) = delete;
+    InstX86BaseLockable &operator=(const InstX86BaseLockable &) = delete;
 
-public:
-  static InstX86Mul *create(Cfg *Func, Variable *Dest, Variable *Source1,
-                            Operand *Source2) {
-    return new (Func->allocate<InstX86Mul>())
-        InstX86Mul(Func, Dest, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Mul);
-  }
+  protected:
+    bool Locked;
 
-private:
-  InstX86Mul(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
-};
+    InstX86BaseLockable(Cfg *Func, typename InstX86Base::InstKindX86 Kind,
+                        SizeT Maxsrcs, Variable *Dest, bool Locked)
+        : InstX86Base(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
+      // Assume that such instructions are used for Atomics and be careful with
+      // optimizations.
+      this->HasSideEffects = Locked;
+    }
+  };
 
-/// Shld instruction - shift across a pair of operands.
-template <class Machine> class InstX86Shld final : public InstX86Base<Machine> {
-  InstX86Shld() = delete;
-  InstX86Shld(const InstX86Shld &) = delete;
-  InstX86Shld &operator=(const InstX86Shld &) = delete;
+  /// Mul instruction - unsigned multiply.
+  class InstX86Mul final : public InstX86Base {
+    InstX86Mul() = delete;
+    InstX86Mul(const InstX86Mul &) = delete;
+    InstX86Mul &operator=(const InstX86Mul &) = delete;
 
-public:
-  static InstX86Shld *create(Cfg *Func, Variable *Dest, Variable *Source1,
-                             Operand *Source2) {
-    return new (Func->allocate<InstX86Shld>())
-        InstX86Shld(Func, Dest, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Shld);
-  }
+  public:
+    static InstX86Mul *create(Cfg *Func, Variable *Dest, Variable *Source1,
+                              Operand *Source2) {
+      return new (Func->allocate<InstX86Mul>())
+          InstX86Mul(Func, Dest, Source1, Source2);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Mul);
+    }
 
-private:
-  InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
-};
+  private:
+    InstX86Mul(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
+  };
 
-/// Shrd instruction - shift across a pair of operands.
-template <class Machine> class InstX86Shrd final : public InstX86Base<Machine> {
-  InstX86Shrd() = delete;
-  InstX86Shrd(const InstX86Shrd &) = delete;
-  InstX86Shrd &operator=(const InstX86Shrd &) = delete;
+  /// Shld instruction - shift across a pair of operands.
+  class InstX86Shld final : public InstX86Base {
+    InstX86Shld() = delete;
+    InstX86Shld(const InstX86Shld &) = delete;
+    InstX86Shld &operator=(const InstX86Shld &) = delete;
 
-public:
-  static InstX86Shrd *create(Cfg *Func, Variable *Dest, Variable *Source1,
-                             Operand *Source2) {
-    return new (Func->allocate<InstX86Shrd>())
-        InstX86Shrd(Func, Dest, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Shrd);
-  }
+  public:
+    static InstX86Shld *create(Cfg *Func, Variable *Dest, Variable *Source1,
+                               Operand *Source2) {
+      return new (Func->allocate<InstX86Shld>())
+          InstX86Shld(Func, Dest, Source1, Source2);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Shld);
+    }
 
-private:
-  InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
-};
+  private:
+    InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
+  };
 
-/// Conditional move instruction.
-template <class Machine> class InstX86Cmov final : public InstX86Base<Machine> {
-  InstX86Cmov() = delete;
-  InstX86Cmov(const InstX86Cmov &) = delete;
-  InstX86Cmov &operator=(const InstX86Cmov &) = delete;
+  /// Shrd instruction - shift across a pair of operands.
+  class InstX86Shrd final : public InstX86Base {
+    InstX86Shrd() = delete;
+    InstX86Shrd(const InstX86Shrd &) = delete;
+    InstX86Shrd &operator=(const InstX86Shrd &) = delete;
 
-public:
-  static InstX86Cmov *
-  create(Cfg *Func, Variable *Dest, Operand *Source,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Cond) {
-    return new (Func->allocate<InstX86Cmov>())
-        InstX86Cmov(Func, Dest, Source, Cond);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cmov);
-  }
+  public:
+    static InstX86Shrd *create(Cfg *Func, Variable *Dest, Variable *Source1,
+                               Operand *Source2) {
+      return new (Func->allocate<InstX86Shrd>())
+          InstX86Shrd(Func, Dest, Source1, Source2);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Shrd);
+    }
 
-private:
-  InstX86Cmov(Cfg *Func, Variable *Dest, Operand *Source,
-              typename InstX86Base<Machine>::Traits::Cond::BrCond Cond);
+  private:
+    InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1, Operand *Source2);
+  };
 
-  typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
-};
+  /// Conditional move instruction.
+  class InstX86Cmov final : public InstX86Base {
+    InstX86Cmov() = delete;
+    InstX86Cmov(const InstX86Cmov &) = delete;
+    InstX86Cmov &operator=(const InstX86Cmov &) = delete;
 
-/// Cmpps instruction - compare packed singled-precision floating point values
-template <class Machine>
-class InstX86Cmpps final : public InstX86Base<Machine> {
-  InstX86Cmpps() = delete;
-  InstX86Cmpps(const InstX86Cmpps &) = delete;
-  InstX86Cmpps &operator=(const InstX86Cmpps &) = delete;
+  public:
+    static InstX86Cmov *create(Cfg *Func, Variable *Dest, Operand *Source,
+                               BrCond Cond) {
+      return new (Func->allocate<InstX86Cmov>())
+          InstX86Cmov(Func, Dest, Source, Cond);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Cmov);
+    }
 
-public:
-  static InstX86Cmpps *
-  create(Cfg *Func, Variable *Dest, Operand *Source,
-         typename InstX86Base<Machine>::Traits::Cond::CmppsCond Condition) {
-    return new (Func->allocate<InstX86Cmpps>())
-        InstX86Cmpps(Func, Dest, Source, Condition);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cmpps);
-  }
+  private:
+    InstX86Cmov(Cfg *Func, Variable *Dest, Operand *Source, BrCond Cond);
 
-private:
-  InstX86Cmpps(Cfg *Func, Variable *Dest, Operand *Source,
-               typename InstX86Base<Machine>::Traits::Cond::CmppsCond Cond);
+    BrCond Condition;
+  };
 
-  typename InstX86Base<Machine>::Traits::Cond::CmppsCond Condition;
-};
+  /// Cmpps instruction - compare packed single-precision floating-point values
+  class InstX86Cmpps final : public InstX86Base {
+    InstX86Cmpps() = delete;
+    InstX86Cmpps(const InstX86Cmpps &) = delete;
+    InstX86Cmpps &operator=(const InstX86Cmpps &) = delete;
 
-/// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
-/// equals eax. If so, the ZF is set and <desired> is stored in <dest>. If not,
-/// ZF is cleared and <dest> is copied to eax (or subregister). <dest> can be a
-/// register or memory, while <desired> must be a register. It is the user's
-/// responsibility to mark eax with a FakeDef.
-template <class Machine>
-class InstX86Cmpxchg final : public InstX86BaseLockable<Machine> {
-  InstX86Cmpxchg() = delete;
-  InstX86Cmpxchg(const InstX86Cmpxchg &) = delete;
-  InstX86Cmpxchg &operator=(const InstX86Cmpxchg &) = delete;
+  public:
+    static InstX86Cmpps *create(Cfg *Func, Variable *Dest, Operand *Source,
+                                CmppsCond Condition) {
+      return new (Func->allocate<InstX86Cmpps>())
+          InstX86Cmpps(Func, Dest, Source, Condition);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Cmpps);
+    }
 
-public:
-  static InstX86Cmpxchg *create(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
-                                Variable *Desired, bool Locked) {
-    return new (Func->allocate<InstX86Cmpxchg>())
-        InstX86Cmpxchg(Func, DestOrAddr, Eax, Desired, Locked);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cmpxchg);
-  }
+  private:
+    InstX86Cmpps(Cfg *Func, Variable *Dest, Operand *Source, CmppsCond Cond);
 
-private:
-  InstX86Cmpxchg(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
-                 Variable *Desired, bool Locked);
-};
+    CmppsCond Condition;
+  };
 
-/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64> equals
-/// edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>. If not, ZF is
-/// cleared and <m64> is copied to edx:eax. The caller is responsible for
-/// inserting FakeDefs to mark edx and eax as modified. <m64> must be a memory
-/// operand.
-template <class Machine>
-class InstX86Cmpxchg8b final : public InstX86BaseLockable<Machine> {
-  InstX86Cmpxchg8b() = delete;
-  InstX86Cmpxchg8b(const InstX86Cmpxchg8b &) = delete;
-  InstX86Cmpxchg8b &operator=(const InstX86Cmpxchg8b &) = delete;
+  /// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
+  /// equals eax. If so, the ZF is set and <desired> is stored in <dest>. If
+  /// not, ZF is cleared and <dest> is copied to eax (or subregister). <dest>
+  /// can be a register or memory, while <desired> must be a register. It is
+  /// the user's responsibility to mark eax with a FakeDef.
+  class InstX86Cmpxchg final : public InstX86BaseLockable {
+    InstX86Cmpxchg() = delete;
+    InstX86Cmpxchg(const InstX86Cmpxchg &) = delete;
+    InstX86Cmpxchg &operator=(const InstX86Cmpxchg &) = delete;
 
-public:
-  static InstX86Cmpxchg8b *
-  create(Cfg *Func, typename InstX86Base<Machine>::Traits::X86OperandMem *Dest,
-         Variable *Edx, Variable *Eax, Variable *Ecx, Variable *Ebx,
-         bool Locked) {
-    return new (Func->allocate<InstX86Cmpxchg8b>())
-        InstX86Cmpxchg8b(Func, Dest, Edx, Eax, Ecx, Ebx, Locked);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst,
-                                           InstX86Base<Machine>::Cmpxchg8b);
-  }
+  public:
+    static InstX86Cmpxchg *create(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
+                                  Variable *Desired, bool Locked) {
+      return new (Func->allocate<InstX86Cmpxchg>())
+          InstX86Cmpxchg(Func, DestOrAddr, Eax, Desired, Locked);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Cmpxchg);
+    }
 
-private:
-  InstX86Cmpxchg8b(Cfg *Func,
-                   typename InstX86Base<Machine>::Traits::X86OperandMem *Dest,
-                   Variable *Edx, Variable *Eax, Variable *Ecx, Variable *Ebx,
-                   bool Locked);
-};
+  private:
+    InstX86Cmpxchg(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
+                   Variable *Desired, bool Locked);
+  };
 
-/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i} as
-/// appropriate.  s=float, d=double, i=int. X and Y are determined from dest/src
-/// types. Sign and zero extension on the integer operand needs to be done
-/// separately.
-template <class Machine> class InstX86Cvt final : public InstX86Base<Machine> {
-  InstX86Cvt() = delete;
-  InstX86Cvt(const InstX86Cvt &) = delete;
-  InstX86Cvt &operator=(const InstX86Cvt &) = delete;
+  /// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64> equals
+  /// edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>. If not, ZF
+  /// is cleared and <m64> is copied to edx:eax. The caller is responsible for
+  /// inserting FakeDefs to mark edx and eax as modified. <m64> must be a memory
+  /// operand.
+  class InstX86Cmpxchg8b final : public InstX86BaseLockable {
+    InstX86Cmpxchg8b() = delete;
+    InstX86Cmpxchg8b(const InstX86Cmpxchg8b &) = delete;
+    InstX86Cmpxchg8b &operator=(const InstX86Cmpxchg8b &) = delete;
 
-public:
-  enum CvtVariant { Si2ss, Tss2si, Float2float, Dq2ps, Tps2dq };
-  static InstX86Cvt *create(Cfg *Func, Variable *Dest, Operand *Source,
-                            CvtVariant Variant) {
-    return new (Func->allocate<InstX86Cvt>())
-        InstX86Cvt(Func, Dest, Source, Variant);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Cvt);
-  }
-  bool isTruncating() const { return Variant == Tss2si || Variant == Tps2dq; }
+  public:
+    static InstX86Cmpxchg8b *create(Cfg *Func, X86OperandMem *Dest,
+                                    Variable *Edx, Variable *Eax, Variable *Ecx,
+                                    Variable *Ebx, bool Locked) {
+      return new (Func->allocate<InstX86Cmpxchg8b>())
+          InstX86Cmpxchg8b(Func, Dest, Edx, Eax, Ecx, Ebx, Locked);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Cmpxchg8b);
+    }
 
-private:
-  CvtVariant Variant;
-  InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
-};
+  private:
+    InstX86Cmpxchg8b(Cfg *Func, X86OperandMem *Dest, Variable *Edx,
+                     Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked);
+  };
 
-/// cmp - Integer compare instruction.
-template <class Machine> class InstX86Icmp final : public InstX86Base<Machine> {
-  InstX86Icmp() = delete;
-  InstX86Icmp(const InstX86Icmp &) = delete;
-  InstX86Icmp &operator=(const InstX86Icmp &) = delete;
+  /// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i} as
+  /// appropriate.  s=float, d=double, i=int. X and Y are determined from
+  /// dest/src types. Sign and zero extension on the integer operand needs to be
+  /// done separately.
+  class InstX86Cvt final : public InstX86Base {
+    InstX86Cvt() = delete;
+    InstX86Cvt(const InstX86Cvt &) = delete;
+    InstX86Cvt &operator=(const InstX86Cvt &) = delete;
 
-public:
-  static InstX86Icmp *create(Cfg *Func, Operand *Src1, Operand *Src2) {
-    return new (Func->allocate<InstX86Icmp>()) InstX86Icmp(Func, Src1, Src2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Icmp);
-  }
+  public:
+    enum CvtVariant { Si2ss, Tss2si, Float2float, Dq2ps, Tps2dq };
+    static InstX86Cvt *create(Cfg *Func, Variable *Dest, Operand *Source,
+                              CvtVariant Variant) {
+      return new (Func->allocate<InstX86Cvt>())
+          InstX86Cvt(Func, Dest, Source, Variant);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Cvt);
+    }
+    bool isTruncating() const { return Variant == Tss2si || Variant == Tps2dq; }
 
-private:
-  InstX86Icmp(Cfg *Func, Operand *Src1, Operand *Src2);
-};
+  private:
+    CvtVariant Variant;
+    InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
+  };
 
-/// ucomiss/ucomisd - floating-point compare instruction.
-template <class Machine>
-class InstX86Ucomiss final : public InstX86Base<Machine> {
-  InstX86Ucomiss() = delete;
-  InstX86Ucomiss(const InstX86Ucomiss &) = delete;
-  InstX86Ucomiss &operator=(const InstX86Ucomiss &) = delete;
+  /// cmp - Integer compare instruction.
+  class InstX86Icmp final : public InstX86Base {
+    InstX86Icmp() = delete;
+    InstX86Icmp(const InstX86Icmp &) = delete;
+    InstX86Icmp &operator=(const InstX86Icmp &) = delete;
 
-public:
-  static InstX86Ucomiss *create(Cfg *Func, Operand *Src1, Operand *Src2) {
-    return new (Func->allocate<InstX86Ucomiss>())
-        InstX86Ucomiss(Func, Src1, Src2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Ucomiss);
-  }
+  public:
+    static InstX86Icmp *create(Cfg *Func, Operand *Src1, Operand *Src2) {
+      return new (Func->allocate<InstX86Icmp>()) InstX86Icmp(Func, Src1, Src2);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Icmp);
+    }
 
-private:
-  InstX86Ucomiss(Cfg *Func, Operand *Src1, Operand *Src2);
-};
+  private:
+    InstX86Icmp(Cfg *Func, Operand *Src1, Operand *Src2);
+  };
 
-/// UD2 instruction.
-template <class Machine> class InstX86UD2 final : public InstX86Base<Machine> {
-  InstX86UD2() = delete;
-  InstX86UD2(const InstX86UD2 &) = delete;
-  InstX86UD2 &operator=(const InstX86UD2 &) = delete;
+  /// ucomiss/ucomisd - floating-point compare instruction.
+  class InstX86Ucomiss final : public InstX86Base {
+    InstX86Ucomiss() = delete;
+    InstX86Ucomiss(const InstX86Ucomiss &) = delete;
+    InstX86Ucomiss &operator=(const InstX86Ucomiss &) = delete;
 
-public:
-  static InstX86UD2 *create(Cfg *Func) {
-    return new (Func->allocate<InstX86UD2>()) InstX86UD2(Func);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::UD2);
-  }
+  public:
+    static InstX86Ucomiss *create(Cfg *Func, Operand *Src1, Operand *Src2) {
+      return new (Func->allocate<InstX86Ucomiss>())
+          InstX86Ucomiss(Func, Src1, Src2);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Ucomiss);
+    }
 
-private:
-  explicit InstX86UD2(Cfg *Func);
-};
+  private:
+    InstX86Ucomiss(Cfg *Func, Operand *Src1, Operand *Src2);
+  };
 
-/// Test instruction.
-template <class Machine> class InstX86Test final : public InstX86Base<Machine> {
-  InstX86Test() = delete;
-  InstX86Test(const InstX86Test &) = delete;
-  InstX86Test &operator=(const InstX86Test &) = delete;
+  /// UD2 instruction.
+  class InstX86UD2 final : public InstX86Base {
+    InstX86UD2() = delete;
+    InstX86UD2(const InstX86UD2 &) = delete;
+    InstX86UD2 &operator=(const InstX86UD2 &) = delete;
+
+  public:
+    static InstX86UD2 *create(Cfg *Func) {
+      return new (Func->allocate<InstX86UD2>()) InstX86UD2(Func);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::UD2);
+    }
+
+  private:
+    explicit InstX86UD2(Cfg *Func);
+  };
+
+  /// Test instruction.
+  class InstX86Test final : public InstX86Base {
+    InstX86Test() = delete;
+    InstX86Test(const InstX86Test &) = delete;
+    InstX86Test &operator=(const InstX86Test &) = delete;
 
-public:
-  static InstX86Test *create(Cfg *Func, Operand *Source1, Operand *Source2) {
-    return new (Func->allocate<InstX86Test>())
-        InstX86Test(Func, Source1, Source2);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Test);
-  }
+  public:
+    static InstX86Test *create(Cfg *Func, Operand *Source1, Operand *Source2) {
+      return new (Func->allocate<InstX86Test>())
+          InstX86Test(Func, Source1, Source2);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Test);
+    }
 
-private:
-  InstX86Test(Cfg *Func, Operand *Source1, Operand *Source2);
-};
+  private:
+    InstX86Test(Cfg *Func, Operand *Source1, Operand *Source2);
+  };
 
-/// Mfence instruction.
-template <class Machine>
-class InstX86Mfence final : public InstX86Base<Machine> {
-  InstX86Mfence() = delete;
-  InstX86Mfence(const InstX86Mfence &) = delete;
-  InstX86Mfence &operator=(const InstX86Mfence &) = delete;
+  /// Mfence instruction.
+  class InstX86Mfence final : public InstX86Base {
+    InstX86Mfence() = delete;
+    InstX86Mfence(const InstX86Mfence &) = delete;
+    InstX86Mfence &operator=(const InstX86Mfence &) = delete;
 
-public:
-  static InstX86Mfence *create(Cfg *Func) {
-    return new (Func->allocate<InstX86Mfence>()) InstX86Mfence(Func);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Mfence);
-  }
+  public:
+    static InstX86Mfence *create(Cfg *Func) {
+      return new (Func->allocate<InstX86Mfence>()) InstX86Mfence(Func);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Mfence);
+    }
 
-private:
-  explicit InstX86Mfence(Cfg *Func);
-};
+  private:
+    explicit InstX86Mfence(Cfg *Func);
+  };
 
-/// This is essentially a "mov" instruction with an
-/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as
-/// the destination. It's important for liveness that there is no Dest operand.
-template <class Machine>
-class InstX86Store final : public InstX86Base<Machine> {
-  InstX86Store() = delete;
-  InstX86Store(const InstX86Store &) = delete;
-  InstX86Store &operator=(const InstX86Store &) = delete;
+  /// This is essentially a "mov" instruction with an X86OperandMem operand
+  /// instead of Variable as the destination. It's important for liveness that
+  /// there is no Dest operand.
+  class InstX86Store final : public InstX86Base {
+    InstX86Store() = delete;
+    InstX86Store(const InstX86Store &) = delete;
+    InstX86Store &operator=(const InstX86Store &) = delete;
 
-public:
-  static InstX86Store *
-  create(Cfg *Func, Operand *Value,
-         typename InstX86Base<Machine>::Traits::X86Operand *Mem) {
-    return new (Func->allocate<InstX86Store>()) InstX86Store(Func, Value, Mem);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Store);
-  }
+  public:
+    static InstX86Store *create(Cfg *Func, Operand *Value, X86Operand *Mem) {
+      return new (Func->allocate<InstX86Store>())
+          InstX86Store(Func, Value, Mem);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Store);
+    }
 
-private:
-  InstX86Store(Cfg *Func, Operand *Value,
-               typename InstX86Base<Machine>::Traits::X86Operand *Mem);
-};
+  private:
+    InstX86Store(Cfg *Func, Operand *Value, X86Operand *Mem);
+  };
 
-/// This is essentially a vector "mov" instruction with an typename
-/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as
-/// the destination. It's important for liveness that there is no Dest operand.
-/// The source must be an Xmm register, since Dest is mem.
-template <class Machine>
-class InstX86StoreP final : public InstX86Base<Machine> {
-  InstX86StoreP() = delete;
-  InstX86StoreP(const InstX86StoreP &) = delete;
-  InstX86StoreP &operator=(const InstX86StoreP &) = delete;
+  /// This is essentially a vector "mov" instruction with an X86OperandMem
+  /// operand instead of Variable as the destination. It's important for
+  /// liveness that there is no Dest operand. The source must be an Xmm
+  /// register, since Dest is mem.
+  class InstX86StoreP final : public InstX86Base {
+    InstX86StoreP() = delete;
+    InstX86StoreP(const InstX86StoreP &) = delete;
+    InstX86StoreP &operator=(const InstX86StoreP &) = delete;
 
-public:
-  static InstX86StoreP *
-  create(Cfg *Func, Variable *Value,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *Mem) {
-    return new (Func->allocate<InstX86StoreP>())
-        InstX86StoreP(Func, Value, Mem);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::StoreP);
-  }
+  public:
+    static InstX86StoreP *create(Cfg *Func, Variable *Value,
+                                 X86OperandMem *Mem) {
+      return new (Func->allocate<InstX86StoreP>())
+          InstX86StoreP(Func, Value, Mem);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::StoreP);
+    }
 
-private:
-  InstX86StoreP(Cfg *Func, Variable *Value,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *Mem);
-};
+  private:
+    InstX86StoreP(Cfg *Func, Variable *Value, X86OperandMem *Mem);
+  };
 
-template <class Machine>
-class InstX86StoreQ final : public InstX86Base<Machine> {
-  InstX86StoreQ() = delete;
-  InstX86StoreQ(const InstX86StoreQ &) = delete;
-  InstX86StoreQ &operator=(const InstX86StoreQ &) = delete;
+  class InstX86StoreQ final : public InstX86Base {
+    InstX86StoreQ() = delete;
+    InstX86StoreQ(const InstX86StoreQ &) = delete;
+    InstX86StoreQ &operator=(const InstX86StoreQ &) = delete;
 
-public:
-  static InstX86StoreQ *
-  create(Cfg *Func, Variable *Value,
-         typename InstX86Base<Machine>::Traits::X86OperandMem *Mem) {
-    return new (Func->allocate<InstX86StoreQ>())
-        InstX86StoreQ(Func, Value, Mem);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::StoreQ);
-  }
+  public:
+    static InstX86StoreQ *create(Cfg *Func, Variable *Value,
+                                 X86OperandMem *Mem) {
+      return new (Func->allocate<InstX86StoreQ>())
+          InstX86StoreQ(Func, Value, Mem);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::StoreQ);
+    }
 
-private:
-  InstX86StoreQ(Cfg *Func, Variable *Value,
-                typename InstX86Base<Machine>::Traits::X86OperandMem *Mem);
-};
+  private:
+    InstX86StoreQ(Cfg *Func, Variable *Value, X86OperandMem *Mem);
+  };
 
-/// Nop instructions of varying length
-template <class Machine> class InstX86Nop final : public InstX86Base<Machine> {
-  InstX86Nop() = delete;
-  InstX86Nop(const InstX86Nop &) = delete;
-  InstX86Nop &operator=(const InstX86Nop &) = delete;
+  /// Nop instructions of varying length
+  class InstX86Nop final : public InstX86Base {
+    InstX86Nop() = delete;
+    InstX86Nop(const InstX86Nop &) = delete;
+    InstX86Nop &operator=(const InstX86Nop &) = delete;
 
-public:
-  // TODO: Replace with enum.
-  using NopVariant = unsigned;
+  public:
+    // TODO: Replace with enum.
+    using NopVariant = unsigned;
 
-  static InstX86Nop *create(Cfg *Func, NopVariant Variant) {
-    return new (Func->allocate<InstX86Nop>()) InstX86Nop(Func, Variant);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Nop);
-  }
+    static InstX86Nop *create(Cfg *Func, NopVariant Variant) {
+      return new (Func->allocate<InstX86Nop>()) InstX86Nop(Func, Variant);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Nop);
+    }
 
-private:
-  InstX86Nop(Cfg *Func, SizeT Length);
+  private:
+    InstX86Nop(Cfg *Func, SizeT Length);
 
-  NopVariant Variant;
-};
+    NopVariant Variant;
+  };
 
-/// Fld - load a value onto the x87 FP stack.
-template <class Machine> class InstX86Fld final : public InstX86Base<Machine> {
-  InstX86Fld() = delete;
-  InstX86Fld(const InstX86Fld &) = delete;
-  InstX86Fld &operator=(const InstX86Fld &) = delete;
+  /// Fld - load a value onto the x87 FP stack.
+  class InstX86Fld final : public InstX86Base {
+    InstX86Fld() = delete;
+    InstX86Fld(const InstX86Fld &) = delete;
+    InstX86Fld &operator=(const InstX86Fld &) = delete;
 
-public:
-  static InstX86Fld *create(Cfg *Func, Operand *Src) {
-    return new (Func->allocate<InstX86Fld>()) InstX86Fld(Func, Src);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Fld);
-  }
+  public:
+    static InstX86Fld *create(Cfg *Func, Operand *Src) {
+      return new (Func->allocate<InstX86Fld>()) InstX86Fld(Func, Src);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Fld);
+    }
 
-private:
-  InstX86Fld(Cfg *Func, Operand *Src);
-};
+  private:
+    InstX86Fld(Cfg *Func, Operand *Src);
+  };
 
-/// Fstp - store x87 st(0) into memory and pop st(0).
-template <class Machine> class InstX86Fstp final : public InstX86Base<Machine> {
-  InstX86Fstp() = delete;
-  InstX86Fstp(const InstX86Fstp &) = delete;
-  InstX86Fstp &operator=(const InstX86Fstp &) = delete;
+  /// Fstp - store x87 st(0) into memory and pop st(0).
+  class InstX86Fstp final : public InstX86Base {
+    InstX86Fstp() = delete;
+    InstX86Fstp(const InstX86Fstp &) = delete;
+    InstX86Fstp &operator=(const InstX86Fstp &) = delete;
 
-public:
-  static InstX86Fstp *create(Cfg *Func, Variable *Dest) {
-    return new (Func->allocate<InstX86Fstp>()) InstX86Fstp(Func, Dest);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Fstp);
-  }
+  public:
+    static InstX86Fstp *create(Cfg *Func, Variable *Dest) {
+      return new (Func->allocate<InstX86Fstp>()) InstX86Fstp(Func, Dest);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Fstp);
+    }
 
-private:
-  InstX86Fstp(Cfg *Func, Variable *Dest);
-};
+  private:
+    InstX86Fstp(Cfg *Func, Variable *Dest);
+  };
 
-template <class Machine> class InstX86Pop final : public InstX86Base<Machine> {
-  InstX86Pop() = delete;
-  InstX86Pop(const InstX86Pop &) = delete;
-  InstX86Pop &operator=(const InstX86Pop &) = delete;
+  class InstX86Pop final : public InstX86Base {
+    InstX86Pop() = delete;
+    InstX86Pop(const InstX86Pop &) = delete;
+    InstX86Pop &operator=(const InstX86Pop &) = delete;
 
-public:
-  static InstX86Pop *create(Cfg *Func, Variable *Dest) {
-    return new (Func->allocate<InstX86Pop>()) InstX86Pop(Func, Dest);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Pop);
-  }
+  public:
+    static InstX86Pop *create(Cfg *Func, Variable *Dest) {
+      return new (Func->allocate<InstX86Pop>()) InstX86Pop(Func, Dest);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Pop);
+    }
 
-private:
-  InstX86Pop(Cfg *Func, Variable *Dest);
-};
+  private:
+    InstX86Pop(Cfg *Func, Variable *Dest);
+  };
 
-template <class Machine> class InstX86Push final : public InstX86Base<Machine> {
-  InstX86Push() = delete;
-  InstX86Push(const InstX86Push &) = delete;
-  InstX86Push &operator=(const InstX86Push &) = delete;
+  class InstX86Push final : public InstX86Base {
+    InstX86Push() = delete;
+    InstX86Push(const InstX86Push &) = delete;
+    InstX86Push &operator=(const InstX86Push &) = delete;
 
-public:
-  static InstX86Push *create(Cfg *Func, Variable *Source) {
-    return new (Func->allocate<InstX86Push>()) InstX86Push(Func, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Push);
-  }
+  public:
+    static InstX86Push *create(Cfg *Func, Variable *Source) {
+      return new (Func->allocate<InstX86Push>()) InstX86Push(Func, Source);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Push);
+    }
 
-private:
-  InstX86Push(Cfg *Func, Variable *Source);
-};
+  private:
+    InstX86Push(Cfg *Func, Variable *Source);
+  };
 
-/// Ret instruction. Currently only supports the "ret" version that does not pop
-/// arguments. This instruction takes a Source operand (for non-void returning
-/// functions) for liveness analysis, though a FakeUse before the ret would do
-/// just as well.
-template <class Machine> class InstX86Ret final : public InstX86Base<Machine> {
-  InstX86Ret() = delete;
-  InstX86Ret(const InstX86Ret &) = delete;
-  InstX86Ret &operator=(const InstX86Ret &) = delete;
+  /// Ret instruction. Currently only supports the "ret" version that does not
+  /// pop arguments. This instruction takes a Source operand (for non-void
+  /// returning functions) for liveness analysis, though a FakeUse before the
+  /// ret would do just as well.
+  class InstX86Ret final : public InstX86Base {
+    InstX86Ret() = delete;
+    InstX86Ret(const InstX86Ret &) = delete;
+    InstX86Ret &operator=(const InstX86Ret &) = delete;
 
-public:
-  static InstX86Ret *create(Cfg *Func, Variable *Source = nullptr) {
-    return new (Func->allocate<InstX86Ret>()) InstX86Ret(Func, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Ret);
-  }
+  public:
+    static InstX86Ret *create(Cfg *Func, Variable *Source = nullptr) {
+      return new (Func->allocate<InstX86Ret>()) InstX86Ret(Func, Source);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Ret);
+    }
 
-private:
-  InstX86Ret(Cfg *Func, Variable *Source);
-};
+  private:
+    InstX86Ret(Cfg *Func, Variable *Source);
+  };
 
-/// Conditional set-byte instruction.
-template <class Machine>
-class InstX86Setcc final : public InstX86Base<Machine> {
-  InstX86Setcc() = delete;
-  InstX86Setcc(const InstX86Cmov<Machine> &) = delete;
-  InstX86Setcc &operator=(const InstX86Setcc &) = delete;
+  /// Conditional set-byte instruction.
+  class InstX86Setcc final : public InstX86Base {
+    InstX86Setcc() = delete;
+    InstX86Setcc(const InstX86Setcc &) = delete;
+    InstX86Setcc &operator=(const InstX86Setcc &) = delete;
 
-public:
-  static InstX86Setcc *
-  create(Cfg *Func, Variable *Dest,
-         typename InstX86Base<Machine>::Traits::Cond::BrCond Cond) {
-    return new (Func->allocate<InstX86Setcc>()) InstX86Setcc(Func, Dest, Cond);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Setcc);
-  }
+  public:
+    static InstX86Setcc *create(Cfg *Func, Variable *Dest, BrCond Cond) {
+      return new (Func->allocate<InstX86Setcc>())
+          InstX86Setcc(Func, Dest, Cond);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Setcc);
+    }
 
-private:
-  InstX86Setcc(Cfg *Func, Variable *Dest,
-               typename InstX86Base<Machine>::Traits::Cond::BrCond Cond);
+  private:
+    InstX86Setcc(Cfg *Func, Variable *Dest, BrCond Cond);
 
-  const typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
-};
+    const BrCond Condition;
+  };
 
-/// Exchanging Add instruction. Exchanges the first operand (destination
-/// operand) with the second operand (source operand), then loads the sum of the
-/// two values into the destination operand. The destination may be a register
-/// or memory, while the source must be a register.
-///
-/// Both the dest and source are updated. The caller should then insert a
-/// FakeDef to reflect the second udpate.
-template <class Machine>
-class InstX86Xadd final : public InstX86BaseLockable<Machine> {
-  InstX86Xadd() = delete;
-  InstX86Xadd(const InstX86Xadd &) = delete;
-  InstX86Xadd &operator=(const InstX86Xadd &) = delete;
+  /// Exchanging Add instruction. Exchanges the first operand (destination
+  /// operand) with the second operand (source operand), then loads the sum of
+  /// the two values into the destination operand. The destination may be a
+  /// register or memory, while the source must be a register.
+  ///
+  /// Both the dest and source are updated. The caller should then insert a
+  /// FakeDef to reflect the second update.
+  class InstX86Xadd final : public InstX86BaseLockable {
+    InstX86Xadd() = delete;
+    InstX86Xadd(const InstX86Xadd &) = delete;
+    InstX86Xadd &operator=(const InstX86Xadd &) = delete;
 
-public:
-  static InstX86Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
-                             bool Locked) {
-    return new (Func->allocate<InstX86Xadd>())
-        InstX86Xadd(Func, Dest, Source, Locked);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Xadd);
-  }
+  public:
+    static InstX86Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
+                               bool Locked) {
+      return new (Func->allocate<InstX86Xadd>())
+          InstX86Xadd(Func, Dest, Source, Locked);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Xadd);
+    }
 
-private:
-  InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
-};
+  private:
+    InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
+  };
 
-/// Exchange instruction. Exchanges the first operand (destination operand) with
-/// the second operand (source operand). At least one of the operands must be a
-/// register (and the other can be reg or mem). Both the Dest and Source are
-/// updated. If there is a memory operand, then the instruction is automatically
-/// "locked" without the need for a lock prefix.
-template <class Machine> class InstX86Xchg final : public InstX86Base<Machine> {
-  InstX86Xchg() = delete;
-  InstX86Xchg(const InstX86Xchg &) = delete;
-  InstX86Xchg &operator=(const InstX86Xchg &) = delete;
+  /// Exchange instruction. Exchanges the first operand (destination operand)
+  /// with the second operand (source operand). At least one of the operands
+  /// must be a register (and the other can be reg or mem). Both the Dest and
+  /// Source are updated. If there is a memory operand, then the instruction is
+  /// automatically "locked" without the need for a lock prefix.
+  class InstX86Xchg final : public InstX86Base {
+    InstX86Xchg() = delete;
+    InstX86Xchg(const InstX86Xchg &) = delete;
+    InstX86Xchg &operator=(const InstX86Xchg &) = delete;
 
-public:
-  static InstX86Xchg *create(Cfg *Func, Operand *Dest, Variable *Source) {
-    return new (Func->allocate<InstX86Xchg>()) InstX86Xchg(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::Xchg);
-  }
+  public:
+    static InstX86Xchg *create(Cfg *Func, Operand *Dest, Variable *Source) {
+      return new (Func->allocate<InstX86Xchg>())
+          InstX86Xchg(Func, Dest, Source);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::Xchg);
+    }
 
-private:
-  InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source);
-};
+  private:
+    InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source);
+  };
 
-/// Start marker for the Intel Architecture Code Analyzer. This is not an
-/// executable instruction and must only be used for analysis.
-template <class Machine>
-class InstX86IacaStart final : public InstX86Base<Machine> {
-  InstX86IacaStart() = delete;
-  InstX86IacaStart(const InstX86IacaStart &) = delete;
-  InstX86IacaStart &operator=(const InstX86IacaStart &) = delete;
+  /// Start marker for the Intel Architecture Code Analyzer. This is not an
+  /// executable instruction and must only be used for analysis.
+  class InstX86IacaStart final : public InstX86Base {
+    InstX86IacaStart() = delete;
+    InstX86IacaStart(const InstX86IacaStart &) = delete;
+    InstX86IacaStart &operator=(const InstX86IacaStart &) = delete;
 
-public:
-  static InstX86IacaStart *create(Cfg *Func) {
-    return new (Func->allocate<InstX86IacaStart>()) InstX86IacaStart(Func);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst,
-                                           InstX86Base<Machine>::IacaStart);
-  }
+  public:
+    static InstX86IacaStart *create(Cfg *Func) {
+      return new (Func->allocate<InstX86IacaStart>()) InstX86IacaStart(Func);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::IacaStart);
+    }
 
-private:
-  InstX86IacaStart(Cfg *Func);
-};
+  private:
+    InstX86IacaStart(Cfg *Func);
+  };
 
-/// End marker for the Intel Architecture Code Analyzer. This is not an
-/// executable instruction and must only be used for analysis.
-template <class Machine>
-class InstX86IacaEnd final : public InstX86Base<Machine> {
-  InstX86IacaEnd() = delete;
-  InstX86IacaEnd(const InstX86IacaEnd &) = delete;
-  InstX86IacaEnd &operator=(const InstX86IacaEnd &) = delete;
+  /// End marker for the Intel Architecture Code Analyzer. This is not an
+  /// executable instruction and must only be used for analysis.
+  class InstX86IacaEnd final : public InstX86Base {
+    InstX86IacaEnd() = delete;
+    InstX86IacaEnd(const InstX86IacaEnd &) = delete;
+    InstX86IacaEnd &operator=(const InstX86IacaEnd &) = delete;
 
-public:
-  static InstX86IacaEnd *create(Cfg *Func) {
-    return new (Func->allocate<InstX86IacaEnd>()) InstX86IacaEnd(Func);
-  }
-  void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) {
-    return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::IacaEnd);
-  }
+  public:
+    static InstX86IacaEnd *create(Cfg *Func) {
+      return new (Func->allocate<InstX86IacaEnd>()) InstX86IacaEnd(Func);
+    }
+    void emit(const Cfg *Func) const override;
+    void emitIAS(const Cfg *Func) const override;
+    void dump(const Cfg *Func) const override;
+    static bool classof(const Inst *Inst) {
+      return InstX86Base::isClassof(Inst, InstX86Base::IacaEnd);
+    }
 
-private:
-  InstX86IacaEnd(Cfg *Func);
-};
+  private:
+    InstX86IacaEnd(Cfg *Func);
+  };
+}; // struct InstImpl
 
 /// struct Insts is a template that can be used to instantiate all the X86
 /// instructions for a target with a simple
 ///
-/// using Insts = ::Ice::X86Internal::Insts<TargeT>;
-template <class Machine> struct Insts {
-  using FakeRMW = InstX86FakeRMW<Machine>;
-  using Label = InstX86Label<Machine>;
+/// using Insts = ::Ice::X86NAMESPACE::Insts<TraitsType>;
+template <typename TraitsType> struct Insts {
+  using FakeRMW = typename InstImpl<TraitsType>::InstX86FakeRMW;
+  using Label = typename InstImpl<TraitsType>::InstX86Label;
 
-  using Call = InstX86Call<Machine>;
+  using Call = typename InstImpl<TraitsType>::InstX86Call;
 
-  using Br = InstX86Br<Machine>;
-  using Jmp = InstX86Jmp<Machine>;
-  using Bswap = InstX86Bswap<Machine>;
-  using Neg = InstX86Neg<Machine>;
-  using Bsf = InstX86Bsf<Machine>;
-  using Bsr = InstX86Bsr<Machine>;
-  using Lea = InstX86Lea<Machine>;
-  using Cbwdq = InstX86Cbwdq<Machine>;
-  using Movsx = InstX86Movsx<Machine>;
-  using Movzx = InstX86Movzx<Machine>;
-  using Movd = InstX86Movd<Machine>;
-  using Sqrtss = InstX86Sqrtss<Machine>;
-  using Mov = InstX86Mov<Machine>;
-  using Movp = InstX86Movp<Machine>;
-  using Movq = InstX86Movq<Machine>;
-  using Add = InstX86Add<Machine>;
-  using AddRMW = InstX86AddRMW<Machine>;
-  using Addps = InstX86Addps<Machine>;
-  using Adc = InstX86Adc<Machine>;
-  using AdcRMW = InstX86AdcRMW<Machine>;
-  using Addss = InstX86Addss<Machine>;
-  using Andnps = InstX86Andnps<Machine>;
-  using Andps = InstX86Andps<Machine>;
-  using Padd = InstX86Padd<Machine>;
-  using Sub = InstX86Sub<Machine>;
-  using SubRMW = InstX86SubRMW<Machine>;
-  using Subps = InstX86Subps<Machine>;
-  using Subss = InstX86Subss<Machine>;
-  using Sbb = InstX86Sbb<Machine>;
-  using SbbRMW = InstX86SbbRMW<Machine>;
-  using Psub = InstX86Psub<Machine>;
-  using And = InstX86And<Machine>;
-  using AndRMW = InstX86AndRMW<Machine>;
-  using Pand = InstX86Pand<Machine>;
-  using Pandn = InstX86Pandn<Machine>;
-  using Or = InstX86Or<Machine>;
-  using Orps = InstX86Orps<Machine>;
-  using OrRMW = InstX86OrRMW<Machine>;
-  using Por = InstX86Por<Machine>;
-  using Xor = InstX86Xor<Machine>;
-  using Xorps = InstX86Xorps<Machine>;
-  using XorRMW = InstX86XorRMW<Machine>;
-  using Pxor = InstX86Pxor<Machine>;
-  using Maxss = InstX86Maxss<Machine>;
-  using Minss = InstX86Minss<Machine>;
-  using Imul = InstX86Imul<Machine>;
-  using ImulImm = InstX86ImulImm<Machine>;
-  using Mulps = InstX86Mulps<Machine>;
-  using Mulss = InstX86Mulss<Machine>;
-  using Pmull = InstX86Pmull<Machine>;
-  using Pmuludq = InstX86Pmuludq<Machine>;
-  using Divps = InstX86Divps<Machine>;
-  using Divss = InstX86Divss<Machine>;
-  using Rol = InstX86Rol<Machine>;
-  using Shl = InstX86Shl<Machine>;
-  using Psll = InstX86Psll<Machine>;
-  using Psrl = InstX86Psrl<Machine>;
-  using Shr = InstX86Shr<Machine>;
-  using Sar = InstX86Sar<Machine>;
-  using Psra = InstX86Psra<Machine>;
-  using Pcmpeq = InstX86Pcmpeq<Machine>;
-  using Pcmpgt = InstX86Pcmpgt<Machine>;
-  using MovssRegs = InstX86MovssRegs<Machine>;
-  using Idiv = InstX86Idiv<Machine>;
-  using Div = InstX86Div<Machine>;
-  using Insertps = InstX86Insertps<Machine>;
-  using Pinsr = InstX86Pinsr<Machine>;
-  using Shufps = InstX86Shufps<Machine>;
-  using Blendvps = InstX86Blendvps<Machine>;
-  using Pblendvb = InstX86Pblendvb<Machine>;
-  using Pextr = InstX86Pextr<Machine>;
-  using Pshufd = InstX86Pshufd<Machine>;
-  using Lockable = InstX86BaseLockable<Machine>;
-  using Mul = InstX86Mul<Machine>;
-  using Shld = InstX86Shld<Machine>;
-  using Shrd = InstX86Shrd<Machine>;
-  using Cmov = InstX86Cmov<Machine>;
-  using Cmpps = InstX86Cmpps<Machine>;
-  using Cmpxchg = InstX86Cmpxchg<Machine>;
-  using Cmpxchg8b = InstX86Cmpxchg8b<Machine>;
-  using Cvt = InstX86Cvt<Machine>;
-  using Icmp = InstX86Icmp<Machine>;
-  using Ucomiss = InstX86Ucomiss<Machine>;
-  using UD2 = InstX86UD2<Machine>;
-  using Test = InstX86Test<Machine>;
-  using Mfence = InstX86Mfence<Machine>;
-  using Store = InstX86Store<Machine>;
-  using StoreP = InstX86StoreP<Machine>;
-  using StoreQ = InstX86StoreQ<Machine>;
-  using Nop = InstX86Nop<Machine>;
-  template <typename T = typename InstX86Base<Machine>::Traits>
-  using Fld = typename std::enable_if<T::UsesX87, InstX86Fld<Machine>>::type;
-  template <typename T = typename InstX86Base<Machine>::Traits>
-  using Fstp = typename std::enable_if<T::UsesX87, InstX86Fstp<Machine>>::type;
-  using Pop = InstX86Pop<Machine>;
-  using Push = InstX86Push<Machine>;
-  using Ret = InstX86Ret<Machine>;
-  using Setcc = InstX86Setcc<Machine>;
-  using Xadd = InstX86Xadd<Machine>;
-  using Xchg = InstX86Xchg<Machine>;
+  using Br = typename InstImpl<TraitsType>::InstX86Br;
+  using Jmp = typename InstImpl<TraitsType>::InstX86Jmp;
+  using Bswap = typename InstImpl<TraitsType>::InstX86Bswap;
+  using Neg = typename InstImpl<TraitsType>::InstX86Neg;
+  using Bsf = typename InstImpl<TraitsType>::InstX86Bsf;
+  using Bsr = typename InstImpl<TraitsType>::InstX86Bsr;
+  using Lea = typename InstImpl<TraitsType>::InstX86Lea;
+  using Cbwdq = typename InstImpl<TraitsType>::InstX86Cbwdq;
+  using Movsx = typename InstImpl<TraitsType>::InstX86Movsx;
+  using Movzx = typename InstImpl<TraitsType>::InstX86Movzx;
+  using Movd = typename InstImpl<TraitsType>::InstX86Movd;
+  using Sqrtss = typename InstImpl<TraitsType>::InstX86Sqrtss;
+  using Mov = typename InstImpl<TraitsType>::InstX86Mov;
+  using Movp = typename InstImpl<TraitsType>::InstX86Movp;
+  using Movq = typename InstImpl<TraitsType>::InstX86Movq;
+  using Add = typename InstImpl<TraitsType>::InstX86Add;
+  using AddRMW = typename InstImpl<TraitsType>::InstX86AddRMW;
+  using Addps = typename InstImpl<TraitsType>::InstX86Addps;
+  using Adc = typename InstImpl<TraitsType>::InstX86Adc;
+  using AdcRMW = typename InstImpl<TraitsType>::InstX86AdcRMW;
+  using Addss = typename InstImpl<TraitsType>::InstX86Addss;
+  using Andnps = typename InstImpl<TraitsType>::InstX86Andnps;
+  using Andps = typename InstImpl<TraitsType>::InstX86Andps;
+  using Padd = typename InstImpl<TraitsType>::InstX86Padd;
+  using Sub = typename InstImpl<TraitsType>::InstX86Sub;
+  using SubRMW = typename InstImpl<TraitsType>::InstX86SubRMW;
+  using Subps = typename InstImpl<TraitsType>::InstX86Subps;
+  using Subss = typename InstImpl<TraitsType>::InstX86Subss;
+  using Sbb = typename InstImpl<TraitsType>::InstX86Sbb;
+  using SbbRMW = typename InstImpl<TraitsType>::InstX86SbbRMW;
+  using Psub = typename InstImpl<TraitsType>::InstX86Psub;
+  using And = typename InstImpl<TraitsType>::InstX86And;
+  using AndRMW = typename InstImpl<TraitsType>::InstX86AndRMW;
+  using Pand = typename InstImpl<TraitsType>::InstX86Pand;
+  using Pandn = typename InstImpl<TraitsType>::InstX86Pandn;
+  using Or = typename InstImpl<TraitsType>::InstX86Or;
+  using Orps = typename InstImpl<TraitsType>::InstX86Orps;
+  using OrRMW = typename InstImpl<TraitsType>::InstX86OrRMW;
+  using Por = typename InstImpl<TraitsType>::InstX86Por;
+  using Xor = typename InstImpl<TraitsType>::InstX86Xor;
+  using Xorps = typename InstImpl<TraitsType>::InstX86Xorps;
+  using XorRMW = typename InstImpl<TraitsType>::InstX86XorRMW;
+  using Pxor = typename InstImpl<TraitsType>::InstX86Pxor;
+  using Maxss = typename InstImpl<TraitsType>::InstX86Maxss;
+  using Minss = typename InstImpl<TraitsType>::InstX86Minss;
+  using Imul = typename InstImpl<TraitsType>::InstX86Imul;
+  using ImulImm = typename InstImpl<TraitsType>::InstX86ImulImm;
+  using Mulps = typename InstImpl<TraitsType>::InstX86Mulps;
+  using Mulss = typename InstImpl<TraitsType>::InstX86Mulss;
+  using Pmull = typename InstImpl<TraitsType>::InstX86Pmull;
+  using Pmuludq = typename InstImpl<TraitsType>::InstX86Pmuludq;
+  using Divps = typename InstImpl<TraitsType>::InstX86Divps;
+  using Divss = typename InstImpl<TraitsType>::InstX86Divss;
+  using Rol = typename InstImpl<TraitsType>::InstX86Rol;
+  using Shl = typename InstImpl<TraitsType>::InstX86Shl;
+  using Psll = typename InstImpl<TraitsType>::InstX86Psll;
+  using Psrl = typename InstImpl<TraitsType>::InstX86Psrl;
+  using Shr = typename InstImpl<TraitsType>::InstX86Shr;
+  using Sar = typename InstImpl<TraitsType>::InstX86Sar;
+  using Psra = typename InstImpl<TraitsType>::InstX86Psra;
+  using Pcmpeq = typename InstImpl<TraitsType>::InstX86Pcmpeq;
+  using Pcmpgt = typename InstImpl<TraitsType>::InstX86Pcmpgt;
+  using MovssRegs = typename InstImpl<TraitsType>::InstX86MovssRegs;
+  using Idiv = typename InstImpl<TraitsType>::InstX86Idiv;
+  using Div = typename InstImpl<TraitsType>::InstX86Div;
+  using Insertps = typename InstImpl<TraitsType>::InstX86Insertps;
+  using Pinsr = typename InstImpl<TraitsType>::InstX86Pinsr;
+  using Shufps = typename InstImpl<TraitsType>::InstX86Shufps;
+  using Blendvps = typename InstImpl<TraitsType>::InstX86Blendvps;
+  using Pblendvb = typename InstImpl<TraitsType>::InstX86Pblendvb;
+  using Pextr = typename InstImpl<TraitsType>::InstX86Pextr;
+  using Pshufd = typename InstImpl<TraitsType>::InstX86Pshufd;
+  using Lockable = typename InstImpl<TraitsType>::InstX86BaseLockable;
+  using Mul = typename InstImpl<TraitsType>::InstX86Mul;
+  using Shld = typename InstImpl<TraitsType>::InstX86Shld;
+  using Shrd = typename InstImpl<TraitsType>::InstX86Shrd;
+  using Cmov = typename InstImpl<TraitsType>::InstX86Cmov;
+  using Cmpps = typename InstImpl<TraitsType>::InstX86Cmpps;
+  using Cmpxchg = typename InstImpl<TraitsType>::InstX86Cmpxchg;
+  using Cmpxchg8b = typename InstImpl<TraitsType>::InstX86Cmpxchg8b;
+  using Cvt = typename InstImpl<TraitsType>::InstX86Cvt;
+  using Icmp = typename InstImpl<TraitsType>::InstX86Icmp;
+  using Ucomiss = typename InstImpl<TraitsType>::InstX86Ucomiss;
+  using UD2 = typename InstImpl<TraitsType>::InstX86UD2;
+  using Test = typename InstImpl<TraitsType>::InstX86Test;
+  using Mfence = typename InstImpl<TraitsType>::InstX86Mfence;
+  using Store = typename InstImpl<TraitsType>::InstX86Store;
+  using StoreP = typename InstImpl<TraitsType>::InstX86StoreP;
+  using StoreQ = typename InstImpl<TraitsType>::InstX86StoreQ;
+  using Nop = typename InstImpl<TraitsType>::InstX86Nop;
+  template <typename T = typename InstImpl<TraitsType>::Traits>
+  using Fld =
+      typename std::enable_if<T::UsesX87,
+                              typename InstImpl<TraitsType>::InstX86Fld>::type;
+  template <typename T = typename InstImpl<TraitsType>::Traits>
+  using Fstp =
+      typename std::enable_if<T::UsesX87,
+                              typename InstImpl<TraitsType>::InstX86Fstp>::type;
+  using Pop = typename InstImpl<TraitsType>::InstX86Pop;
+  using Push = typename InstImpl<TraitsType>::InstX86Push;
+  using Ret = typename InstImpl<TraitsType>::InstX86Ret;
+  using Setcc = typename InstImpl<TraitsType>::InstX86Setcc;
+  using Xadd = typename InstImpl<TraitsType>::InstX86Xadd;
+  using Xchg = typename InstImpl<TraitsType>::InstX86Xchg;
 
-  using IacaStart = InstX86IacaStart<Machine>;
-  using IacaEnd = InstX86IacaEnd<Machine>;
+  using IacaStart = typename InstImpl<TraitsType>::InstX86IacaStart;
+  using IacaEnd = typename InstImpl<TraitsType>::InstX86IacaEnd;
 };
 
 /// X86 Instructions have static data (particularly, opcodes and instruction
 /// emitters). Each X86 target needs to define all of these, so this macro is
 /// provided so that, if something changes, then all X86 targets will be updated
 /// automatically.
-#define X86INSTS_DEFINE_STATIC_DATA(Machine)                                   \
+#define X86INSTS_DEFINE_STATIC_DATA(X86NAMESPACE, TraitsType)                  \
   namespace Ice {                                                              \
-  namespace X86Internal {                                                      \
+  namespace X86NAMESPACE {                                                     \
   /* In-place ops */                                                           \
-  template <> const char *InstX86Bswap<Machine>::Base::Opcode = "bswap";       \
-  template <> const char *InstX86Neg<Machine>::Base::Opcode = "neg";           \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Bswap::Base::Opcode = "bswap";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Neg::Base::Opcode = "neg";          \
   /* Unary ops */                                                              \
-  template <> const char *InstX86Bsf<Machine>::Base::Opcode = "bsf";           \
-  template <> const char *InstX86Bsr<Machine>::Base::Opcode = "bsr";           \
-  template <> const char *InstX86Lea<Machine>::Base::Opcode = "lea";           \
-  template <> const char *InstX86Movd<Machine>::Base::Opcode = "movd";         \
-  template <> const char *InstX86Movsx<Machine>::Base::Opcode = "movs";        \
-  template <> const char *InstX86Movzx<Machine>::Base::Opcode = "movz";        \
-  template <> const char *InstX86Sqrtss<Machine>::Base::Opcode = "sqrtss";     \
-  template <> const char *InstX86Cbwdq<Machine>::Base::Opcode = "cbw/cwd/cdq"; \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Bsf::Base::Opcode = "bsf";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Bsr::Base::Opcode = "bsr";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Lea::Base::Opcode = "lea";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Movd::Base::Opcode = "movd";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Movsx::Base::Opcode = "movs";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Movzx::Base::Opcode = "movz";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Sqrtss::Base::Opcode = "sqrtss";    \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Cbwdq::Base::Opcode =               \
+      "cbw/cwd/cdq";                                                           \
   /* Mov-like ops */                                                           \
-  template <> const char *InstX86Mov<Machine>::Base::Opcode = "mov";           \
-  template <> const char *InstX86Movp<Machine>::Base::Opcode = "movups";       \
-  template <> const char *InstX86Movq<Machine>::Base::Opcode = "movq";         \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Mov::Base::Opcode = "mov";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Movp::Base::Opcode = "movups";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Movq::Base::Opcode = "movq";        \
   /* Binary ops */                                                             \
-  template <> const char *InstX86Add<Machine>::Base::Opcode = "add";           \
-  template <> const char *InstX86AddRMW<Machine>::Base::Opcode = "add";        \
-  template <> const char *InstX86Addps<Machine>::Base::Opcode = "add";         \
-  template <> const char *InstX86Adc<Machine>::Base::Opcode = "adc";           \
-  template <> const char *InstX86AdcRMW<Machine>::Base::Opcode = "adc";        \
-  template <> const char *InstX86Addss<Machine>::Base::Opcode = "add";         \
-  template <> const char *InstX86Andnps<Machine>::Base::Opcode = "andn";       \
-  template <> const char *InstX86Andps<Machine>::Base::Opcode = "and";         \
-  template <> const char *InstX86Maxss<Machine>::Base::Opcode = "max";         \
-  template <> const char *InstX86Minss<Machine>::Base::Opcode = "min";         \
-  template <> const char *InstX86Padd<Machine>::Base::Opcode = "padd";         \
-  template <> const char *InstX86Sub<Machine>::Base::Opcode = "sub";           \
-  template <> const char *InstX86SubRMW<Machine>::Base::Opcode = "sub";        \
-  template <> const char *InstX86Subps<Machine>::Base::Opcode = "sub";         \
-  template <> const char *InstX86Subss<Machine>::Base::Opcode = "sub";         \
-  template <> const char *InstX86Sbb<Machine>::Base::Opcode = "sbb";           \
-  template <> const char *InstX86SbbRMW<Machine>::Base::Opcode = "sbb";        \
-  template <> const char *InstX86Psub<Machine>::Base::Opcode = "psub";         \
-  template <> const char *InstX86And<Machine>::Base::Opcode = "and";           \
-  template <> const char *InstX86AndRMW<Machine>::Base::Opcode = "and";        \
-  template <> const char *InstX86Pand<Machine>::Base::Opcode = "pand";         \
-  template <> const char *InstX86Pandn<Machine>::Base::Opcode = "pandn";       \
-  template <> const char *InstX86Or<Machine>::Base::Opcode = "or";             \
-  template <> const char *InstX86Orps<Machine>::Base::Opcode = "or";           \
-  template <> const char *InstX86OrRMW<Machine>::Base::Opcode = "or";          \
-  template <> const char *InstX86Por<Machine>::Base::Opcode = "por";           \
-  template <> const char *InstX86Xor<Machine>::Base::Opcode = "xor";           \
-  template <> const char *InstX86Xorps<Machine>::Base::Opcode = "xor";         \
-  template <> const char *InstX86XorRMW<Machine>::Base::Opcode = "xor";        \
-  template <> const char *InstX86Pxor<Machine>::Base::Opcode = "pxor";         \
-  template <> const char *InstX86Imul<Machine>::Base::Opcode = "imul";         \
-  template <> const char *InstX86ImulImm<Machine>::Base::Opcode = "imul";      \
-  template <> const char *InstX86Mulps<Machine>::Base::Opcode = "mul";         \
-  template <> const char *InstX86Mulss<Machine>::Base::Opcode = "mul";         \
-  template <> const char *InstX86Pmull<Machine>::Base::Opcode = "pmull";       \
-  template <> const char *InstX86Pmuludq<Machine>::Base::Opcode = "pmuludq";   \
-  template <> const char *InstX86Div<Machine>::Base::Opcode = "div";           \
-  template <> const char *InstX86Divps<Machine>::Base::Opcode = "div";         \
-  template <> const char *InstX86Divss<Machine>::Base::Opcode = "div";         \
-  template <> const char *InstX86Idiv<Machine>::Base::Opcode = "idiv";         \
-  template <> const char *InstX86Rol<Machine>::Base::Opcode = "rol";           \
-  template <> const char *InstX86Shl<Machine>::Base::Opcode = "shl";           \
-  template <> const char *InstX86Psll<Machine>::Base::Opcode = "psll";         \
-  template <> const char *InstX86Shr<Machine>::Base::Opcode = "shr";           \
-  template <> const char *InstX86Sar<Machine>::Base::Opcode = "sar";           \
-  template <> const char *InstX86Psra<Machine>::Base::Opcode = "psra";         \
-  template <> const char *InstX86Psrl<Machine>::Base::Opcode = "psrl";         \
-  template <> const char *InstX86Pcmpeq<Machine>::Base::Opcode = "pcmpeq";     \
-  template <> const char *InstX86Pcmpgt<Machine>::Base::Opcode = "pcmpgt";     \
-  template <> const char *InstX86MovssRegs<Machine>::Base::Opcode = "movss";   \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Add::Base::Opcode = "add";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86AddRMW::Base::Opcode = "add";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Addps::Base::Opcode = "add";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Adc::Base::Opcode = "adc";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86AdcRMW::Base::Opcode = "adc";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Addss::Base::Opcode = "add";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Andnps::Base::Opcode = "andn";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Andps::Base::Opcode = "and";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Maxss::Base::Opcode = "max";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Minss::Base::Opcode = "min";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Padd::Base::Opcode = "padd";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Sub::Base::Opcode = "sub";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86SubRMW::Base::Opcode = "sub";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Subps::Base::Opcode = "sub";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Subss::Base::Opcode = "sub";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Sbb::Base::Opcode = "sbb";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86SbbRMW::Base::Opcode = "sbb";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Psub::Base::Opcode = "psub";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86And::Base::Opcode = "and";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86AndRMW::Base::Opcode = "and";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pand::Base::Opcode = "pand";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pandn::Base::Opcode = "pandn";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Or::Base::Opcode = "or";            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Orps::Base::Opcode = "or";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86OrRMW::Base::Opcode = "or";         \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Por::Base::Opcode = "por";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Xor::Base::Opcode = "xor";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Xorps::Base::Opcode = "xor";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86XorRMW::Base::Opcode = "xor";       \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pxor::Base::Opcode = "pxor";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Imul::Base::Opcode = "imul";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86ImulImm::Base::Opcode = "imul";     \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Mulps::Base::Opcode = "mul";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Mulss::Base::Opcode = "mul";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pmull::Base::Opcode = "pmull";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pmuludq::Base::Opcode = "pmuludq";  \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Div::Base::Opcode = "div";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Divps::Base::Opcode = "div";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Divss::Base::Opcode = "div";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Idiv::Base::Opcode = "idiv";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Rol::Base::Opcode = "rol";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Shl::Base::Opcode = "shl";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Psll::Base::Opcode = "psll";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Shr::Base::Opcode = "shr";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Sar::Base::Opcode = "sar";          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Psra::Base::Opcode = "psra";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Psrl::Base::Opcode = "psrl";        \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pcmpeq::Base::Opcode = "pcmpeq";    \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pcmpgt::Base::Opcode = "pcmpgt";    \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86MovssRegs::Base::Opcode = "movss";  \
   /* Ternary ops */                                                            \
-  template <> const char *InstX86Insertps<Machine>::Base::Opcode = "insertps"; \
-  template <> const char *InstX86Shufps<Machine>::Base::Opcode = "shufps";     \
-  template <> const char *InstX86Pinsr<Machine>::Base::Opcode = "pinsr";       \
-  template <> const char *InstX86Blendvps<Machine>::Base::Opcode = "blendvps"; \
-  template <> const char *InstX86Pblendvb<Machine>::Base::Opcode = "pblendvb"; \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Insertps::Base::Opcode =            \
+      "insertps";                                                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Shufps::Base::Opcode = "shufps";    \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pinsr::Base::Opcode = "pinsr";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Blendvps::Base::Opcode =            \
+      "blendvps";                                                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pblendvb::Base::Opcode =            \
+      "pblendvb";                                                              \
   /* Three address ops */                                                      \
-  template <> const char *InstX86Pextr<Machine>::Base::Opcode = "pextr";       \
-  template <> const char *InstX86Pshufd<Machine>::Base::Opcode = "pshufd";     \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pextr::Base::Opcode = "pextr";      \
+  template <>                                                                  \
+  template <>                                                                  \
+  const char *InstImpl<TraitsType>::InstX86Pshufd::Base::Opcode = "pshufd";    \
   /* Inplace GPR ops */                                                        \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp               \
-      InstX86Bswap<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::bswap,                     \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterOneOp                       \
+      InstImpl<TraitsType>::InstX86Bswap::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::bswap,                             \
           nullptr /* only a reg form exists */                                 \
   };                                                                           \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp               \
-      InstX86Neg<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::neg,                       \
-          &InstX86Base<Machine>::Traits::Assembler::neg};                      \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterOneOp                       \
+      InstImpl<TraitsType>::InstX86Neg::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::neg,                               \
+          &InstImpl<TraitsType>::Assembler::neg};                              \
                                                                                \
   /* Unary GPR ops */                                                          \
+  template <>                                                                  \
   template <> /* uses specialized emitter. */                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Cbwdq<Machine>::Base::Emitter = {nullptr, nullptr, nullptr};      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Cbwdq::Base::Emitter = {nullptr, nullptr,   \
+                                                           nullptr};           \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Bsf<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::bsf,                       \
-          &InstX86Base<Machine>::Traits::Assembler::bsf, nullptr};             \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Bsr<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::bsr,                       \
-          &InstX86Base<Machine>::Traits::Assembler::bsr, nullptr};             \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Bsf::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::bsf,                               \
+          &InstImpl<TraitsType>::Assembler::bsf, nullptr};                     \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Lea<Machine>::Base::Emitter = {                                   \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Bsr::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::bsr,                               \
+          &InstImpl<TraitsType>::Assembler::bsr, nullptr};                     \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Lea::Base::Emitter = {                      \
           /* reg/reg and reg/imm are illegal */ nullptr,                       \
-          &InstX86Base<Machine>::Traits::Assembler::lea, nullptr};             \
+          &InstImpl<TraitsType>::Assembler::lea, nullptr};                     \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Movsx<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::movsx,                     \
-          &InstX86Base<Machine>::Traits::Assembler::movsx, nullptr};           \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Movzx<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::movzx,                     \
-          &InstX86Base<Machine>::Traits::Assembler::movzx, nullptr};           \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Movsx::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::movsx,                             \
+          &InstImpl<TraitsType>::Assembler::movsx, nullptr};                   \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Movzx::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::movzx,                             \
+          &InstImpl<TraitsType>::Assembler::movzx, nullptr};                   \
                                                                                \
   /* Unary XMM ops */                                                          \
-  template <> /* uses specialized emitter. */                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Movd<Machine>::Base::Emitter = {nullptr, nullptr};                \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Sqrtss<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::sqrtss,                    \
-          &InstX86Base<Machine>::Traits::Assembler::sqrtss};                   \
+  template <> /* uses specialized emitter. */                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Movd::Base::Emitter = {nullptr, nullptr};   \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Sqrtss::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::sqrtss,                            \
+          &InstImpl<TraitsType>::Assembler::sqrtss};                           \
                                                                                \
   /* Binary GPR ops */                                                         \
+  template <>                                                                  \
   template <> /* uses specialized emitter. */                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Imul<Machine>::Base::Emitter = {nullptr, nullptr, nullptr};       \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Imul::Base::Emitter = {nullptr, nullptr,    \
+                                                          nullptr};            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Add<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::add,                       \
-          &InstX86Base<Machine>::Traits::Assembler::add,                       \
-          &InstX86Base<Machine>::Traits::Assembler::add};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
-      InstX86AddRMW<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::add,                       \
-          &InstX86Base<Machine>::Traits::Assembler::add};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Add::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::add,                               \
+          &InstImpl<TraitsType>::Assembler::add,                               \
+          &InstImpl<TraitsType>::Assembler::add};                              \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Adc<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::adc,                       \
-          &InstX86Base<Machine>::Traits::Assembler::adc,                       \
-          &InstX86Base<Machine>::Traits::Assembler::adc};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
-      InstX86AdcRMW<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::adc,                       \
-          &InstX86Base<Machine>::Traits::Assembler::adc};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterAddrOp                      \
+      InstImpl<TraitsType>::InstX86AddRMW::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::add,                               \
+          &InstImpl<TraitsType>::Assembler::add};                              \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86And<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::And,                       \
-          &InstX86Base<Machine>::Traits::Assembler::And,                       \
-          &InstX86Base<Machine>::Traits::Assembler::And};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
-      InstX86AndRMW<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::And,                       \
-          &InstX86Base<Machine>::Traits::Assembler::And};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Adc::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::adc,                               \
+          &InstImpl<TraitsType>::Assembler::adc,                               \
+          &InstImpl<TraitsType>::Assembler::adc};                              \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Or<Machine>::Base::Emitter = {                                    \
-          &InstX86Base<Machine>::Traits::Assembler::Or,                        \
-          &InstX86Base<Machine>::Traits::Assembler::Or,                        \
-          &InstX86Base<Machine>::Traits::Assembler::Or};                       \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
-      InstX86OrRMW<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::Or,                        \
-          &InstX86Base<Machine>::Traits::Assembler::Or};                       \
+  const InstImpl<TraitsType>::Assembler::GPREmitterAddrOp                      \
+      InstImpl<TraitsType>::InstX86AdcRMW::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::adc,                               \
+          &InstImpl<TraitsType>::Assembler::adc};                              \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Sbb<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::sbb,                       \
-          &InstX86Base<Machine>::Traits::Assembler::sbb,                       \
-          &InstX86Base<Machine>::Traits::Assembler::sbb};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
-      InstX86SbbRMW<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::sbb,                       \
-          &InstX86Base<Machine>::Traits::Assembler::sbb};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86And::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::And,                               \
+          &InstImpl<TraitsType>::Assembler::And,                               \
+          &InstImpl<TraitsType>::Assembler::And};                              \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Sub<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::sub,                       \
-          &InstX86Base<Machine>::Traits::Assembler::sub,                       \
-          &InstX86Base<Machine>::Traits::Assembler::sub};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
-      InstX86SubRMW<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::sub,                       \
-          &InstX86Base<Machine>::Traits::Assembler::sub};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterAddrOp                      \
+      InstImpl<TraitsType>::InstX86AndRMW::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::And,                               \
+          &InstImpl<TraitsType>::Assembler::And};                              \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp               \
-      InstX86Xor<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::Xor,                       \
-          &InstX86Base<Machine>::Traits::Assembler::Xor,                       \
-          &InstX86Base<Machine>::Traits::Assembler::Xor};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp              \
-      InstX86XorRMW<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::Xor,                       \
-          &InstX86Base<Machine>::Traits::Assembler::Xor};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Or::Base::Emitter = {                       \
+          &InstImpl<TraitsType>::Assembler::Or,                                \
+          &InstImpl<TraitsType>::Assembler::Or,                                \
+          &InstImpl<TraitsType>::Assembler::Or};                               \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterAddrOp                      \
+      InstImpl<TraitsType>::InstX86OrRMW::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::Or,                                \
+          &InstImpl<TraitsType>::Assembler::Or};                               \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Sbb::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::sbb,                               \
+          &InstImpl<TraitsType>::Assembler::sbb,                               \
+          &InstImpl<TraitsType>::Assembler::sbb};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterAddrOp                      \
+      InstImpl<TraitsType>::InstX86SbbRMW::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::sbb,                               \
+          &InstImpl<TraitsType>::Assembler::sbb};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Sub::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::sub,                               \
+          &InstImpl<TraitsType>::Assembler::sub,                               \
+          &InstImpl<TraitsType>::Assembler::sub};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterAddrOp                      \
+      InstImpl<TraitsType>::InstX86SubRMW::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::sub,                               \
+          &InstImpl<TraitsType>::Assembler::sub};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Xor::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::Xor,                               \
+          &InstImpl<TraitsType>::Assembler::Xor,                               \
+          &InstImpl<TraitsType>::Assembler::Xor};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterAddrOp                      \
+      InstImpl<TraitsType>::InstX86XorRMW::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::Xor,                               \
+          &InstImpl<TraitsType>::Assembler::Xor};                              \
                                                                                \
   /* Binary Shift GPR ops */                                                   \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
-      InstX86Rol<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::rol,                       \
-          &InstX86Base<Machine>::Traits::Assembler::rol};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
-      InstX86Sar<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::sar,                       \
-          &InstX86Base<Machine>::Traits::Assembler::sar};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterShiftOp                     \
+      InstImpl<TraitsType>::InstX86Rol::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::rol,                               \
+          &InstImpl<TraitsType>::Assembler::rol};                              \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
-      InstX86Shl<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::shl,                       \
-          &InstX86Base<Machine>::Traits::Assembler::shl};                      \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp             \
-      InstX86Shr<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::shr,                       \
-          &InstX86Base<Machine>::Traits::Assembler::shr};                      \
+  const InstImpl<TraitsType>::Assembler::GPREmitterShiftOp                     \
+      InstImpl<TraitsType>::InstX86Sar::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::sar,                               \
+          &InstImpl<TraitsType>::Assembler::sar};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterShiftOp                     \
+      InstImpl<TraitsType>::InstX86Shl::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::shl,                               \
+          &InstImpl<TraitsType>::Assembler::shl};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::GPREmitterShiftOp                     \
+      InstImpl<TraitsType>::InstX86Shr::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::shr,                               \
+          &InstImpl<TraitsType>::Assembler::shr};                              \
                                                                                \
   /* Binary XMM ops */                                                         \
+  template <>                                                                  \
   template <> /* uses specialized emitter. */                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86MovssRegs<Machine>::Base::Emitter = {nullptr, nullptr};           \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86MovssRegs::Base::Emitter = {nullptr,        \
+                                                               nullptr};       \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Addss<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::addss,                     \
-          &InstX86Base<Machine>::Traits::Assembler::addss};                    \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Addps<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::addps,                     \
-          &InstX86Base<Machine>::Traits::Assembler::addps};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Addss::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::addss,                             \
+          &InstImpl<TraitsType>::Assembler::addss};                            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Divss<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::divss,                     \
-          &InstX86Base<Machine>::Traits::Assembler::divss};                    \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Divps<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::divps,                     \
-          &InstX86Base<Machine>::Traits::Assembler::divps};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Addps::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::addps,                             \
+          &InstImpl<TraitsType>::Assembler::addps};                            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Mulss<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::mulss,                     \
-          &InstX86Base<Machine>::Traits::Assembler::mulss};                    \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Mulps<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::mulps,                     \
-          &InstX86Base<Machine>::Traits::Assembler::mulps};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Divss::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::divss,                             \
+          &InstImpl<TraitsType>::Assembler::divss};                            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Padd<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::padd,                      \
-          &InstX86Base<Machine>::Traits::Assembler::padd};                     \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Pand<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::pand,                      \
-          &InstX86Base<Machine>::Traits::Assembler::pand};                     \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Divps::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::divps,                             \
+          &InstImpl<TraitsType>::Assembler::divps};                            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Pandn<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::pandn,                     \
-          &InstX86Base<Machine>::Traits::Assembler::pandn};                    \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Pcmpeq<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::pcmpeq,                    \
-          &InstX86Base<Machine>::Traits::Assembler::pcmpeq};                   \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Mulss::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::mulss,                             \
+          &InstImpl<TraitsType>::Assembler::mulss};                            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Pcmpgt<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::pcmpgt,                    \
-          &InstX86Base<Machine>::Traits::Assembler::pcmpgt};                   \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Pmull<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::pmull,                     \
-          &InstX86Base<Machine>::Traits::Assembler::pmull};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Mulps::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::mulps,                             \
+          &InstImpl<TraitsType>::Assembler::mulps};                            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Pmuludq<Machine>::Base::Emitter = {                               \
-          &InstX86Base<Machine>::Traits::Assembler::pmuludq,                   \
-          &InstX86Base<Machine>::Traits::Assembler::pmuludq};                  \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Por<Machine>::Base::Emitter = {                                   \
-          &InstX86Base<Machine>::Traits::Assembler::por,                       \
-          &InstX86Base<Machine>::Traits::Assembler::por};                      \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Padd::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::padd,                              \
+          &InstImpl<TraitsType>::Assembler::padd};                             \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Psub<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::psub,                      \
-          &InstX86Base<Machine>::Traits::Assembler::psub};                     \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Pxor<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::pxor,                      \
-          &InstX86Base<Machine>::Traits::Assembler::pxor};                     \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pand::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::pand,                              \
+          &InstImpl<TraitsType>::Assembler::pand};                             \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Subss<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::subss,                     \
-          &InstX86Base<Machine>::Traits::Assembler::subss};                    \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Subps<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::subps,                     \
-          &InstX86Base<Machine>::Traits::Assembler::subps};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pandn::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::pandn,                             \
+          &InstImpl<TraitsType>::Assembler::pandn};                            \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Andnps<Machine>::Base::Emitter = {                                \
-          &InstX86Base<Machine>::Traits::Assembler::andnps,                    \
-          &InstX86Base<Machine>::Traits::Assembler::andnps};                   \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Andps<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::andps,                     \
-          &InstX86Base<Machine>::Traits::Assembler::andps};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pcmpeq::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::pcmpeq,                            \
+          &InstImpl<TraitsType>::Assembler::pcmpeq};                           \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Maxss<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::maxss,                     \
-          &InstX86Base<Machine>::Traits::Assembler::maxss};                    \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Minss<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::minss,                     \
-          &InstX86Base<Machine>::Traits::Assembler::minss};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pcmpgt::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::pcmpgt,                            \
+          &InstImpl<TraitsType>::Assembler::pcmpgt};                           \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Orps<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::orps,                      \
-          &InstX86Base<Machine>::Traits::Assembler::orps};                     \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp               \
-      InstX86Xorps<Machine>::Base::Emitter = {                                 \
-          &InstX86Base<Machine>::Traits::Assembler::xorps,                     \
-          &InstX86Base<Machine>::Traits::Assembler::xorps};                    \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pmull::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::pmull,                             \
+          &InstImpl<TraitsType>::Assembler::pmull};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pmuludq::Base::Emitter = {                  \
+          &InstImpl<TraitsType>::Assembler::pmuludq,                           \
+          &InstImpl<TraitsType>::Assembler::pmuludq};                          \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Por::Base::Emitter = {                      \
+          &InstImpl<TraitsType>::Assembler::por,                               \
+          &InstImpl<TraitsType>::Assembler::por};                              \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Psub::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::psub,                              \
+          &InstImpl<TraitsType>::Assembler::psub};                             \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Pxor::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::pxor,                              \
+          &InstImpl<TraitsType>::Assembler::pxor};                             \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Subss::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::subss,                             \
+          &InstImpl<TraitsType>::Assembler::subss};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Subps::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::subps,                             \
+          &InstImpl<TraitsType>::Assembler::subps};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Andnps::Base::Emitter = {                   \
+          &InstImpl<TraitsType>::Assembler::andnps,                            \
+          &InstImpl<TraitsType>::Assembler::andnps};                           \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Andps::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::andps,                             \
+          &InstImpl<TraitsType>::Assembler::andps};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Maxss::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::maxss,                             \
+          &InstImpl<TraitsType>::Assembler::maxss};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Minss::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::minss,                             \
+          &InstImpl<TraitsType>::Assembler::minss};                            \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Orps::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::orps,                              \
+          &InstImpl<TraitsType>::Assembler::orps};                             \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp                       \
+      InstImpl<TraitsType>::InstX86Xorps::Base::Emitter = {                    \
+          &InstImpl<TraitsType>::Assembler::xorps,                             \
+          &InstImpl<TraitsType>::Assembler::xorps};                            \
                                                                                \
   /* Binary XMM Shift ops */                                                   \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp             \
-      InstX86Psll<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::psll,                      \
-          &InstX86Base<Machine>::Traits::Assembler::psll,                      \
-          &InstX86Base<Machine>::Traits::Assembler::psll};                     \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp             \
-      InstX86Psra<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::psra,                      \
-          &InstX86Base<Machine>::Traits::Assembler::psra,                      \
-          &InstX86Base<Machine>::Traits::Assembler::psra};                     \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterShiftOp                     \
+      InstImpl<TraitsType>::InstX86Psll::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::psll,                              \
+          &InstImpl<TraitsType>::Assembler::psll,                              \
+          &InstImpl<TraitsType>::Assembler::psll};                             \
   template <>                                                                  \
-  const InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp             \
-      InstX86Psrl<Machine>::Base::Emitter = {                                  \
-          &InstX86Base<Machine>::Traits::Assembler::psrl,                      \
-          &InstX86Base<Machine>::Traits::Assembler::psrl,                      \
-          &InstX86Base<Machine>::Traits::Assembler::psrl};                     \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterShiftOp                     \
+      InstImpl<TraitsType>::InstX86Psra::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::psra,                              \
+          &InstImpl<TraitsType>::Assembler::psra,                              \
+          &InstImpl<TraitsType>::Assembler::psra};                             \
+  template <>                                                                  \
+  template <>                                                                  \
+  const InstImpl<TraitsType>::Assembler::XmmEmitterShiftOp                     \
+      InstImpl<TraitsType>::InstX86Psrl::Base::Emitter = {                     \
+          &InstImpl<TraitsType>::Assembler::psrl,                              \
+          &InstImpl<TraitsType>::Assembler::psrl,                              \
+          &InstImpl<TraitsType>::Assembler::psrl};                             \
   }                                                                            \
   }
 
-} // end of namespace X86Internal
+} // end of namespace X86NAMESPACE
 } // end of namespace Ice
 
 #include "IceInstX86BaseImpl.h"
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 2d8f6d1..96ed422 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -27,83 +27,86 @@
 
 namespace Ice {
 
-namespace X86Internal {
+namespace X86NAMESPACE {
 
-template <class Machine>
-const char *InstX86Base<Machine>::getWidthString(Type Ty) {
+template <typename TraitsType>
+const char *InstImpl<TraitsType>::InstX86Base::getWidthString(Type Ty) {
   return Traits::TypeAttributes[Ty].WidthString;
 }
 
-template <class Machine>
-const char *InstX86Base<Machine>::getFldString(Type Ty) {
+template <typename TraitsType>
+const char *InstImpl<TraitsType>::InstX86Base::getFldString(Type Ty) {
   return Traits::TypeAttributes[Ty].FldString;
 }
 
-template <class Machine>
-typename InstX86Base<Machine>::Traits::Cond::BrCond
-InstX86Base<Machine>::getOppositeCondition(typename Traits::Cond::BrCond Cond) {
+template <typename TraitsType>
+typename InstImpl<TraitsType>::Cond::BrCond
+InstImpl<TraitsType>::InstX86Base::getOppositeCondition(BrCond Cond) {
   return Traits::InstBrAttributes[Cond].Opposite;
 }
 
-template <class Machine>
-InstX86FakeRMW<Machine>::InstX86FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
-                                        InstArithmetic::OpKind Op,
-                                        Variable *Beacon)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::FakeRMW, 3, nullptr),
-      Op(Op) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86FakeRMW::InstX86FakeRMW(Cfg *Func, Operand *Data,
+                                                     Operand *Addr,
+                                                     InstArithmetic::OpKind Op,
+                                                     Variable *Beacon)
+    : InstX86Base(Func, InstX86Base::FakeRMW, 3, nullptr), Op(Op) {
   this->addSource(Data);
   this->addSource(Addr);
   this->addSource(Beacon);
 }
 
-template <class Machine>
-InstX86Mul<Machine>::InstX86Mul(Cfg *Func, Variable *Dest, Variable *Source1,
-                                Operand *Source2)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Mul, 2, Dest) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Mul::InstX86Mul(Cfg *Func, Variable *Dest,
+                                             Variable *Source1,
+                                             Operand *Source2)
+    : InstX86Base(Func, InstX86Base::Mul, 2, Dest) {
   this->addSource(Source1);
   this->addSource(Source2);
 }
 
-template <class Machine>
-InstX86Shld<Machine>::InstX86Shld(Cfg *Func, Variable *Dest, Variable *Source1,
-                                  Operand *Source2)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Shld, 3, Dest) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Shld::InstX86Shld(Cfg *Func, Variable *Dest,
+                                               Variable *Source1,
+                                               Operand *Source2)
+    : InstX86Base(Func, InstX86Base::Shld, 3, Dest) {
   this->addSource(Dest);
   this->addSource(Source1);
   this->addSource(Source2);
 }
 
-template <class Machine>
-InstX86Shrd<Machine>::InstX86Shrd(Cfg *Func, Variable *Dest, Variable *Source1,
-                                  Operand *Source2)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Shrd, 3, Dest) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Shrd::InstX86Shrd(Cfg *Func, Variable *Dest,
+                                               Variable *Source1,
+                                               Operand *Source2)
+    : InstX86Base(Func, InstX86Base::Shrd, 3, Dest) {
   this->addSource(Dest);
   this->addSource(Source1);
   this->addSource(Source2);
 }
 
-template <class Machine>
-InstX86Label<Machine>::InstX86Label(
-    Cfg *Func, typename InstX86Base<Machine>::Traits::TargetLowering *Target)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Label, 0, nullptr),
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Label::InstX86Label(Cfg *Func,
+                                                 TargetLowering *Target)
+    : InstX86Base(Func, InstX86Base::Label, 0, nullptr),
       Number(Target->makeNextLabelNumber()) {}
 
-template <class Machine>
-IceString InstX86Label<Machine>::getName(const Cfg *Func) const {
+template <typename TraitsType>
+IceString InstImpl<TraitsType>::InstX86Label::getName(const Cfg *Func) const {
   return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number);
 }
 
-template <class Machine>
-InstX86Br<Machine>::InstX86Br(
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Br::InstX86Br(
     Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
-    const InstX86Label<Machine> *Label,
-    typename InstX86Base<Machine>::Traits::Cond::BrCond Condition, Mode Kind)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Br, 0, nullptr),
-      Condition(Condition), TargetTrue(TargetTrue), TargetFalse(TargetFalse),
-      Label(Label), Kind(Kind) {}
+    const InstImpl<TraitsType>::InstX86Label *Label, BrCond Condition,
+    Mode Kind)
+    : InstX86Base(Func, InstX86Base::Br, 0, nullptr), Condition(Condition),
+      TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label),
+      Kind(Kind) {}
 
-template <class Machine>
-bool InstX86Br<Machine>::optimizeBranch(const CfgNode *NextNode) {
+template <typename TraitsType>
+bool InstImpl<TraitsType>::InstX86Br::optimizeBranch(const CfgNode *NextNode) {
   // If there is no next block, then there can be no fallthrough to optimize.
   if (NextNode == nullptr)
     return false;
@@ -116,8 +119,7 @@
     return false;
 
   // Unconditional branch to the next node can be removed.
-  if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None &&
-      getTargetFalse() == NextNode) {
+  if (Condition == Cond::Br_None && getTargetFalse() == NextNode) {
     assert(getTargetTrue() == nullptr);
     this->setDeleted();
     return true;
@@ -132,7 +134,7 @@
   // already tested above), then invert the branch condition, swap the targets,
   // and set new fallthrough to nullptr.
   if (getTargetTrue() == NextNode) {
-    assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
+    assert(Condition != Cond::Br_None);
     Condition = this->getOppositeCondition(Condition);
     TargetTrue = getTargetFalse();
     TargetFalse = nullptr;
@@ -141,8 +143,9 @@
   return false;
 }
 
-template <class Machine>
-bool InstX86Br<Machine>::repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
+template <typename TraitsType>
+bool InstImpl<TraitsType>::InstX86Br::repointEdges(CfgNode *OldNode,
+                                                   CfgNode *NewNode) {
   bool Found = false;
   if (TargetFalse == OldNode) {
     TargetFalse = NewNode;
@@ -155,72 +158,67 @@
   return Found;
 }
 
-template <class Machine>
-InstX86Jmp<Machine>::InstX86Jmp(Cfg *Func, Operand *Target)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Jmp, 1, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Jmp::InstX86Jmp(Cfg *Func, Operand *Target)
+    : InstX86Base(Func, InstX86Base::Jmp, 1, nullptr) {
   this->addSource(Target);
 }
 
-template <class Machine>
-InstX86Call<Machine>::InstX86Call(Cfg *Func, Variable *Dest,
-                                  Operand *CallTarget)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Call, 1, Dest) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Call::InstX86Call(Cfg *Func, Variable *Dest,
+                                               Operand *CallTarget)
+    : InstX86Base(Func, InstX86Base::Call, 1, Dest) {
   this->HasSideEffects = true;
   this->addSource(CallTarget);
 }
 
-template <class Machine>
-InstX86Cmov<Machine>::InstX86Cmov(
-    Cfg *Func, Variable *Dest, Operand *Source,
-    typename InstX86Base<Machine>::Traits::Cond::BrCond Condition)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cmov, 2, Dest),
-      Condition(Condition) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Cmov::InstX86Cmov(Cfg *Func, Variable *Dest,
+                                               Operand *Source,
+                                               BrCond Condition)
+    : InstX86Base(Func, InstX86Base::Cmov, 2, Dest), Condition(Condition) {
   // The final result is either the original Dest, or Source, so mark both as
   // sources.
   this->addSource(Dest);
   this->addSource(Source);
 }
 
-template <class Machine>
-InstX86Cmpps<Machine>::InstX86Cmpps(
-    Cfg *Func, Variable *Dest, Operand *Source,
-    typename InstX86Base<Machine>::Traits::Cond::CmppsCond Condition)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cmpps, 2, Dest),
-      Condition(Condition) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Cmpps::InstX86Cmpps(Cfg *Func, Variable *Dest,
+                                                 Operand *Source,
+                                                 CmppsCond Condition)
+    : InstX86Base(Func, InstX86Base::Cmpps, 2, Dest), Condition(Condition) {
   this->addSource(Dest);
   this->addSource(Source);
 }
 
-template <class Machine>
-InstX86Cmpxchg<Machine>::InstX86Cmpxchg(Cfg *Func, Operand *DestOrAddr,
-                                        Variable *Eax, Variable *Desired,
-                                        bool Locked)
-    : InstX86BaseLockable<Machine>(Func, InstX86Base<Machine>::Cmpxchg, 3,
-                                   llvm::dyn_cast<Variable>(DestOrAddr),
-                                   Locked) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Cmpxchg::InstX86Cmpxchg(Cfg *Func,
+                                                     Operand *DestOrAddr,
+                                                     Variable *Eax,
+                                                     Variable *Desired,
+                                                     bool Locked)
+    : InstImpl<TraitsType>::InstX86BaseLockable(
+          Func, InstX86Base::Cmpxchg, 3, llvm::dyn_cast<Variable>(DestOrAddr),
+          Locked) {
   constexpr uint16_t Encoded_rAX = 0;
   (void)Encoded_rAX;
-  assert(InstX86Base<Machine>::Traits::getEncodedGPR(Eax->getRegNum()) ==
-         Encoded_rAX);
+  assert(Traits::getEncodedGPR(Eax->getRegNum()) == Encoded_rAX);
   this->addSource(DestOrAddr);
   this->addSource(Eax);
   this->addSource(Desired);
 }
 
-template <class Machine>
-InstX86Cmpxchg8b<Machine>::InstX86Cmpxchg8b(
-    Cfg *Func, typename InstX86Base<Machine>::Traits::X86OperandMem *Addr,
-    Variable *Edx, Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked)
-    : InstX86BaseLockable<Machine>(Func, InstX86Base<Machine>::Cmpxchg, 5,
-                                   nullptr, Locked) {
-  assert(Edx->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
-  assert(Eax->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
-  assert(Ecx->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_ecx);
-  assert(Ebx->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_ebx);
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Cmpxchg8b::InstX86Cmpxchg8b(
+    Cfg *Func, X86OperandMem *Addr, Variable *Edx, Variable *Eax, Variable *Ecx,
+    Variable *Ebx, bool Locked)
+    : InstImpl<TraitsType>::InstX86BaseLockable(Func, InstX86Base::Cmpxchg, 5,
+                                                nullptr, Locked) {
+  assert(Edx->getRegNum() == RegisterSet::Reg_edx);
+  assert(Eax->getRegNum() == RegisterSet::Reg_eax);
+  assert(Ecx->getRegNum() == RegisterSet::Reg_ecx);
+  assert(Ebx->getRegNum() == RegisterSet::Reg_ebx);
   this->addSource(Addr);
   this->addSource(Edx);
   this->addSource(Eax);
@@ -228,90 +226,90 @@
   this->addSource(Ebx);
 }
 
-template <class Machine>
-InstX86Cvt<Machine>::InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source,
-                                CvtVariant Variant)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cvt, 1, Dest),
-      Variant(Variant) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Cvt::InstX86Cvt(Cfg *Func, Variable *Dest,
+                                             Operand *Source,
+                                             CvtVariant Variant)
+    : InstX86Base(Func, InstX86Base::Cvt, 1, Dest), Variant(Variant) {
   this->addSource(Source);
 }
 
-template <class Machine>
-InstX86Icmp<Machine>::InstX86Icmp(Cfg *Func, Operand *Src0, Operand *Src1)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Icmp, 2, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Icmp::InstX86Icmp(Cfg *Func, Operand *Src0,
+                                               Operand *Src1)
+    : InstX86Base(Func, InstX86Base::Icmp, 2, nullptr) {
   this->addSource(Src0);
   this->addSource(Src1);
 }
 
-template <class Machine>
-InstX86Ucomiss<Machine>::InstX86Ucomiss(Cfg *Func, Operand *Src0, Operand *Src1)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Ucomiss, 2, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Ucomiss::InstX86Ucomiss(Cfg *Func, Operand *Src0,
+                                                     Operand *Src1)
+    : InstX86Base(Func, InstX86Base::Ucomiss, 2, nullptr) {
   this->addSource(Src0);
   this->addSource(Src1);
 }
 
-template <class Machine>
-InstX86UD2<Machine>::InstX86UD2(Cfg *Func)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::UD2, 0, nullptr) {}
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86UD2::InstX86UD2(Cfg *Func)
+    : InstX86Base(Func, InstX86Base::UD2, 0, nullptr) {}
 
-template <class Machine>
-InstX86Test<Machine>::InstX86Test(Cfg *Func, Operand *Src1, Operand *Src2)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Test, 2, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Test::InstX86Test(Cfg *Func, Operand *Src1,
+                                               Operand *Src2)
+    : InstX86Base(Func, InstX86Base::Test, 2, nullptr) {
   this->addSource(Src1);
   this->addSource(Src2);
 }
 
-template <class Machine>
-InstX86Mfence<Machine>::InstX86Mfence(Cfg *Func)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Mfence, 0, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Mfence::InstX86Mfence(Cfg *Func)
+    : InstX86Base(Func, InstX86Base::Mfence, 0, nullptr) {
   this->HasSideEffects = true;
 }
 
-template <class Machine>
-InstX86Store<Machine>::InstX86Store(
-    Cfg *Func, Operand *Value,
-    typename InstX86Base<Machine>::Traits::X86Operand *Mem)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Store, 2, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Store::InstX86Store(Cfg *Func, Operand *Value,
+                                                 X86Operand *Mem)
+    : InstX86Base(Func, InstX86Base::Store, 2, nullptr) {
   this->addSource(Value);
   this->addSource(Mem);
 }
 
-template <class Machine>
-InstX86StoreP<Machine>::InstX86StoreP(
-    Cfg *Func, Variable *Value,
-    typename InstX86Base<Machine>::Traits::X86OperandMem *Mem)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::StoreP, 2, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86StoreP::InstX86StoreP(Cfg *Func, Variable *Value,
+                                                   X86OperandMem *Mem)
+    : InstX86Base(Func, InstX86Base::StoreP, 2, nullptr) {
   this->addSource(Value);
   this->addSource(Mem);
 }
 
-template <class Machine>
-InstX86StoreQ<Machine>::InstX86StoreQ(
-    Cfg *Func, Variable *Value,
-    typename InstX86Base<Machine>::Traits::X86OperandMem *Mem)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::StoreQ, 2, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86StoreQ::InstX86StoreQ(Cfg *Func, Variable *Value,
+                                                   X86OperandMem *Mem)
+    : InstX86Base(Func, InstX86Base::StoreQ, 2, nullptr) {
   this->addSource(Value);
   this->addSource(Mem);
 }
 
-template <class Machine>
-InstX86Nop<Machine>::InstX86Nop(Cfg *Func, InstX86Nop::NopVariant Variant)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Nop, 0, nullptr),
-      Variant(Variant) {}
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Nop::InstX86Nop(Cfg *Func,
+                                             InstX86Nop::NopVariant Variant)
+    : InstX86Base(Func, InstX86Base::Nop, 0, nullptr), Variant(Variant) {}
 
-template <class Machine>
-InstX86Fld<Machine>::InstX86Fld(Cfg *Func, Operand *Src)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Fld, 1, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Fld::InstX86Fld(Cfg *Func, Operand *Src)
+    : InstX86Base(Func, InstX86Base::Fld, 1, nullptr) {
   this->addSource(Src);
 }
 
-template <class Machine>
-InstX86Fstp<Machine>::InstX86Fstp(Cfg *Func, Variable *Dest)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Fstp, 0, Dest) {}
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Fstp::InstX86Fstp(Cfg *Func, Variable *Dest)
+    : InstX86Base(Func, InstX86Base::Fstp, 0, Dest) {}
 
-template <class Machine>
-InstX86Pop<Machine>::InstX86Pop(Cfg *Func, Variable *Dest)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Pop, 0, Dest) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Pop::InstX86Pop(Cfg *Func, Variable *Dest)
+    : InstX86Base(Func, InstX86Base::Pop, 0, Dest) {
   // A pop instruction affects the stack pointer and so it should not be
   // allowed to be automatically dead-code eliminated. (The corresponding push
   // instruction doesn't need this treatment because it has no dest variable
@@ -320,60 +318,57 @@
   this->HasSideEffects = true;
 }
 
-template <class Machine>
-InstX86Push<Machine>::InstX86Push(Cfg *Func, Variable *Source)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Push, 1, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Push::InstX86Push(Cfg *Func, Variable *Source)
+    : InstX86Base(Func, InstX86Base::Push, 1, nullptr) {
   this->addSource(Source);
 }
 
-template <class Machine>
-InstX86Ret<Machine>::InstX86Ret(Cfg *Func, Variable *Source)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Ret, Source ? 1 : 0,
-                           nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Ret::InstX86Ret(Cfg *Func, Variable *Source)
+    : InstX86Base(Func, InstX86Base::Ret, Source ? 1 : 0, nullptr) {
   if (Source)
     this->addSource(Source);
 }
 
-template <class Machine>
-InstX86Setcc<Machine>::InstX86Setcc(
-    Cfg *Func, Variable *Dest,
-    typename InstX86Base<Machine>::Traits::Cond::BrCond Cond)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Setcc, 0, Dest),
-      Condition(Cond) {}
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Setcc::InstX86Setcc(Cfg *Func, Variable *Dest,
+                                                 BrCond Cond)
+    : InstX86Base(Func, InstX86Base::Setcc, 0, Dest), Condition(Cond) {}
 
-template <class Machine>
-InstX86Xadd<Machine>::InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source,
-                                  bool Locked)
-    : InstX86BaseLockable<Machine>(Func, InstX86Base<Machine>::Xadd, 2,
-                                   llvm::dyn_cast<Variable>(Dest), Locked) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Xadd::InstX86Xadd(Cfg *Func, Operand *Dest,
+                                               Variable *Source, bool Locked)
+    : InstImpl<TraitsType>::InstX86BaseLockable(
+          Func, InstX86Base::Xadd, 2, llvm::dyn_cast<Variable>(Dest), Locked) {
   this->addSource(Dest);
   this->addSource(Source);
 }
 
-template <class Machine>
-InstX86Xchg<Machine>::InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::Xchg, 2,
-                           llvm::dyn_cast<Variable>(Dest)) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86Xchg::InstX86Xchg(Cfg *Func, Operand *Dest,
+                                               Variable *Source)
+    : InstX86Base(Func, InstX86Base::Xchg, 2, llvm::dyn_cast<Variable>(Dest)) {
   this->addSource(Dest);
   this->addSource(Source);
 }
 
-template <class Machine>
-InstX86IacaStart<Machine>::InstX86IacaStart(Cfg *Func)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::IacaStart, 0, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86IacaStart::InstX86IacaStart(Cfg *Func)
+    : InstX86Base(Func, InstX86Base::IacaStart, 0, nullptr) {
   assert(Func->getContext()->getFlags().getAllowIacaMarks());
 }
 
-template <class Machine>
-InstX86IacaEnd<Machine>::InstX86IacaEnd(Cfg *Func)
-    : InstX86Base<Machine>(Func, InstX86Base<Machine>::IacaEnd, 0, nullptr) {
+template <typename TraitsType>
+InstImpl<TraitsType>::InstX86IacaEnd::InstX86IacaEnd(Cfg *Func)
+    : InstX86Base(Func, InstX86Base::IacaEnd, 0, nullptr) {
   assert(Func->getContext()->getFlags().getAllowIacaMarks());
 }
 
 // ======================== Dump routines ======================== //
 
-template <class Machine>
-void InstX86Base<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Base::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -381,8 +376,8 @@
   Inst::dump(Func);
 }
 
-template <class Machine>
-void InstX86FakeRMW<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86FakeRMW::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -395,45 +390,45 @@
   getBeacon()->dump(Func);
 }
 
-template <class Machine>
-void InstX86Label<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Label::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   Str << getName(Func) << ":";
 }
 
-template <class Machine>
-void InstX86Label<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Label::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Asm->bindLocalLabel(Number);
 }
 
-template <class Machine>
-void InstX86Label<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Label::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   Str << getName(Func) << ":";
 }
 
-template <class Machine> void InstX86Br<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Br::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   Str << "\t";
 
-  if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
+  if (Condition == Cond::Br_None) {
     Str << "jmp";
   } else {
-    Str << InstX86Base<Machine>::Traits::InstBrAttributes[Condition].EmitString;
+    Str << Traits::InstBrAttributes[Condition].EmitString;
   }
 
   if (Label) {
     Str << "\t" << Label->getName(Func);
   } else {
-    if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
+    if (Condition == Cond::Br_None) {
       Str << "\t" << getTargetFalse()->getAsmName();
     } else {
       Str << "\t" << getTargetTrue()->getAsmName();
@@ -445,50 +440,46 @@
   }
 }
 
-template <class Machine>
-void InstX86Br<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Br::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   if (Label) {
-    class Label *L = Asm->getOrCreateLocalLabel(Label->getNumber());
-    if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
+    auto *L = Asm->getOrCreateLocalLabel(Label->getNumber());
+    if (Condition == Cond::Br_None) {
       Asm->jmp(L, isNear());
     } else {
       Asm->j(Condition, L, isNear());
     }
   } else {
-    if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
-      class Label *L =
-          Asm->getOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
+    if (Condition == Cond::Br_None) {
+      auto *L = Asm->getOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
       assert(!getTargetTrue());
       Asm->jmp(L, isNear());
     } else {
-      class Label *L =
-          Asm->getOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
+      auto *L = Asm->getOrCreateCfgNodeLabel(getTargetTrue()->getIndex());
       Asm->j(Condition, L, isNear());
       if (getTargetFalse()) {
-        class Label *L2 =
-            Asm->getOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
+        auto *L2 = Asm->getOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
         Asm->jmp(L2, isNear());
       }
     }
   }
 }
 
-template <class Machine> void InstX86Br<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Br::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "br ";
 
-  if (Condition == InstX86Base<Machine>::Traits::Cond::Br_None) {
+  if (Condition == Cond::Br_None) {
     Str << "label %"
         << (Label ? Label->getName(Func) : getTargetFalse()->getName());
     return;
   }
 
-  Str << InstX86Base<Machine>::Traits::InstBrAttributes[Condition]
-             .DisplayString;
+  Str << Traits::InstBrAttributes[Condition].DisplayString;
   if (Label) {
     Str << ", label %" << Label->getName(Func);
   } else {
@@ -501,7 +492,8 @@
   Str << " // (" << (isNear() ? "near" : "far") << " jump)";
 }
 
-template <class Machine> void InstX86Jmp<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Jmp::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -511,15 +503,15 @@
   getJmpTarget()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Jmp<Machine>::emitIAS(const Cfg *Func) const {
-  // Note: Adapted (mostly copied) from InstX86Call<Machine>::emitIAS().
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Jmp::emitIAS(const Cfg *Func) const {
+  // Note: Adapted (mostly copied) from
+  // InstImpl<TraitsType>::InstX86Call::emitIAS().
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Operand *Target = getJmpTarget();
   if (const auto *Var = llvm::dyn_cast<Variable>(Target)) {
     if (Var->hasReg()) {
-      Asm->jmp(InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum()));
+      Asm->jmp(Traits::getEncodedGPR(Var->getRegNum()));
     } else {
       // The jmp instruction with a memory operand should be possible to
       // encode, but it isn't a valid sandboxed instruction, and there
@@ -527,12 +519,9 @@
       // register, so we don't really need to bother implementing it.
       llvm::report_fatal_error("Assembler can't jmp to memory operand");
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(
-                 Target)) {
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Target)) {
     (void)Mem;
-    assert(Mem->getSegmentRegister() ==
-           InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+    assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
     llvm::report_fatal_error("Assembler can't jmp to memory operand");
   } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
     assert(CR->getOffset() == 0 && "We only support jumping to a function");
@@ -550,7 +539,8 @@
   }
 }
 
-template <class Machine> void InstX86Jmp<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Jmp::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -558,8 +548,8 @@
   getJmpTarget()->dump(Func);
 }
 
-template <class Machine>
-void InstX86Call<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Call::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -567,7 +557,7 @@
   Str << "\t"
          "call\t";
   Operand *CallTarget = getCallTarget();
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(CallTarget)) {
     // Emit without a leading '$'.
     Str << CI->getValue();
@@ -580,36 +570,32 @@
   }
 }
 
-template <class Machine>
-void InstX86Call<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Call::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Operand *CallTarget = getCallTarget();
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   if (const auto *Var = llvm::dyn_cast<Variable>(CallTarget)) {
     if (Var->hasReg()) {
-      Asm->call(InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum()));
+      Asm->call(Traits::getEncodedGPR(Var->getRegNum()));
     } else {
       Asm->call(Target->stackVarToAsmOperand(Var));
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(
-                 CallTarget)) {
-    assert(Mem->getSegmentRegister() ==
-           InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(CallTarget)) {
+    assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
     Asm->call(Mem->toAsmAddress(Asm, Target));
   } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(CallTarget)) {
     assert(CR->getOffset() == 0 && "We only support calling a function");
     Asm->call(CR);
   } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(CallTarget)) {
-    Asm->call(Immediate(Imm->getValue()));
+    Asm->call(AssemblerImmediate(Imm->getValue()));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine>
-void InstX86Call<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Call::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -623,9 +609,9 @@
 
 // The this->Opcode parameter needs to be char* and not IceString because of
 // template issues.
-template <class Machine>
-void InstX86Base<Machine>::emitTwoAddress(const Cfg *Func, const char *Opcode,
-                                          const char *Suffix) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Base::emitTwoAddress(
+    const Cfg *Func, const char *Opcode, const char *Suffix) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -636,32 +622,28 @@
   assert(Dest == getSrc(0));
   Operand *Src1 = getSrc(1);
   Str << "\t" << Opcode << Suffix
-      << InstX86Base<Machine>::getWidthString(Dest->getType()) << "\t";
+      << InstX86Base::getWidthString(Dest->getType()) << "\t";
   Src1->emit(Func);
   Str << ", ";
   Dest->emit(Func);
 }
 
-template <class Machine>
-void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op,
-                    const typename InstX86Base<
-                        Machine>::Traits::Assembler::GPREmitterOneOp &Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASOpTyGPR(const Cfg *Func, Type Ty,
+                                          const Operand *Op,
+                                          const GPREmitterOneOp &Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
   if (const auto *Var = llvm::dyn_cast<Variable>(Op)) {
     if (Var->hasReg()) {
       // We cheat a little and use GPRRegister even for byte operations.
-      typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister VarReg =
-          InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum());
+      GPRRegister VarReg = Traits::getEncodedGPR(Var->getRegNum());
       (Asm->*(Emitter.Reg))(Ty, VarReg);
     } else {
-      typename InstX86Base<Machine>::Traits::Address StackAddr(
-          Target->stackVarToAsmOperand(Var));
+      Address StackAddr(Target->stackVarToAsmOperand(Var));
       (Asm->*(Emitter.Addr))(Ty, StackAddr);
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Op)) {
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Op)) {
     Mem->emitSegmentOverride(Asm);
     (Asm->*(Emitter.Addr))(Ty, Mem->toAsmAddress(Asm, Target));
   } else {
@@ -669,236 +651,196 @@
   }
 }
 
-template <class Machine, bool VarCanBeByte, bool SrcCanBeByte>
-void emitIASRegOpTyGPR(
-    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
-        &Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+template <bool VarCanBeByte, bool SrcCanBeByte>
+void InstImpl<TraitsType>::emitIASRegOpTyGPR(const Cfg *Func, Type Ty,
+                                             const Variable *Var,
+                                             const Operand *Src,
+                                             const GPREmitterRegOp &Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(Var->hasReg());
   // We cheat a little and use GPRRegister even for byte operations.
-  typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister VarReg =
-      VarCanBeByte
-          ? InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum())
-          : InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum());
+  GPRRegister VarReg = VarCanBeByte ? Traits::getEncodedGPR(Var->getRegNum())
+                                    : Traits::getEncodedGPR(Var->getRegNum());
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
     if (SrcVar->hasReg()) {
-      typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister SrcReg =
-          SrcCanBeByte
-              ? InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum())
-              : InstX86Base<Machine>::Traits::getEncodedGPR(
-                    SrcVar->getRegNum());
+      GPRRegister SrcReg = SrcCanBeByte
+                               ? Traits::getEncodedGPR(SrcVar->getRegNum())
+                               : Traits::getEncodedGPR(SrcVar->getRegNum());
       (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
     } else {
-      typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
-          Target->stackVarToAsmOperand(SrcVar);
+      Address SrcStackAddr = Target->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.GPRAddr))(Ty, VarReg, SrcStackAddr);
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src)) {
     Mem->emitSegmentOverride(Asm);
     (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Mem->toAsmAddress(Asm, Target));
   } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Imm->getValue()));
+    (Asm->*(Emitter.GPRImm))(Ty, VarReg, AssemblerImmediate(Imm->getValue()));
   } else if (const auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
-    AssemblerFixup *Fixup =
-        Asm->createFixup(InstX86Base<Machine>::Traits::RelFixup, Reloc);
-    (Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Reloc->getOffset(), Fixup));
-  } else if (const auto *Split = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::VariableSplit>(Src)) {
+    AssemblerFixup *Fixup = Asm->createFixup(Traits::RelFixup, Reloc);
+    (Asm->*(Emitter.GPRImm))(Ty, VarReg,
+                             AssemblerImmediate(Reloc->getOffset(), Fixup));
+  } else if (const auto *Split = llvm::dyn_cast<VariableSplit>(Src)) {
     (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Split->toAsmAddress(Func));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine>
-void emitIASAddrOpTyGPR(
-    const Cfg *Func, Type Ty,
-    const typename InstX86Base<Machine>::Traits::Address &Addr,
-    const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp
-        &Emitter) {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  // Src can only be Reg or Immediate.
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASAddrOpTyGPR(const Cfg *Func, Type Ty,
+                                              const Address &Addr,
+                                              const Operand *Src,
+                                              const GPREmitterAddrOp &Emitter) {
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  // Src can only be a register or an immediate (constant or relocatable).
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
     assert(SrcVar->hasReg());
-    typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister SrcReg =
-        InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum());
+    GPRRegister SrcReg = Traits::getEncodedGPR(SrcVar->getRegNum());
     (Asm->*(Emitter.AddrGPR))(Ty, Addr, SrcReg);
   } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Imm->getValue()));
+    (Asm->*(Emitter.AddrImm))(Ty, Addr, AssemblerImmediate(Imm->getValue()));
   } else if (const auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
-    AssemblerFixup *Fixup =
-        Asm->createFixup(InstX86Base<Machine>::Traits::RelFixup, Reloc);
-    (Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Reloc->getOffset(), Fixup));
+    AssemblerFixup *Fixup = Asm->createFixup(Traits::RelFixup, Reloc);
+    (Asm->*(Emitter.AddrImm))(Ty, Addr,
+                              AssemblerImmediate(Reloc->getOffset(), Fixup));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine>
-void emitIASAsAddrOpTyGPR(
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASAsAddrOpTyGPR(
     const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
-    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp
-        &Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+    const GPREmitterAddrOp &Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
   if (const auto *Op0Var = llvm::dyn_cast<Variable>(Op0)) {
     assert(!Op0Var->hasReg());
-    typename InstX86Base<Machine>::Traits::Address StackAddr(
-        Target->stackVarToAsmOperand(Op0Var));
-    emitIASAddrOpTyGPR<Machine>(Func, Ty, StackAddr, Op1, Emitter);
-  } else if (const auto *Op0Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Op0)) {
-    typename InstX86Base<Machine>::Traits::Assembler *Asm =
-        Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+    Address StackAddr(Target->stackVarToAsmOperand(Op0Var));
+    emitIASAddrOpTyGPR(Func, Ty, StackAddr, Op1, Emitter);
+  } else if (const auto *Op0Mem = llvm::dyn_cast<X86OperandMem>(Op0)) {
+    Assembler *Asm = Func->getAssembler<Assembler>();
     Op0Mem->emitSegmentOverride(Asm);
-    emitIASAddrOpTyGPR<Machine>(Func, Ty, Op0Mem->toAsmAddress(Asm, Target),
-                                Op1, Emitter);
-  } else if (const auto *Split = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::VariableSplit>(Op0)) {
-    emitIASAddrOpTyGPR<Machine>(Func, Ty, Split->toAsmAddress(Func), Op1,
-                                Emitter);
+    emitIASAddrOpTyGPR(Func, Ty, Op0Mem->toAsmAddress(Asm, Target), Op1,
+                       Emitter);
+  } else if (const auto *Split = llvm::dyn_cast<VariableSplit>(Op0)) {
+    emitIASAddrOpTyGPR(Func, Ty, Split->toAsmAddress(Func), Op1, Emitter);
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine>
-void InstX86Base<Machine>::emitIASGPRShift(
-    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftOp
-        &Emitter) {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASGPRShift(const Cfg *Func, Type Ty,
+                                           const Variable *Var,
+                                           const Operand *Src,
+                                           const GPREmitterShiftOp &Emitter) {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   // Technically, the Dest Var can be mem as well, but we only use Reg. We can
   // extend this to check Dest if we decide to use that form.
   assert(Var->hasReg());
   // We cheat a little and use GPRRegister even for byte operations.
-  typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister VarReg =
-      InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum());
+  GPRRegister VarReg = Traits::getEncodedGPR(Var->getRegNum());
   // Src must be reg == ECX or an Imm8. This is asserted by the assembler.
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
     assert(SrcVar->hasReg());
-    typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister SrcReg =
-        InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum());
+    GPRRegister SrcReg = Traits::getEncodedGPR(SrcVar->getRegNum());
     (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
   } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Imm->getValue()));
+    (Asm->*(Emitter.GPRImm))(Ty, VarReg, AssemblerImmediate(Imm->getValue()));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine>
-void emitIASGPRShiftDouble(
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASGPRShiftDouble(
     const Cfg *Func, const Variable *Dest, const Operand *Src1Op,
-    const Operand *Src2Op,
-    const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterShiftD
-        &Emitter) {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+    const Operand *Src2Op, const GPREmitterShiftD &Emitter) {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   // Dest can be reg or mem, but we only use the reg variant.
   assert(Dest->hasReg());
-  typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister DestReg =
-      InstX86Base<Machine>::Traits::getEncodedGPR(Dest->getRegNum());
+  GPRRegister DestReg = Traits::getEncodedGPR(Dest->getRegNum());
   // SrcVar1 must be reg.
   const auto *SrcVar1 = llvm::cast<Variable>(Src1Op);
   assert(SrcVar1->hasReg());
-  typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister SrcReg =
-      InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar1->getRegNum());
+  GPRRegister SrcReg = Traits::getEncodedGPR(SrcVar1->getRegNum());
   Type Ty = SrcVar1->getType();
   // Src2 can be the implicit CL register or an immediate.
   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2Op)) {
     (Asm->*(Emitter.GPRGPRImm))(Ty, DestReg, SrcReg,
-                                Immediate(Imm->getValue()));
+                                AssemblerImmediate(Imm->getValue()));
   } else {
-    assert(llvm::cast<Variable>(Src2Op)->getRegNum() ==
-           InstX86Base<Machine>::Traits::RegisterSet::Reg_cl);
+    assert(llvm::cast<Variable>(Src2Op)->getRegNum() == RegisterSet::Reg_cl);
     (Asm->*(Emitter.GPRGPR))(Ty, DestReg, SrcReg);
   }
 }
 
-template <class Machine>
-void emitIASXmmShift(
-    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp
-        &Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASXmmShift(const Cfg *Func, Type Ty,
+                                           const Variable *Var,
+                                           const Operand *Src,
+                                           const XmmEmitterShiftOp &Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(Var->hasReg());
-  typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister VarReg =
-      InstX86Base<Machine>::Traits::getEncodedXmm(Var->getRegNum());
+  XmmRegister VarReg = Traits::getEncodedXmm(Var->getRegNum());
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
     if (SrcVar->hasReg()) {
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister SrcReg =
-          InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum());
+      XmmRegister SrcReg = Traits::getEncodedXmm(SrcVar->getRegNum());
       (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
     } else {
-      typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
-          Target->stackVarToAsmOperand(SrcVar);
+      Address SrcStackAddr = Target->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
-    assert(Mem->getSegmentRegister() ==
-           InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src)) {
+    assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
     (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm, Target));
   } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
-    (Asm->*(Emitter.XmmImm))(Ty, VarReg, Immediate(Imm->getValue()));
+    (Asm->*(Emitter.XmmImm))(Ty, VarReg, AssemblerImmediate(Imm->getValue()));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine>
-void emitIASRegOpTyXMM(
-    const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-        &Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASRegOpTyXMM(const Cfg *Func, Type Ty,
+                                             const Variable *Var,
+                                             const Operand *Src,
+                                             const XmmEmitterRegOp &Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(Var->hasReg());
-  typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister VarReg =
-      InstX86Base<Machine>::Traits::getEncodedXmm(Var->getRegNum());
+  XmmRegister VarReg = Traits::getEncodedXmm(Var->getRegNum());
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
     if (SrcVar->hasReg()) {
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister SrcReg =
-          InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum());
+      XmmRegister SrcReg = Traits::getEncodedXmm(SrcVar->getRegNum());
       (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
     } else {
-      typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
-          Target->stackVarToAsmOperand(SrcVar);
+      Address SrcStackAddr = Target->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
-    assert(Mem->getSegmentRegister() ==
-           InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src)) {
+    assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
     (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm, Target));
   } else if (const auto *Imm = llvm::dyn_cast<Constant>(Src)) {
-    (Asm->*(Emitter.XmmAddr))(
-        Ty, VarReg,
-        InstX86Base<Machine>::Traits::Address::ofConstPool(Asm, Imm));
+    (Asm->*(Emitter.XmmAddr))(Ty, VarReg,
+                              Traits::Address::ofConstPool(Asm, Imm));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine, typename DReg_t, typename SReg_t,
-          DReg_t (*destEnc)(int32_t), SReg_t (*srcEnc)(int32_t)>
-void emitIASCastRegOp(const Cfg *Func, Type DestTy, const Variable *Dest,
-                      Type SrcTy, const Operand *Src,
-                      const typename InstX86Base<Machine>::Traits::Assembler::
-                          template CastEmitterRegOp<DReg_t, SReg_t> &Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
+          SReg_t (*srcEnc)(int32_t)>
+void InstImpl<TraitsType>::emitIASCastRegOp(
+    const Cfg *Func, Type DestTy, const Variable *Dest, Type SrcTy,
+    const Operand *Src, const CastEmitterRegOp<DReg_t, SReg_t> &Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(Dest->hasReg());
   DReg_t DestReg = destEnc(Dest->getRegNum());
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -906,12 +848,10 @@
       SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
       (Asm->*(Emitter.RegReg))(DestTy, DestReg, SrcTy, SrcReg);
     } else {
-      typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
-          Target->stackVarToAsmOperand(SrcVar);
+      Address SrcStackAddr = Target->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy, SrcStackAddr);
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src)) {
     Mem->emitSegmentOverride(Asm);
     (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy,
                               Mem->toAsmAddress(Asm, Target));
@@ -920,31 +860,27 @@
   }
 }
 
-template <class Machine, typename DReg_t, typename SReg_t,
-          DReg_t (*destEnc)(int32_t), SReg_t (*srcEnc)(int32_t)>
-void emitIASThreeOpImmOps(
+template <typename TraitsType>
+template <typename DReg_t, typename SReg_t, DReg_t (*destEnc)(int32_t),
+          SReg_t (*srcEnc)(int32_t)>
+void InstImpl<TraitsType>::emitIASThreeOpImmOps(
     const Cfg *Func, Type DispatchTy, const Variable *Dest, const Operand *Src0,
-    const Operand *Src1,
-    const typename InstX86Base<Machine>::Traits::Assembler::
-        template ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+    const Operand *Src1, const ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
   // This only handles Dest being a register, and Src1 being an immediate.
   assert(Dest->hasReg());
   DReg_t DestReg = destEnc(Dest->getRegNum());
-  Immediate Imm(llvm::cast<ConstantInteger32>(Src1)->getValue());
+  AssemblerImmediate Imm(llvm::cast<ConstantInteger32>(Src1)->getValue());
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src0)) {
     if (SrcVar->hasReg()) {
       SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
       (Asm->*(Emitter.RegRegImm))(DispatchTy, DestReg, SrcReg, Imm);
     } else {
-      typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
-          Target->stackVarToAsmOperand(SrcVar);
+      Address SrcStackAddr = Target->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, SrcStackAddr, Imm);
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Src0)) {
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src0)) {
     Mem->emitSegmentOverride(Asm);
     (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg,
                                  Mem->toAsmAddress(Asm, Target), Imm);
@@ -953,49 +889,41 @@
   }
 }
 
-template <class Machine>
-void emitIASMovlikeXMM(
-    const Cfg *Func, const Variable *Dest, const Operand *Src,
-    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterMovOps
-        Emitter) {
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASMovlikeXMM(const Cfg *Func,
+                                             const Variable *Dest,
+                                             const Operand *Src,
+                                             const XmmEmitterMovOps Emitter) {
+  auto *Target = InstX86Base::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
   if (Dest->hasReg()) {
-    typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister DestReg =
-        InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum());
+    XmmRegister DestReg = Traits::getEncodedXmm(Dest->getRegNum());
     if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
       if (SrcVar->hasReg()) {
-        (Asm->*(Emitter.XmmXmm))(
-            DestReg,
-            InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
+        (Asm->*(Emitter.XmmXmm))(DestReg,
+                                 Traits::getEncodedXmm(SrcVar->getRegNum()));
       } else {
-        typename InstX86Base<Machine>::Traits::Address StackAddr(
-            Target->stackVarToAsmOperand(SrcVar));
+        Address StackAddr(Target->stackVarToAsmOperand(SrcVar));
         (Asm->*(Emitter.XmmAddr))(DestReg, StackAddr);
       }
-    } else if (const auto *SrcMem = llvm::dyn_cast<
-                   typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
-      assert(SrcMem->getSegmentRegister() ==
-             InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+    } else if (const auto *SrcMem = llvm::dyn_cast<X86OperandMem>(Src)) {
+      assert(SrcMem->getSegmentRegister() == X86OperandMem::DefaultSegment);
       (Asm->*(Emitter.XmmAddr))(DestReg, SrcMem->toAsmAddress(Asm, Target));
     } else {
       llvm_unreachable("Unexpected operand type");
     }
   } else {
-    typename InstX86Base<Machine>::Traits::Address StackAddr(
-        Target->stackVarToAsmOperand(Dest));
+    Address StackAddr(Target->stackVarToAsmOperand(Dest));
     // Src must be a register in this case.
     const auto *SrcVar = llvm::cast<Variable>(Src);
     assert(SrcVar->hasReg());
-    (Asm->*(Emitter.AddrXmm))(
-        StackAddr,
-        InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
+    (Asm->*(Emitter.AddrXmm))(StackAddr,
+                              Traits::getEncodedXmm(SrcVar->getRegNum()));
   }
 }
 
-template <class Machine>
-void InstX86Sqrtss<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Sqrtss::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1003,14 +931,14 @@
   Type Ty = this->getSrc(0)->getType();
   assert(isScalarFloatingType(Ty));
   Str << "\t"
-         "sqrt" << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString
-      << "\t";
+         "sqrt" << Traits::TypeAttributes[Ty].SdSsString << "\t";
   this->getSrc(0)->emit(Func);
   Str << ", ";
   this->getDest()->emit(Func);
 }
 
-template <class Machine> void InstX86Div<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Div::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1020,19 +948,17 @@
   Src1->emit(Func);
 }
 
-template <class Machine>
-void InstX86Div<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Div::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
   const Operand *Src = this->getSrc(1);
   Type Ty = Src->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp
-      Emitter = {&InstX86Base<Machine>::Traits::Assembler::div,
-                 &InstX86Base<Machine>::Traits::Assembler::div};
-  emitIASOpTyGPR<Machine>(Func, Ty, Src, Emitter);
+  static const GPREmitterOneOp Emitter = {&Assembler::div, &Assembler::div};
+  emitIASOpTyGPR(Func, Ty, Src, Emitter);
 }
 
-template <class Machine>
-void InstX86Idiv<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Idiv::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1042,78 +968,73 @@
   Src1->emit(Func);
 }
 
-template <class Machine>
-void InstX86Idiv<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Idiv::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
   const Operand *Src = this->getSrc(1);
   Type Ty = Src->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp
-      Emitter = {&InstX86Base<Machine>::Traits::Assembler::idiv,
-                 &InstX86Base<Machine>::Traits::Assembler::idiv};
-  emitIASOpTyGPR<Machine>(Func, Ty, Src, Emitter);
+  static const GPREmitterOneOp Emitter = {&Assembler::idiv, &Assembler::idiv};
+  emitIASOpTyGPR(Func, Ty, Src, Emitter);
 }
 
 // pblendvb and blendvps take xmm0 as a final implicit argument.
-template <class Machine>
-void emitVariableBlendInst(const char *Opcode, const Inst *Inst,
-                           const Cfg *Func) {
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitVariableBlendInst(const char *Opcode,
+                                                 const Inst *Inst,
+                                                 const Cfg *Func) {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(Inst->getSrcSize() == 3);
   assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_xmm0);
+         RegisterSet::Reg_xmm0);
   Str << "\t" << Opcode << "\t";
   Inst->getSrc(1)->emit(Func);
   Str << ", ";
   Inst->getDest()->emit(Func);
 }
 
-template <class Machine>
-void emitIASVariableBlendInst(
-    const Inst *Inst, const Cfg *Func,
-    const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-        &Emitter) {
+template <typename TraitsType>
+void InstImpl<TraitsType>::emitIASVariableBlendInst(
+    const Inst *Inst, const Cfg *Func, const XmmEmitterRegOp &Emitter) {
   assert(Inst->getSrcSize() == 3);
   assert(llvm::cast<Variable>(Inst->getSrc(2))->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_xmm0);
+         RegisterSet::Reg_xmm0);
   const Variable *Dest = Inst->getDest();
   const Operand *Src = Inst->getSrc(1);
-  emitIASRegOpTyXMM<Machine>(Func, Dest->getType(), Dest, Src, Emitter);
+  emitIASRegOpTyXMM(Func, Dest->getType(), Dest, Src, Emitter);
 }
 
-template <class Machine>
-void InstX86Blendvps<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Blendvps::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
-  emitVariableBlendInst<Machine>(this->Opcode, this, Func);
+  emitVariableBlendInst(this->Opcode, this, Func);
 }
 
-template <class Machine>
-void InstX86Blendvps<Machine>::emitIAS(const Cfg *Func) const {
-  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-      Emitter = {&InstX86Base<Machine>::Traits::Assembler::blendvps,
-                 &InstX86Base<Machine>::Traits::Assembler::blendvps};
-  emitIASVariableBlendInst<Machine>(this, Func, Emitter);
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Blendvps::emitIAS(const Cfg *Func) const {
+  static const XmmEmitterRegOp Emitter = {&Assembler::blendvps,
+                                          &Assembler::blendvps};
+  emitIASVariableBlendInst(this, Func, Emitter);
 }
 
-template <class Machine>
-void InstX86Pblendvb<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pblendvb::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
-  emitVariableBlendInst<Machine>(this->Opcode, this, Func);
+  emitVariableBlendInst(this->Opcode, this, Func);
 }
 
-template <class Machine>
-void InstX86Pblendvb<Machine>::emitIAS(const Cfg *Func) const {
-  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-      Emitter = {&InstX86Base<Machine>::Traits::Assembler::pblendvb,
-                 &InstX86Base<Machine>::Traits::Assembler::pblendvb};
-  emitIASVariableBlendInst<Machine>(this, Func, Emitter);
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pblendvb::emitIAS(const Cfg *Func) const {
+  static const XmmEmitterRegOp Emitter = {&Assembler::pblendvb,
+                                          &Assembler::pblendvb};
+  emitIASVariableBlendInst(this, Func, Emitter);
 }
 
-template <class Machine>
-void InstX86Imul<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Imul::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1123,8 +1044,7 @@
     // The 8-bit version of imul only allows the form "imul r/m8".
     const auto *Src0Var = llvm::dyn_cast<Variable>(this->getSrc(0));
     (void)Src0Var;
-    assert(Src0Var->getRegNum() ==
-           InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
+    assert(Src0Var->getRegNum() == RegisterSet::Reg_al);
     Str << "\t"
            "imulb\t";
     this->getSrc(1)->emit(Func);
@@ -1141,8 +1061,8 @@
   }
 }
 
-template <class Machine>
-void InstX86Imul<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Imul::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   const Variable *Var = this->getDest();
   Type Ty = Var->getType();
@@ -1151,28 +1071,21 @@
     // The 8-bit version of imul only allows the form "imul r/m8".
     const auto *Src0Var = llvm::dyn_cast<Variable>(this->getSrc(0));
     (void)Src0Var;
-    assert(Src0Var->getRegNum() ==
-           InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
-    static const typename InstX86Base<
-        Machine>::Traits::Assembler::GPREmitterOneOp Emitter = {
-        &InstX86Base<Machine>::Traits::Assembler::imul,
-        &InstX86Base<Machine>::Traits::Assembler::imul};
-    emitIASOpTyGPR<Machine>(Func, Ty, this->getSrc(1), Emitter);
+    assert(Src0Var->getRegNum() == RegisterSet::Reg_al);
+    static const GPREmitterOneOp Emitter = {&Assembler::imul, &Assembler::imul};
+    emitIASOpTyGPR(Func, Ty, this->getSrc(1), Emitter);
   } else {
     // The two-address version is used when multiplying by a non-constant
     // or doing an 8-bit multiply.
     assert(Var == this->getSrc(0));
-    static const typename InstX86Base<
-        Machine>::Traits::Assembler::GPREmitterRegOp Emitter = {
-        &InstX86Base<Machine>::Traits::Assembler::imul,
-        &InstX86Base<Machine>::Traits::Assembler::imul,
-        &InstX86Base<Machine>::Traits::Assembler::imul};
-    emitIASRegOpTyGPR<Machine>(Func, Ty, Var, Src, Emitter);
+    static const GPREmitterRegOp Emitter = {&Assembler::imul, &Assembler::imul,
+                                            &Assembler::imul};
+    emitIASRegOpTyGPR(Func, Ty, Var, Src, Emitter);
   }
 }
 
-template <class Machine>
-void InstX86ImulImm<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86ImulImm::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1189,50 +1102,35 @@
   Dest->emit(Func);
 }
 
-template <class Machine>
-void InstX86ImulImm<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86ImulImm::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   const Variable *Dest = this->getDest();
   Type Ty = Dest->getType();
   assert(llvm::isa<Constant>(this->getSrc(1)));
-  static const typename InstX86Base<Machine>::Traits::Assembler::
-      template ThreeOpImmEmitter<
-          typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-          typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister>
-          Emitter = {&InstX86Base<Machine>::Traits::Assembler::imul,
-                     &InstX86Base<Machine>::Traits::Assembler::imul};
-  emitIASThreeOpImmOps<
-      Machine, typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-      typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-      InstX86Base<Machine>::Traits::getEncodedGPR,
-      InstX86Base<Machine>::Traits::getEncodedGPR>(
-      Func, Ty, Dest, this->getSrc(0), this->getSrc(1), Emitter);
+  static const ThreeOpImmEmitter<GPRRegister, GPRRegister> Emitter = {
+      &Assembler::imul, &Assembler::imul};
+  emitIASThreeOpImmOps<GPRRegister, GPRRegister, Traits::getEncodedGPR,
+                       Traits::getEncodedGPR>(Func, Ty, Dest, this->getSrc(0),
+                                              this->getSrc(1), Emitter);
 }
 
-template <class Machine>
-void InstX86Insertps<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Insertps::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
-  assert(InstX86Base<Machine>::getTarget(Func)->getInstructionSet() >=
-         InstX86Base<Machine>::Traits::SSE4_1);
+  assert(InstX86Base::getTarget(Func)->getInstructionSet() >= Traits::SSE4_1);
   const Variable *Dest = this->getDest();
   assert(Dest == this->getSrc(0));
   Type Ty = Dest->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::
-      template ThreeOpImmEmitter<
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister>
-          Emitter = {&InstX86Base<Machine>::Traits::Assembler::insertps,
-                     &InstX86Base<Machine>::Traits::Assembler::insertps};
-  emitIASThreeOpImmOps<
-      Machine, typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      InstX86Base<Machine>::Traits::getEncodedXmm,
-      InstX86Base<Machine>::Traits::getEncodedXmm>(
-      Func, Ty, Dest, this->getSrc(1), this->getSrc(2), Emitter);
+  static const ThreeOpImmEmitter<XmmRegister, XmmRegister> Emitter = {
+      &Assembler::insertps, &Assembler::insertps};
+  emitIASThreeOpImmOps<XmmRegister, XmmRegister, Traits::getEncodedXmm,
+                       Traits::getEncodedXmm>(Func, Ty, Dest, this->getSrc(1),
+                                              this->getSrc(2), Emitter);
 }
 
-template <class Machine>
-void InstX86Cbwdq<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cbwdq::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1247,38 +1145,36 @@
     llvm_unreachable("unexpected source type!");
     break;
   case IceType_i8:
-    assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
-    assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax ||
-           DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ah);
+    assert(SrcReg == RegisterSet::Reg_al);
+    assert(DestReg == RegisterSet::Reg_ax || DestReg == RegisterSet::Reg_ah);
     Str << "\t"
            "cbtw";
     break;
   case IceType_i16:
-    assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
-    assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
+    assert(SrcReg == RegisterSet::Reg_ax);
+    assert(DestReg == RegisterSet::Reg_dx);
     Str << "\t"
            "cwtd";
     break;
   case IceType_i32:
-    assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
-    assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+    assert(SrcReg == RegisterSet::Reg_eax);
+    assert(DestReg == RegisterSet::Reg_edx);
     Str << "\t"
            "cltd";
     break;
   case IceType_i64:
-    assert(InstX86Base<Machine>::Traits::Is64Bit);
-    assert(SrcReg == InstX86Base<Machine>::Traits::getRaxOrDie());
-    assert(DestReg == InstX86Base<Machine>::Traits::getRdxOrDie());
+    assert(Traits::Is64Bit);
+    assert(SrcReg == Traits::getRaxOrDie());
+    assert(DestReg == Traits::getRdxOrDie());
     Str << "\t"
            "cqo";
     break;
   }
 }
 
-template <class Machine>
-void InstX86Cbwdq<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cbwdq::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(this->getSrcSize() == 1);
   Operand *Src0 = this->getSrc(0);
   int32_t DestReg = this->getDest()->getRegNum();
@@ -1290,64 +1186,63 @@
     llvm_unreachable("unexpected source type!");
     break;
   case IceType_i8:
-    assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
-    assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax ||
-           DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ah);
+    assert(SrcReg == RegisterSet::Reg_al);
+    assert(DestReg == RegisterSet::Reg_ax || DestReg == RegisterSet::Reg_ah);
     Asm->cbw();
     break;
   case IceType_i16:
-    assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
-    assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
+    assert(SrcReg == RegisterSet::Reg_ax);
+    assert(DestReg == RegisterSet::Reg_dx);
     Asm->cwd();
     break;
   case IceType_i32:
-    assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
-    assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+    assert(SrcReg == RegisterSet::Reg_eax);
+    assert(DestReg == RegisterSet::Reg_edx);
     Asm->cdq();
     break;
   case IceType_i64:
-    assert(InstX86Base<Machine>::Traits::Is64Bit);
-    assert(SrcReg == InstX86Base<Machine>::Traits::getRaxOrDie());
-    assert(DestReg == InstX86Base<Machine>::Traits::getRdxOrDie());
+    assert(Traits::Is64Bit);
+    assert(SrcReg == Traits::getRaxOrDie());
+    assert(DestReg == Traits::getRdxOrDie());
     Asm->cqo();
     break;
   }
 }
 
-template <class Machine> void InstX86Mul<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mul::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(this->getSrcSize() == 2);
   assert(llvm::isa<Variable>(this->getSrc(0)));
   assert(llvm::cast<Variable>(this->getSrc(0))->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
-  assert(
-      this->getDest()->getRegNum() ==
-      InstX86Base<Machine>::Traits::RegisterSet::Reg_eax); // TODO: allow edx?
+         RegisterSet::Reg_eax);
+  // TODO: allow edx? For now the destination must be eax, as enforced by
+  // the assert below.
+  assert(this->getDest()->getRegNum() == RegisterSet::Reg_eax);
   Str << "\t"
          "mul" << this->getWidthString(this->getDest()->getType()) << "\t";
   this->getSrc(1)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Mul<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mul::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   assert(llvm::isa<Variable>(this->getSrc(0)));
   assert(llvm::cast<Variable>(this->getSrc(0))->getRegNum() ==
-         InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
-  assert(
-      this->getDest()->getRegNum() ==
-      InstX86Base<Machine>::Traits::RegisterSet::Reg_eax); // TODO: allow edx?
+         RegisterSet::Reg_eax);
+  // TODO: allow edx? For now the destination must be eax, as enforced by
+  // the assert below.
+  assert(this->getDest()->getRegNum() == RegisterSet::Reg_eax);
   const Operand *Src = this->getSrc(1);
   Type Ty = Src->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterOneOp
-      Emitter = {&InstX86Base<Machine>::Traits::Assembler::mul,
-                 &InstX86Base<Machine>::Traits::Assembler::mul};
-  emitIASOpTyGPR<Machine>(Func, Ty, Src, Emitter);
+  static const GPREmitterOneOp Emitter = {&Assembler::mul, &Assembler::mul};
+  emitIASOpTyGPR(Func, Ty, Src, Emitter);
 }
 
-template <class Machine> void InstX86Mul<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mul::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1356,8 +1251,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Shld<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Shld::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1373,22 +1268,19 @@
   Dest->emit(Func);
 }
 
-template <class Machine>
-void InstX86Shld<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Shld::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
   assert(this->getDest() == this->getSrc(0));
   const Variable *Dest = this->getDest();
   const Operand *Src1 = this->getSrc(1);
   const Operand *Src2 = this->getSrc(2);
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::GPREmitterShiftD Emitter = {
-      &InstX86Base<Machine>::Traits::Assembler::shld,
-      &InstX86Base<Machine>::Traits::Assembler::shld};
-  emitIASGPRShiftDouble<Machine>(Func, Dest, Src1, Src2, Emitter);
+  static const GPREmitterShiftD Emitter = {&Assembler::shld, &Assembler::shld};
+  emitIASGPRShiftDouble(Func, Dest, Src1, Src2, Emitter);
 }
 
-template <class Machine>
-void InstX86Shld<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Shld::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1397,8 +1289,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Shrd<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Shrd::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1414,22 +1306,19 @@
   Dest->emit(Func);
 }
 
-template <class Machine>
-void InstX86Shrd<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Shrd::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
   assert(this->getDest() == this->getSrc(0));
   const Variable *Dest = this->getDest();
   const Operand *Src1 = this->getSrc(1);
   const Operand *Src2 = this->getSrc(2);
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::GPREmitterShiftD Emitter = {
-      &InstX86Base<Machine>::Traits::Assembler::shrd,
-      &InstX86Base<Machine>::Traits::Assembler::shrd};
-  emitIASGPRShiftDouble<Machine>(Func, Dest, Src1, Src2, Emitter);
+  static const GPREmitterShiftD Emitter = {&Assembler::shrd, &Assembler::shrd};
+  emitIASGPRShiftDouble(Func, Dest, Src1, Src2, Emitter);
 }
 
-template <class Machine>
-void InstX86Shrd<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Shrd::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1438,133 +1327,116 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Cmov<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmov::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   Variable *Dest = this->getDest();
   Str << "\t";
-  assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
+  assert(Condition != Cond::Br_None);
   assert(this->getDest()->hasReg());
-  Str << "cmov"
-      << InstX86Base<Machine>::Traits::InstBrAttributes[Condition].DisplayString
+  Str << "cmov" << Traits::InstBrAttributes[Condition].DisplayString
       << this->getWidthString(Dest->getType()) << "\t";
   this->getSrc(1)->emit(Func);
   Str << ", ";
   Dest->emit(Func);
 }
 
-template <class Machine>
-void InstX86Cmov<Machine>::emitIAS(const Cfg *Func) const {
-  assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmov::emitIAS(const Cfg *Func) const {
+  assert(Condition != Cond::Br_None);
   assert(this->getDest()->hasReg());
   assert(this->getSrcSize() == 2);
   Operand *Src = this->getSrc(1);
   Type SrcTy = Src->getType();
-  assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
-         (InstX86Base<Machine>::Traits::Is64Bit));
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 || (Traits::Is64Bit));
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  auto *Target = InstX86Base::getTarget(Func);
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
     if (SrcVar->hasReg()) {
-      Asm->cmov(
-          SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
-                                this->getDest()->getRegNum()),
-          InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum()));
+      Asm->cmov(SrcTy, Condition,
+                Traits::getEncodedGPR(this->getDest()->getRegNum()),
+                Traits::getEncodedGPR(SrcVar->getRegNum()));
     } else {
-      Asm->cmov(SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
-                                      this->getDest()->getRegNum()),
+      Asm->cmov(SrcTy, Condition,
+                Traits::getEncodedGPR(this->getDest()->getRegNum()),
                 Target->stackVarToAsmOperand(SrcVar));
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
-    assert(Mem->getSegmentRegister() ==
-           InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
-    Asm->cmov(SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
-                                    this->getDest()->getRegNum()),
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src)) {
+    assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
+    Asm->cmov(SrcTy, Condition,
+              Traits::getEncodedGPR(this->getDest()->getRegNum()),
               Mem->toAsmAddress(Asm, Target));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine>
-void InstX86Cmov<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmov::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
-  Str << "cmov"
-      << InstX86Base<Machine>::Traits::InstBrAttributes[Condition].DisplayString
-      << ".";
+  Str << "cmov" << Traits::InstBrAttributes[Condition].DisplayString << ".";
   Str << this->getDest()->getType() << " ";
   this->dumpDest(Func);
   Str << ", ";
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Cmpps<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpps::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(this->getSrcSize() == 2);
-  assert(Condition < InstX86Base<Machine>::Traits::Cond::Cmpps_Invalid);
+  assert(Condition < Cond::Cmpps_Invalid);
   Type DestTy = this->Dest->getType();
   Str << "\t"
-         "cmp"
-      << InstX86Base<Machine>::Traits::InstCmppsAttributes[Condition].EmitString
-      << InstX86Base<Machine>::Traits::TypeAttributes[DestTy].PdPsString
-      << "\t";
+         "cmp" << Traits::InstCmppsAttributes[Condition].EmitString
+      << Traits::TypeAttributes[DestTy].PdPsString << "\t";
   this->getSrc(1)->emit(Func);
   Str << ", ";
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Cmpps<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpps::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(this->getSrcSize() == 2);
-  assert(Condition < InstX86Base<Machine>::Traits::Cond::Cmpps_Invalid);
+  assert(Condition < Cond::Cmpps_Invalid);
   // Assuming there isn't any load folding for cmpps, and vector constants are
   // not allowed in PNaCl.
   assert(llvm::isa<Variable>(this->getSrc(1)));
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   const auto *SrcVar = llvm::cast<Variable>(this->getSrc(1));
   if (SrcVar->hasReg()) {
     Asm->cmpps(this->getDest()->getType(),
-               InstX86Base<Machine>::Traits::getEncodedXmm(
-                   this->getDest()->getRegNum()),
-               InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()),
-               Condition);
+               Traits::getEncodedXmm(this->getDest()->getRegNum()),
+               Traits::getEncodedXmm(SrcVar->getRegNum()), Condition);
   } else {
-    typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
-        Target->stackVarToAsmOperand(SrcVar);
+    Address SrcStackAddr = Target->stackVarToAsmOperand(SrcVar);
     Asm->cmpps(this->getDest()->getType(),
-               InstX86Base<Machine>::Traits::getEncodedXmm(
-                   this->getDest()->getRegNum()),
+               Traits::getEncodedXmm(this->getDest()->getRegNum()),
                SrcStackAddr, Condition);
   }
 }
 
-template <class Machine>
-void InstX86Cmpps<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpps::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
-  assert(Condition < InstX86Base<Machine>::Traits::Cond::Cmpps_Invalid);
+  assert(Condition < Cond::Cmpps_Invalid);
   this->dumpDest(Func);
-  Str << " = cmp"
-      << InstX86Base<Machine>::Traits::InstCmppsAttributes[Condition].EmitString
-      << "ps"
-         "\t";
+  Str << " = cmp" << Traits::InstCmppsAttributes[Condition].EmitString << "ps"
+                                                                          "\t";
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Cmpxchg<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpxchg::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1580,29 +1452,23 @@
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Cmpxchg<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpxchg::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Type Ty = this->getSrc(0)->getType();
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  const auto Mem =
-      llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-          this->getSrc(0));
-  assert(Mem->getSegmentRegister() ==
-         InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
-  const typename InstX86Base<Machine>::Traits::Address Addr =
-      Mem->toAsmAddress(Asm, Target);
+  auto *Target = InstX86Base::getTarget(Func);
+  const auto *Mem = llvm::cast<X86OperandMem>(this->getSrc(0));
+  assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
+  const Address Addr = Mem->toAsmAddress(Asm, Target);
   const auto *VarReg = llvm::cast<Variable>(this->getSrc(2));
   assert(VarReg->hasReg());
-  const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg =
-      InstX86Base<Machine>::Traits::getEncodedGPR(VarReg->getRegNum());
+  const GPRRegister Reg = Traits::getEncodedGPR(VarReg->getRegNum());
   Asm->cmpxchg(Ty, Addr, Reg, this->Locked);
 }
 
-template <class Machine>
-void InstX86Cmpxchg<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpxchg::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1613,8 +1479,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Cmpxchg8b<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpxchg8b::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1628,24 +1494,19 @@
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Cmpxchg8b<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpxchg8b::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 5);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  const auto Mem =
-      llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-          this->getSrc(0));
-  assert(Mem->getSegmentRegister() ==
-         InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  const typename InstX86Base<Machine>::Traits::Address Addr =
-      Mem->toAsmAddress(Asm, Target);
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  const auto *Mem = llvm::cast<X86OperandMem>(this->getSrc(0));
+  assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
+  auto *Target = InstX86Base::getTarget(Func);
+  const Address Addr = Mem->toAsmAddress(Asm, Target);
   Asm->cmpxchg8b(Addr, this->Locked);
 }
 
-template <class Machine>
-void InstX86Cmpxchg8b<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cmpxchg8b::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1656,7 +1517,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine> void InstX86Cvt<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cvt::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1665,19 +1527,15 @@
          "cvt";
   if (isTruncating())
     Str << "t";
-  Str << InstX86Base<Machine>::Traits::TypeAttributes[this->getSrc(0)
-                                                          ->getType()]
-             .CvtString << "2"
-      << InstX86Base<
-             Machine>::Traits::TypeAttributes[this->getDest()->getType()]
-             .CvtString << "\t";
+  Str << Traits::TypeAttributes[this->getSrc(0)->getType()].CvtString << "2"
+      << Traits::TypeAttributes[this->getDest()->getType()].CvtString << "\t";
   this->getSrc(0)->emit(Func);
   Str << ", ";
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Cvt<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
@@ -1686,85 +1544,64 @@
   switch (Variant) {
   case Si2ss: {
     assert(isScalarIntegerType(SrcTy));
-    if (!InstX86Base<Machine>::Traits::Is64Bit) {
+    if (!Traits::Is64Bit) {
       assert(typeWidthInBytes(SrcTy) <= 4);
     } else {
       assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
     }
     assert(isScalarFloatingType(DestTy));
-    static const typename InstX86Base<Machine>::Traits::Assembler::
-        template CastEmitterRegOp<
-            typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-            typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister>
-            Emitter = {&InstX86Base<Machine>::Traits::Assembler::cvtsi2ss,
-                       &InstX86Base<Machine>::Traits::Assembler::cvtsi2ss};
-    emitIASCastRegOp<
-        Machine,
-        typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-        typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-        InstX86Base<Machine>::Traits::getEncodedXmm,
-        InstX86Base<Machine>::Traits::getEncodedGPR>(Func, DestTy, Dest, SrcTy,
-                                                     Src, Emitter);
+    static const CastEmitterRegOp<XmmRegister, GPRRegister> Emitter = {
+        &Assembler::cvtsi2ss, &Assembler::cvtsi2ss};
+    emitIASCastRegOp<XmmRegister, GPRRegister, Traits::getEncodedXmm,
+                     Traits::getEncodedGPR>(Func, DestTy, Dest, SrcTy, Src,
+                                            Emitter);
     return;
   }
   case Tss2si: {
     assert(isScalarFloatingType(SrcTy));
     assert(isScalarIntegerType(DestTy));
-    if (!InstX86Base<Machine>::Traits::Is64Bit) {
+    if (!Traits::Is64Bit) {
       assert(typeWidthInBytes(DestTy) <= 4);
     } else {
       assert(DestTy == IceType_i32 || DestTy == IceType_i64);
     }
-    static const typename InstX86Base<Machine>::Traits::Assembler::
-        template CastEmitterRegOp<
-            typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-            typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister>
-            Emitter = {&InstX86Base<Machine>::Traits::Assembler::cvttss2si,
-                       &InstX86Base<Machine>::Traits::Assembler::cvttss2si};
-    emitIASCastRegOp<
-        Machine,
-        typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-        typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-        InstX86Base<Machine>::Traits::getEncodedGPR,
-        InstX86Base<Machine>::Traits::getEncodedXmm>(Func, DestTy, Dest, SrcTy,
-                                                     Src, Emitter);
+    static const CastEmitterRegOp<GPRRegister, XmmRegister> Emitter = {
+        &Assembler::cvttss2si, &Assembler::cvttss2si};
+    emitIASCastRegOp<GPRRegister, XmmRegister, Traits::getEncodedGPR,
+                     Traits::getEncodedXmm>(Func, DestTy, Dest, SrcTy, Src,
+                                            Emitter);
     return;
   }
   case Float2float: {
     assert(isScalarFloatingType(SrcTy));
     assert(isScalarFloatingType(DestTy));
     assert(DestTy != SrcTy);
-    static const typename InstX86Base<
-        Machine>::Traits::Assembler::XmmEmitterRegOp Emitter = {
-        &InstX86Base<Machine>::Traits::Assembler::cvtfloat2float,
-        &InstX86Base<Machine>::Traits::Assembler::cvtfloat2float};
-    emitIASRegOpTyXMM<Machine>(Func, SrcTy, Dest, Src, Emitter);
+    static const XmmEmitterRegOp Emitter = {&Assembler::cvtfloat2float,
+                                            &Assembler::cvtfloat2float};
+    emitIASRegOpTyXMM(Func, SrcTy, Dest, Src, Emitter);
     return;
   }
   case Dq2ps: {
     assert(isVectorIntegerType(SrcTy));
     assert(isVectorFloatingType(DestTy));
-    static const typename InstX86Base<
-        Machine>::Traits::Assembler::XmmEmitterRegOp Emitter = {
-        &InstX86Base<Machine>::Traits::Assembler::cvtdq2ps,
-        &InstX86Base<Machine>::Traits::Assembler::cvtdq2ps};
-    emitIASRegOpTyXMM<Machine>(Func, DestTy, Dest, Src, Emitter);
+    static const XmmEmitterRegOp Emitter = {&Assembler::cvtdq2ps,
+                                            &Assembler::cvtdq2ps};
+    emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
     return;
   }
   case Tps2dq: {
     assert(isVectorFloatingType(SrcTy));
     assert(isVectorIntegerType(DestTy));
-    static const typename InstX86Base<
-        Machine>::Traits::Assembler::XmmEmitterRegOp Emitter = {
-        &InstX86Base<Machine>::Traits::Assembler::cvttps2dq,
-        &InstX86Base<Machine>::Traits::Assembler::cvttps2dq};
-    emitIASRegOpTyXMM<Machine>(Func, DestTy, Dest, Src, Emitter);
+    static const XmmEmitterRegOp Emitter = {&Assembler::cvttps2dq,
+                                            &Assembler::cvttps2dq};
+    emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
     return;
   }
   }
 }
 
-template <class Machine> void InstX86Cvt<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Cvt::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1772,17 +1609,13 @@
   Str << " = cvt";
   if (isTruncating())
     Str << "t";
-  Str << InstX86Base<Machine>::Traits::TypeAttributes[this->getSrc(0)
-                                                          ->getType()]
-             .CvtString << "2"
-      << InstX86Base<
-             Machine>::Traits::TypeAttributes[this->getDest()->getType()]
-             .CvtString << " ";
+  Str << Traits::TypeAttributes[this->getSrc(0)->getType()].CvtString << "2"
+      << Traits::TypeAttributes[this->getDest()->getType()].CvtString << " ";
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Icmp<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Icmp::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1794,31 +1627,27 @@
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Icmp<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Icmp::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   const Operand *Src0 = this->getSrc(0);
   const Operand *Src1 = this->getSrc(1);
   Type Ty = Src0->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
-      RegEmitter = {&InstX86Base<Machine>::Traits::Assembler::cmp,
-                    &InstX86Base<Machine>::Traits::Assembler::cmp,
-                    &InstX86Base<Machine>::Traits::Assembler::cmp};
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::GPREmitterAddrOp AddrEmitter = {
-      &InstX86Base<Machine>::Traits::Assembler::cmp,
-      &InstX86Base<Machine>::Traits::Assembler::cmp};
+  static const GPREmitterRegOp RegEmitter = {&Assembler::cmp, &Assembler::cmp,
+                                             &Assembler::cmp};
+  static const GPREmitterAddrOp AddrEmitter = {&Assembler::cmp,
+                                               &Assembler::cmp};
   if (const auto *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
     if (SrcVar0->hasReg()) {
-      emitIASRegOpTyGPR<Machine>(Func, Ty, SrcVar0, Src1, RegEmitter);
+      emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
       return;
     }
   }
-  emitIASAsAddrOpTyGPR<Machine>(Func, Ty, Src0, Src1, AddrEmitter);
+  emitIASAsAddrOpTyGPR(Func, Ty, Src0, Src1, AddrEmitter);
 }
 
-template <class Machine>
-void InstX86Icmp<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Icmp::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1826,38 +1655,35 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Ucomiss<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Ucomiss::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(this->getSrcSize() == 2);
   Str << "\t"
          "ucomi"
-      << InstX86Base<Machine>::Traits::TypeAttributes[this->getSrc(0)
-                                                          ->getType()]
-             .SdSsString << "\t";
+      << Traits::TypeAttributes[this->getSrc(0)->getType()].SdSsString << "\t";
   this->getSrc(1)->emit(Func);
   Str << ", ";
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Ucomiss<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Ucomiss::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   // Currently src0 is always a variable by convention, to avoid having two
   // memory operands.
   assert(llvm::isa<Variable>(this->getSrc(0)));
   const auto *Src0Var = llvm::cast<Variable>(this->getSrc(0));
   Type Ty = Src0Var->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-      Emitter = {&InstX86Base<Machine>::Traits::Assembler::ucomiss,
-                 &InstX86Base<Machine>::Traits::Assembler::ucomiss};
-  emitIASRegOpTyXMM<Machine>(Func, Ty, Src0Var, this->getSrc(1), Emitter);
+  static const XmmEmitterRegOp Emitter = {&Assembler::ucomiss,
+                                          &Assembler::ucomiss};
+  emitIASRegOpTyXMM(Func, Ty, Src0Var, this->getSrc(1), Emitter);
 }
 
-template <class Machine>
-void InstX86Ucomiss<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Ucomiss::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1865,7 +1691,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine> void InstX86UD2<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86UD2::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1874,22 +1701,22 @@
          "ud2";
 }
 
-template <class Machine>
-void InstX86UD2<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86UD2::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Asm->ud2();
 }
 
-template <class Machine> void InstX86UD2<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86UD2::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "ud2";
 }
 
-template <class Machine>
-void InstX86Test<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Test::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1901,31 +1728,28 @@
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Test<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Test::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   const Operand *Src0 = this->getSrc(0);
   const Operand *Src1 = this->getSrc(1);
   Type Ty = Src0->getType();
   // The Reg/Addr form of test is not encodeable.
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
-      RegEmitter = {&InstX86Base<Machine>::Traits::Assembler::test, nullptr,
-                    &InstX86Base<Machine>::Traits::Assembler::test};
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::GPREmitterAddrOp AddrEmitter = {
-      &InstX86Base<Machine>::Traits::Assembler::test,
-      &InstX86Base<Machine>::Traits::Assembler::test};
+  static const GPREmitterRegOp RegEmitter = {&Assembler::test, nullptr,
+                                             &Assembler::test};
+  static const GPREmitterAddrOp AddrEmitter = {&Assembler::test,
+                                               &Assembler::test};
   if (const auto *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
     if (SrcVar0->hasReg()) {
-      emitIASRegOpTyGPR<Machine>(Func, Ty, SrcVar0, Src1, RegEmitter);
+      emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
       return;
     }
   }
-  emitIASAsAddrOpTyGPR<Machine>(Func, Ty, Src0, Src1, AddrEmitter);
+  emitIASAsAddrOpTyGPR(Func, Ty, Src0, Src1, AddrEmitter);
 }
 
-template <class Machine>
-void InstX86Test<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Test::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -1933,8 +1757,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Mfence<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mfence::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1943,23 +1767,22 @@
          "mfence";
 }
 
-template <class Machine>
-void InstX86Mfence<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mfence::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Asm->mfence();
 }
 
-template <class Machine>
-void InstX86Mfence<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mfence::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "mfence";
 }
 
-template <class Machine>
-void InstX86Store<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Store::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -1967,14 +1790,14 @@
   Type Ty = this->getSrc(0)->getType();
   Str << "\t"
          "mov" << this->getWidthString(Ty)
-      << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString << "\t";
+      << Traits::TypeAttributes[Ty].SdSsString << "\t";
   this->getSrc(0)->emit(Func);
   Str << ", ";
   this->getSrc(1)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Store<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Store::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   const Operand *Dest = this->getSrc(1);
   const Operand *Src = this->getSrc(0);
@@ -1983,37 +1806,29 @@
     // Src must be a register, since Dest is a Mem operand of some kind.
     const auto *SrcVar = llvm::cast<Variable>(Src);
     assert(SrcVar->hasReg());
-    typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister SrcReg =
-        InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum());
-    typename InstX86Base<Machine>::Traits::Assembler *Asm =
-        Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-    auto *Target = InstX86Base<Machine>::getTarget(Func);
+    XmmRegister SrcReg = Traits::getEncodedXmm(SrcVar->getRegNum());
+    Assembler *Asm = Func->getAssembler<Assembler>();
+    auto *Target = InstX86Base::getTarget(Func);
     if (const auto *DestVar = llvm::dyn_cast<Variable>(Dest)) {
       assert(!DestVar->hasReg());
-      typename InstX86Base<Machine>::Traits::Address StackAddr(
-          Target->stackVarToAsmOperand(DestVar));
+      Address StackAddr(Target->stackVarToAsmOperand(DestVar));
       Asm->movss(DestTy, StackAddr, SrcReg);
     } else {
-      const auto DestMem =
-          llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-              Dest);
-      assert(DestMem->getSegmentRegister() ==
-             InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+      const auto DestMem = llvm::cast<X86OperandMem>(Dest);
+      assert(DestMem->getSegmentRegister() == X86OperandMem::DefaultSegment);
       Asm->movss(DestTy, DestMem->toAsmAddress(Asm, Target), SrcReg);
     }
     return;
   } else {
     assert(isScalarIntegerType(DestTy));
-    static const typename InstX86Base<
-        Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = {
-        &InstX86Base<Machine>::Traits::Assembler::mov,
-        &InstX86Base<Machine>::Traits::Assembler::mov};
-    emitIASAsAddrOpTyGPR<Machine>(Func, DestTy, Dest, Src, GPRAddrEmitter);
+    static const GPREmitterAddrOp GPRAddrEmitter = {&Assembler::mov,
+                                                    &Assembler::mov};
+    emitIASAsAddrOpTyGPR(Func, DestTy, Dest, Src, GPRAddrEmitter);
   }
 }
 
-template <class Machine>
-void InstX86Store<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Store::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2023,8 +1838,8 @@
   this->getSrc(0)->dump(Func);
 }
 
-template <class Machine>
-void InstX86StoreP<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreP::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2037,25 +1852,21 @@
   this->getSrc(1)->emit(Func);
 }
 
-template <class Machine>
-void InstX86StoreP<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreP::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(this->getSrcSize() == 2);
   const auto *SrcVar = llvm::cast<Variable>(this->getSrc(0));
-  const auto DestMem =
-      llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-          this->getSrc(1));
-  assert(DestMem->getSegmentRegister() ==
-         InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+  const auto DestMem = llvm::cast<X86OperandMem>(this->getSrc(1));
+  assert(DestMem->getSegmentRegister() == X86OperandMem::DefaultSegment);
   assert(SrcVar->hasReg());
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   Asm->movups(DestMem->toAsmAddress(Asm, Target),
-              InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
+              Traits::getEncodedXmm(SrcVar->getRegNum()));
 }
 
-template <class Machine>
-void InstX86StoreP<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreP::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2065,8 +1876,8 @@
   this->getSrc(0)->dump(Func);
 }
 
-template <class Machine>
-void InstX86StoreQ<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreQ::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2081,25 +1892,21 @@
   this->getSrc(1)->emit(Func);
 }
 
-template <class Machine>
-void InstX86StoreQ<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreQ::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(this->getSrcSize() == 2);
   const auto *SrcVar = llvm::cast<Variable>(this->getSrc(0));
-  const auto DestMem =
-      llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-          this->getSrc(1));
-  assert(DestMem->getSegmentRegister() ==
-         InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+  const auto DestMem = llvm::cast<X86OperandMem>(this->getSrc(1));
+  assert(DestMem->getSegmentRegister() == X86OperandMem::DefaultSegment);
   assert(SrcVar->hasReg());
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   Asm->movq(DestMem->toAsmAddress(Asm, Target),
-            InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
+            Traits::getEncodedXmm(SrcVar->getRegNum()));
 }
 
-template <class Machine>
-void InstX86StoreQ<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreQ::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2109,7 +1916,8 @@
   this->getSrc(0)->dump(Func);
 }
 
-template <class Machine> void InstX86Lea<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Lea::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2131,7 +1939,8 @@
   this->getDest()->emit(Func);
 }
 
-template <class Machine> void InstX86Mov<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mov::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2139,7 +1948,7 @@
   Operand *Src = this->getSrc(0);
   Type SrcTy = Src->getType();
   Type DestTy = this->getDest()->getType();
-  if (InstX86Base<Machine>::Traits::Is64Bit && DestTy == IceType_i64 &&
+  if (Traits::Is64Bit && DestTy == IceType_i64 &&
       llvm::isa<ConstantInteger64>(Src)) {
     Str << "\t"
            "movabs"
@@ -2148,23 +1957,20 @@
     Str << "\t"
            "mov" << (!isScalarFloatingType(DestTy)
                          ? this->getWidthString(DestTy)
-                         : InstX86Base<Machine>::Traits::TypeAttributes[DestTy]
-                               .SdSsString) << "\t";
+                         : Traits::TypeAttributes[DestTy].SdSsString) << "\t";
   }
   // For an integer truncation operation, src is wider than dest. In this case,
   // we use a mov instruction whose data width matches the narrower dest.
   // TODO: This assert disallows usages such as copying a floating
   // point value between a vector and a scalar (which movss is used for). Clean
   // this up.
-  assert(
-      InstX86Base<Machine>::getTarget(Func)->typeWidthInBytesOnStack(DestTy) ==
-      InstX86Base<Machine>::getTarget(Func)->typeWidthInBytesOnStack(SrcTy));
+  assert(InstX86Base::getTarget(Func)->typeWidthInBytesOnStack(DestTy) ==
+         InstX86Base::getTarget(Func)->typeWidthInBytesOnStack(SrcTy));
   const Operand *NewSrc = Src;
   if (auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
     int32_t NewRegNum = Variable::NoRegister;
     if (SrcVar->hasReg())
-      NewRegNum = InstX86Base<Machine>::Traits::getGprForType(
-          DestTy, SrcVar->getRegNum());
+      NewRegNum = Traits::getGprForType(DestTy, SrcVar->getRegNum());
     if (SrcTy != DestTy)
       NewSrc = SrcVar->asType(DestTy, NewRegNum);
   }
@@ -2173,8 +1979,8 @@
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Mov<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Mov::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
@@ -2186,75 +1992,64 @@
   // is to find out if Src or Dest are a register, then use that register's
   // type to decide on which emitter set to use. The emitter set will include
   // reg-reg movs, but that case should be unused when the types don't match.
-  static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
-      XmmRegEmitter = {&InstX86Base<Machine>::Traits::Assembler::movss,
-                       &InstX86Base<Machine>::Traits::Assembler::movss};
-  static const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
-      GPRRegEmitter = {&InstX86Base<Machine>::Traits::Assembler::mov,
-                       &InstX86Base<Machine>::Traits::Assembler::mov,
-                       &InstX86Base<Machine>::Traits::Assembler::mov};
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = {
-      &InstX86Base<Machine>::Traits::Assembler::mov,
-      &InstX86Base<Machine>::Traits::Assembler::mov};
+  static const XmmEmitterRegOp XmmRegEmitter = {&Assembler::movss,
+                                                &Assembler::movss};
+  static const GPREmitterRegOp GPRRegEmitter = {
+      &Assembler::mov, &Assembler::mov, &Assembler::mov};
+  static const GPREmitterAddrOp GPRAddrEmitter = {&Assembler::mov,
+                                                  &Assembler::mov};
   // For an integer truncation operation, src is wider than dest. In this case,
   // we use a mov instruction whose data width matches the narrower dest.
   // TODO: This assert disallows usages such as copying a floating
   // point value between a vector and a scalar (which movss is used for). Clean
   // this up.
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   assert(Target->typeWidthInBytesOnStack(this->getDest()->getType()) ==
          Target->typeWidthInBytesOnStack(Src->getType()));
   if (Dest->hasReg()) {
     if (isScalarFloatingType(DestTy)) {
-      emitIASRegOpTyXMM<Machine>(Func, DestTy, Dest, Src, XmmRegEmitter);
+      emitIASRegOpTyXMM(Func, DestTy, Dest, Src, XmmRegEmitter);
       return;
     } else {
       assert(isScalarIntegerType(DestTy));
       // Widen DestTy for truncation (see above note). We should only do this
       // when both Src and Dest are integer types.
-      if (InstX86Base<Machine>::Traits::Is64Bit && DestTy == IceType_i64) {
+      if (Traits::Is64Bit && DestTy == IceType_i64) {
         if (const auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src)) {
-          Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>()
-              ->movabs(InstX86Base<Machine>::Traits::getEncodedGPR(
-                           Dest->getRegNum()),
-                       C64->getValue());
+          Func->getAssembler<Assembler>()->movabs(
+              Traits::getEncodedGPR(Dest->getRegNum()), C64->getValue());
           return;
         }
       }
       if (isScalarIntegerType(SrcTy)) {
         SrcTy = DestTy;
       }
-      emitIASRegOpTyGPR<Machine>(Func, DestTy, Dest, Src, GPRRegEmitter);
+      emitIASRegOpTyGPR(Func, DestTy, Dest, Src, GPRRegEmitter);
       return;
     }
   } else {
     // Dest must be Stack and Src *could* be a register. Use Src's type to
     // decide on the emitters.
-    typename InstX86Base<Machine>::Traits::Address StackAddr(
-        Target->stackVarToAsmOperand(Dest));
+    Address StackAddr(Target->stackVarToAsmOperand(Dest));
     if (isScalarFloatingType(SrcTy)) {
       // Src must be a register.
       const auto *SrcVar = llvm::cast<Variable>(Src);
       assert(SrcVar->hasReg());
-      typename InstX86Base<Machine>::Traits::Assembler *Asm =
-          Func->getAssembler<
-              typename InstX86Base<Machine>::Traits::Assembler>();
-      Asm->movss(SrcTy, StackAddr, InstX86Base<Machine>::Traits::getEncodedXmm(
-                                       SrcVar->getRegNum()));
+      Assembler *Asm = Func->getAssembler<Assembler>();
+      Asm->movss(SrcTy, StackAddr, Traits::getEncodedXmm(SrcVar->getRegNum()));
       return;
     } else {
       // Src can be a register or immediate.
       assert(isScalarIntegerType(SrcTy));
-      emitIASAddrOpTyGPR<Machine>(Func, SrcTy, StackAddr, Src, GPRAddrEmitter);
+      emitIASAddrOpTyGPR(Func, SrcTy, StackAddr, Src, GPRAddrEmitter);
       return;
     }
     return;
   }
 }
 
-template <class Machine>
-void InstX86Movd<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movd::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   assert(this->getSrcSize() == 1);
@@ -2274,36 +2069,31 @@
     return;
   }
 
-  InstX86BaseUnaryopXmm<Machine, InstX86Base<Machine>::Movd>::emit(Func);
+  InstX86BaseUnaryopXmm<InstX86Base::Movd>::emit(Func);
 }
 
-template <class Machine>
-void InstX86Movd<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movd::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(this->getSrcSize() == 1);
   const Variable *Dest = this->getDest();
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   // For insert/extract element (one of Src/Dest is an Xmm vector and the other
   // is an int type).
   if (const auto *SrcVar = llvm::dyn_cast<Variable>(this->getSrc(0))) {
     if (SrcVar->getType() == IceType_i32 ||
-        (InstX86Base<Machine>::Traits::Is64Bit &&
-         SrcVar->getType() == IceType_i64)) {
+        (Traits::Is64Bit && SrcVar->getType() == IceType_i64)) {
       assert(isVectorType(Dest->getType()) ||
              (isScalarFloatingType(Dest->getType()) &&
               typeWidthInBytes(SrcVar->getType()) ==
                   typeWidthInBytes(Dest->getType())));
       assert(Dest->hasReg());
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister DestReg =
-          InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum());
+      XmmRegister DestReg = Traits::getEncodedXmm(Dest->getRegNum());
       if (SrcVar->hasReg()) {
-        Asm->movd(
-            SrcVar->getType(), DestReg,
-            InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum()));
+        Asm->movd(SrcVar->getType(), DestReg,
+                  Traits::getEncodedGPR(SrcVar->getRegNum()));
       } else {
-        typename InstX86Base<Machine>::Traits::Address StackAddr(
-            Target->stackVarToAsmOperand(SrcVar));
+        Address StackAddr(Target->stackVarToAsmOperand(SrcVar));
         Asm->movd(SrcVar->getType(), DestReg, StackAddr);
       }
     } else {
@@ -2313,33 +2103,26 @@
                   typeWidthInBytes(Dest->getType())));
       assert(SrcVar->hasReg());
       assert(Dest->getType() == IceType_i32 ||
-             (InstX86Base<Machine>::Traits::Is64Bit &&
-              Dest->getType() == IceType_i64));
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister SrcReg =
-          InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum());
+             (Traits::Is64Bit && Dest->getType() == IceType_i64));
+      XmmRegister SrcReg = Traits::getEncodedXmm(SrcVar->getRegNum());
       if (Dest->hasReg()) {
-        Asm->movd(Dest->getType(), InstX86Base<Machine>::Traits::getEncodedGPR(
-                                       Dest->getRegNum()),
+        Asm->movd(Dest->getType(), Traits::getEncodedGPR(Dest->getRegNum()),
                   SrcReg);
       } else {
-        typename InstX86Base<Machine>::Traits::Address StackAddr(
-            Target->stackVarToAsmOperand(Dest));
+        Address StackAddr(Target->stackVarToAsmOperand(Dest));
         Asm->movd(Dest->getType(), StackAddr, SrcReg);
       }
     }
   } else {
     assert(Dest->hasReg());
-    typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister DestReg =
-        InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum());
-    auto *Mem =
-        llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-            this->getSrc(0));
+    XmmRegister DestReg = Traits::getEncodedXmm(Dest->getRegNum());
+    auto *Mem = llvm::cast<X86OperandMem>(this->getSrc(0));
     Asm->movd(Mem->getType(), DestReg, Mem->toAsmAddress(Asm, Target));
   }
 }
 
-template <class Machine>
-void InstX86Movp<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movp::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   // TODO(wala,stichnot): movups works with all vector operands, but there
@@ -2354,22 +2137,19 @@
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Movp<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movp::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   assert(isVectorType(this->getDest()->getType()));
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::XmmEmitterMovOps Emitter = {
-      &InstX86Base<Machine>::Traits::Assembler::movups,
-      &InstX86Base<Machine>::Traits::Assembler::movups,
-      &InstX86Base<Machine>::Traits::Assembler::movups};
-  emitIASMovlikeXMM<Machine>(Func, Dest, Src, Emitter);
+  static const XmmEmitterMovOps Emitter = {
+      &Assembler::movups, &Assembler::movups, &Assembler::movups};
+  emitIASMovlikeXMM(Func, Dest, Src, Emitter);
 }
 
-template <class Machine>
-void InstX86Movq<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movq::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2384,23 +2164,20 @@
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Movq<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movq::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   assert(this->getDest()->getType() == IceType_i64 ||
          this->getDest()->getType() == IceType_f64);
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
-  static const typename InstX86Base<
-      Machine>::Traits::Assembler::XmmEmitterMovOps Emitter = {
-      &InstX86Base<Machine>::Traits::Assembler::movq,
-      &InstX86Base<Machine>::Traits::Assembler::movq,
-      &InstX86Base<Machine>::Traits::Assembler::movq};
-  emitIASMovlikeXMM<Machine>(Func, Dest, Src, Emitter);
+  static const XmmEmitterMovOps Emitter = {&Assembler::movq, &Assembler::movq,
+                                           &Assembler::movq};
+  emitIASMovlikeXMM(Func, Dest, Src, Emitter);
 }
 
-template <class Machine>
-void InstX86MovssRegs<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86MovssRegs::emitIAS(const Cfg *Func) const {
   // This is Binop variant is only intended to be used for reg-reg moves where
   // part of the Dest register is untouched.
   assert(this->getSrcSize() == 2);
@@ -2408,15 +2185,13 @@
   assert(Dest == this->getSrc(0));
   const auto *SrcVar = llvm::cast<Variable>(this->getSrc(1));
   assert(Dest->hasReg() && SrcVar->hasReg());
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  Asm->movss(IceType_f32,
-             InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum()),
-             InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  Asm->movss(IceType_f32, Traits::getEncodedXmm(Dest->getRegNum()),
+             Traits::getEncodedXmm(SrcVar->getRegNum()));
 }
 
-template <class Machine>
-void InstX86Movsx<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movsx::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
@@ -2426,15 +2201,14 @@
   Type SrcTy = Src->getType();
   assert(typeWidthInBytes(Dest->getType()) > 1);
   assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
-  emitIASRegOpTyGPR<Machine, false, true>(Func, SrcTy, Dest, Src,
-                                          this->Emitter);
+  emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, this->Emitter);
 }
 
-template <class Machine>
-void InstX86Movzx<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movzx::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
-  if (InstX86Base<Machine>::Traits::Is64Bit) {
+  if (Traits::Is64Bit) {
     // There's no movzx %eXX, %rXX. To zero extend 32- to 64-bits, we emit a
     // mov %eXX, %eXX. The processor will still do a movzx[bw]q.
     assert(this->getSrcSize() == 1);
@@ -2447,29 +2221,29 @@
              "\t";
       Src->emit(Func);
       Str << ", ";
-      Dest->asType(IceType_i32, InstX86Base<Machine>::Traits::getGprForType(
-                                    IceType_i32, Dest->getRegNum()))
+      Dest->asType(IceType_i32,
+                   Traits::getGprForType(IceType_i32, Dest->getRegNum()))
           ->emit(Func);
       Str << " /* movzx */";
       return;
     }
   }
-  InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx>::emit(Func);
+  InstX86BaseUnaryopGPR<InstX86Base::Movzx>::emit(Func);
 }
 
-template <class Machine>
-void InstX86Movzx<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Movzx::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
   Type SrcTy = Src->getType();
   assert(typeWidthInBytes(Dest->getType()) > 1);
   assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
-  emitIASRegOpTyGPR<Machine, false, true>(Func, SrcTy, Dest, Src,
-                                          this->Emitter);
+  emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, this->Emitter);
 }
 
-template <class Machine> void InstX86Nop<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Nop::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2478,22 +2252,23 @@
          "nop\t/* variant = " << Variant << " */";
 }
 
-template <class Machine>
-void InstX86Nop<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Nop::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   // TODO: Emit the right code for the variant.
   Asm->nop();
 }
 
-template <class Machine> void InstX86Nop<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Nop::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "nop (variant = " << Variant << ")";
 }
 
-template <class Machine> void InstX86Fld<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Fld::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2505,8 +2280,7 @@
     // stack slot.  Function prolog emission guarantees that there is sufficient
     // space to do this.
     Str << "\t"
-           "mov" << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString
-        << "\t";
+           "mov" << Traits::TypeAttributes[Ty].SdSsString << "\t";
     Var->emit(Func);
     Str << ", (%esp)\n"
            "\t"
@@ -2519,44 +2293,38 @@
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Fld<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Fld::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(this->getSrcSize() == 1);
   const Operand *Src = this->getSrc(0);
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   Type Ty = Src->getType();
   if (const auto *Var = llvm::dyn_cast<Variable>(Src)) {
     if (Var->hasReg()) {
       // This is a physical xmm register, so we need to spill it to a temporary
       // stack slot.  Function prolog emission guarantees that there is
       // sufficient space to do this.
-      typename InstX86Base<Machine>::Traits::Address StackSlot =
-          typename InstX86Base<Machine>::Traits::Address(
-              InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, 0,
-              AssemblerFixup::NoFixup);
-      Asm->movss(Ty, StackSlot,
-                 InstX86Base<Machine>::Traits::getEncodedXmm(Var->getRegNum()));
+      Address StackSlot =
+          Address(RegisterSet::Encoded_Reg_esp, 0, AssemblerFixup::NoFixup);
+      Asm->movss(Ty, StackSlot, Traits::getEncodedXmm(Var->getRegNum()));
       Asm->fld(Ty, StackSlot);
     } else {
-      typename InstX86Base<Machine>::Traits::Address StackAddr(
-          Target->stackVarToAsmOperand(Var));
+      Address StackAddr(Target->stackVarToAsmOperand(Var));
       Asm->fld(Ty, StackAddr);
     }
-  } else if (const auto *Mem = llvm::dyn_cast<
-                 typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
-    assert(Mem->getSegmentRegister() ==
-           InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src)) {
+    assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
     Asm->fld(Ty, Mem->toAsmAddress(Asm, Target));
   } else if (const auto *Imm = llvm::dyn_cast<Constant>(Src)) {
-    Asm->fld(Ty, InstX86Base<Machine>::Traits::Address::ofConstPool(Asm, Imm));
+    Asm->fld(Ty, Traits::Address::ofConstPool(Asm, Imm));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
 }
 
-template <class Machine> void InstX86Fld<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Fld::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2564,8 +2332,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Fstp<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Fstp::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2593,48 +2361,41 @@
          "fstp" << this->getFldString(Ty) << "\t"
                                              "(%esp)\n";
   Str << "\t"
-         "mov" << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString
-      << "\t"
-         "(%esp), ";
+         "mov" << Traits::TypeAttributes[Ty].SdSsString << "\t"
+                                                           "(%esp), ";
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Fstp<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Fstp::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   assert(this->getSrcSize() == 0);
   const Variable *Dest = this->getDest();
   // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
   // "partially" delete the fstp if the Dest is unused. Even if Dest is unused,
   // the fstp should be kept for the SideEffects of popping the stack.
   if (!Dest) {
-    Asm->fstp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedSTReg(0));
+    Asm->fstp(RegisterSet::getEncodedSTReg(0));
     return;
   }
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  auto *Target = InstX86Base::getTarget(Func);
   Type Ty = Dest->getType();
   if (!Dest->hasReg()) {
-    typename InstX86Base<Machine>::Traits::Address StackAddr(
-        Target->stackVarToAsmOperand(Dest));
+    Address StackAddr(Target->stackVarToAsmOperand(Dest));
     Asm->fstp(Ty, StackAddr);
   } else {
     // Dest is a physical (xmm) register, so st(0) needs to go through memory.
     // Hack this by using caller-reserved memory at the top of stack, spilling
     // st(0) there, and loading it into the xmm register.
-    typename InstX86Base<Machine>::Traits::Address StackSlot =
-        typename InstX86Base<Machine>::Traits::Address(
-            InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, 0,
-            AssemblerFixup::NoFixup);
+    Address StackSlot =
+        Address(RegisterSet::Encoded_Reg_esp, 0, AssemblerFixup::NoFixup);
     Asm->fstp(Ty, StackSlot);
-    Asm->movss(Ty,
-               InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum()),
-               StackSlot);
+    Asm->movss(Ty, Traits::getEncodedXmm(Dest->getRegNum()), StackSlot);
   }
 }
 
-template <class Machine>
-void InstX86Fstp<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Fstp::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2642,17 +2403,15 @@
   Str << " = fstp." << this->getDest()->getType() << ", st(0)";
 }
 
-template <class Machine>
-void InstX86Pextr<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pextr::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(this->getSrcSize() == 2);
   // pextrb and pextrd are SSE4.1 instructions.
   Str << "\t" << this->Opcode
-      << InstX86Base<Machine>::Traits::TypeAttributes[this->getSrc(0)
-                                                          ->getType()]
-             .PackString << "\t";
+      << Traits::TypeAttributes[this->getSrc(0)->getType()].PackString << "\t";
   this->getSrc(1)->emit(Func);
   Str << ", ";
   this->getSrc(0)->emit(Func);
@@ -2665,50 +2424,40 @@
   Dest->asType(IceType_i32, Dest->getRegNum())->emit(Func);
 }
 
-template <class Machine>
-void InstX86Pextr<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pextr::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   // pextrb and pextrd are SSE4.1 instructions.
   const Variable *Dest = this->getDest();
-  Type DispatchTy = InstX86Base<Machine>::Traits::getInVectorElementType(
-      this->getSrc(0)->getType());
+  Type DispatchTy = Traits::getInVectorElementType(this->getSrc(0)->getType());
   // pextrw must take a register dest. There is an SSE4.1 version that takes a
   // memory dest, but we aren't using it. For uniformity, just restrict them
   // all to have a register dest for now.
   assert(Dest->hasReg());
   // pextrw's Src(0) must be a register (both SSE4.1 and SSE2).
   assert(llvm::cast<Variable>(this->getSrc(0))->hasReg());
-  static const typename InstX86Base<Machine>::Traits::Assembler::
-      template ThreeOpImmEmitter<
-          typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister>
-          Emitter = {&InstX86Base<Machine>::Traits::Assembler::pextr, nullptr};
-  emitIASThreeOpImmOps<
-      Machine, typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      InstX86Base<Machine>::Traits::getEncodedGPR,
-      InstX86Base<Machine>::Traits::getEncodedXmm>(
+  static const ThreeOpImmEmitter<GPRRegister, XmmRegister> Emitter = {
+      &Assembler::pextr, nullptr};
+  emitIASThreeOpImmOps<GPRRegister, XmmRegister, Traits::getEncodedGPR,
+                       Traits::getEncodedXmm>(
       Func, DispatchTy, Dest, this->getSrc(0), this->getSrc(1), Emitter);
 }
 
-template <class Machine>
-void InstX86Pinsr<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pinsr::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(this->getSrcSize() == 3);
   Str << "\t" << this->Opcode
-      << InstX86Base<
-             Machine>::Traits::TypeAttributes[this->getDest()->getType()]
-             .PackString << "\t";
+      << Traits::TypeAttributes[this->getDest()->getType()].PackString << "\t";
   this->getSrc(2)->emit(Func);
   Str << ", ";
   Operand *Src1 = this->getSrc(1);
   if (const auto *Src1Var = llvm::dyn_cast<Variable>(Src1)) {
     // If src1 is a register, it should always be r32.
     if (Src1Var->hasReg()) {
-      int32_t NewRegNum =
-          InstX86Base<Machine>::Traits::getBaseReg(Src1Var->getRegNum());
+      int32_t NewRegNum = Traits::getBaseReg(Src1Var->getRegNum());
       const Variable *NewSrc = Src1Var->asType(IceType_i32, NewRegNum);
       NewSrc->emit(Func);
     } else {
@@ -2721,8 +2470,8 @@
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Pinsr<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pinsr::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
   assert(this->getDest() == this->getSrc(0));
   // pinsrb and pinsrd are SSE4.1 instructions.
@@ -2736,67 +2485,47 @@
     if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0)) {
       if (Src0Var->hasReg()) {
         int32_t RegNum = Src0Var->getRegNum();
-        int32_t BaseRegNum = InstX86Base<Machine>::Traits::getBaseReg(RegNum);
+        int32_t BaseRegNum = Traits::getBaseReg(RegNum);
         (void)BaseRegNum;
-        assert(InstX86Base<Machine>::Traits::getEncodedGPR(RegNum) ==
-               InstX86Base<Machine>::Traits::getEncodedGPR(BaseRegNum));
+        assert(Traits::getEncodedGPR(RegNum) ==
+               Traits::getEncodedGPR(BaseRegNum));
       }
     }
   }
-  static const typename InstX86Base<Machine>::Traits::Assembler::
-      template ThreeOpImmEmitter<
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-          typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister>
-          Emitter = {&InstX86Base<Machine>::Traits::Assembler::pinsr,
-                     &InstX86Base<Machine>::Traits::Assembler::pinsr};
-  emitIASThreeOpImmOps<
-      Machine, typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
-      InstX86Base<Machine>::Traits::getEncodedXmm,
-      InstX86Base<Machine>::Traits::getEncodedGPR>(
-      Func, DispatchTy, this->getDest(), Src0, this->getSrc(2), Emitter);
+  static const ThreeOpImmEmitter<XmmRegister, GPRRegister> Emitter = {
+      &Assembler::pinsr, &Assembler::pinsr};
+  emitIASThreeOpImmOps<XmmRegister, GPRRegister, Traits::getEncodedXmm,
+                       Traits::getEncodedGPR>(Func, DispatchTy, this->getDest(),
+                                              Src0, this->getSrc(2), Emitter);
 }
 
-template <class Machine>
-void InstX86Pshufd<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pshufd::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
   const Variable *Dest = this->getDest();
   Type Ty = Dest->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::
-      template ThreeOpImmEmitter<
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister>
-          Emitter = {&InstX86Base<Machine>::Traits::Assembler::pshufd,
-                     &InstX86Base<Machine>::Traits::Assembler::pshufd};
-  emitIASThreeOpImmOps<
-      Machine, typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      InstX86Base<Machine>::Traits::getEncodedXmm,
-      InstX86Base<Machine>::Traits::getEncodedXmm>(
-      Func, Ty, Dest, this->getSrc(0), this->getSrc(1), Emitter);
+  static const ThreeOpImmEmitter<XmmRegister, XmmRegister> Emitter = {
+      &Assembler::pshufd, &Assembler::pshufd};
+  emitIASThreeOpImmOps<XmmRegister, XmmRegister, Traits::getEncodedXmm,
+                       Traits::getEncodedXmm>(Func, Ty, Dest, this->getSrc(0),
+                                              this->getSrc(1), Emitter);
 }
 
-template <class Machine>
-void InstX86Shufps<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Shufps::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
   const Variable *Dest = this->getDest();
   assert(Dest == this->getSrc(0));
   Type Ty = Dest->getType();
-  static const typename InstX86Base<Machine>::Traits::Assembler::
-      template ThreeOpImmEmitter<
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-          typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister>
-          Emitter = {&InstX86Base<Machine>::Traits::Assembler::shufps,
-                     &InstX86Base<Machine>::Traits::Assembler::shufps};
-  emitIASThreeOpImmOps<
-      Machine, typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
-      InstX86Base<Machine>::Traits::getEncodedXmm,
-      InstX86Base<Machine>::Traits::getEncodedXmm>(
-      Func, Ty, Dest, this->getSrc(1), this->getSrc(2), Emitter);
+  static const ThreeOpImmEmitter<XmmRegister, XmmRegister> Emitter = {
+      &Assembler::shufps, &Assembler::shufps};
+  emitIASThreeOpImmOps<XmmRegister, XmmRegister, Traits::getEncodedXmm,
+                       Traits::getEncodedXmm>(Func, Ty, Dest, this->getSrc(1),
+                                              this->getSrc(2), Emitter);
 }
 
-template <class Machine> void InstX86Pop<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pop::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2806,21 +2535,20 @@
   this->getDest()->emit(Func);
 }
 
-template <class Machine>
-void InstX86Pop<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pop::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 0);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+  Assembler *Asm = Func->getAssembler<Assembler>();
   if (this->getDest()->hasReg()) {
-    Asm->popl(InstX86Base<Machine>::Traits::getEncodedGPR(
-        this->getDest()->getRegNum()));
+    Asm->popl(Traits::getEncodedGPR(this->getDest()->getRegNum()));
   } else {
-    auto *Target = InstX86Base<Machine>::getTarget(Func);
+    auto *Target = InstX86Base::getTarget(Func);
     Asm->popl(Target->stackVarToAsmOperand(this->getDest()));
   }
 }
 
-template <class Machine> void InstX86Pop<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Pop::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2828,8 +2556,8 @@
   Str << " = pop." << this->getDest()->getType() << " ";
 }
 
-template <class Machine>
-void InstX86Push<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Push::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2842,19 +2570,18 @@
   Var->emit(Func);
 }
 
-template <class Machine>
-void InstX86Push<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Push::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   // Push is currently only used for saving GPRs.
   const auto *Var = llvm::cast<Variable>(this->getSrc(0));
   assert(Var->hasReg());
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  Asm->pushl(InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum()));
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  Asm->pushl(Traits::getEncodedGPR(Var->getRegNum()));
 }
 
-template <class Machine>
-void InstX86Push<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Push::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2862,7 +2589,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine> void InstX86Ret<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Ret::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2870,14 +2598,14 @@
          "ret";
 }
 
-template <class Machine>
-void InstX86Ret<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Ret::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Asm->ret();
 }
 
-template <class Machine> void InstX86Ret<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Ret::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2887,47 +2615,42 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Setcc<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Setcc::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   Str << "\t"
-         "set"
-      << InstX86Base<Machine>::Traits::InstBrAttributes[Condition].DisplayString
-      << "\t";
+         "set" << Traits::InstBrAttributes[Condition].DisplayString << "\t";
   this->Dest->emit(Func);
 }
 
-template <class Machine>
-void InstX86Setcc<Machine>::emitIAS(const Cfg *Func) const {
-  assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Setcc::emitIAS(const Cfg *Func) const {
+  assert(Condition != Cond::Br_None);
   assert(this->getDest()->getType() == IceType_i1);
   assert(this->getSrcSize() == 0);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  auto *Target = InstX86Base::getTarget(Func);
   if (this->getDest()->hasReg())
-    Asm->setcc(Condition, InstX86Base<Machine>::Traits::getEncodedByteReg(
-                              this->getDest()->getRegNum()));
+    Asm->setcc(Condition,
+               Traits::getEncodedByteReg(this->getDest()->getRegNum()));
   else
     Asm->setcc(Condition, Target->stackVarToAsmOperand(this->getDest()));
   return;
 }
 
-template <class Machine>
-void InstX86Setcc<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Setcc::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
-  Str << "setcc."
-      << InstX86Base<Machine>::Traits::InstBrAttributes[Condition].DisplayString
-      << " ";
+  Str << "setcc." << Traits::InstBrAttributes[Condition].DisplayString << " ";
   this->dumpDest(Func);
 }
 
-template <class Machine>
-void InstX86Xadd<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Xadd::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2942,29 +2665,23 @@
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Xadd<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Xadd::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Type Ty = this->getSrc(0)->getType();
-  const auto Mem =
-      llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-          this->getSrc(0));
-  assert(Mem->getSegmentRegister() ==
-         InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  const typename InstX86Base<Machine>::Traits::Address Addr =
-      Mem->toAsmAddress(Asm, Target);
+  const auto Mem = llvm::cast<X86OperandMem>(this->getSrc(0));
+  assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
+  auto *Target = InstX86Base::getTarget(Func);
+  const Address Addr = Mem->toAsmAddress(Asm, Target);
   const auto *VarReg = llvm::cast<Variable>(this->getSrc(1));
   assert(VarReg->hasReg());
-  const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg =
-      InstX86Base<Machine>::Traits::getEncodedGPR(VarReg->getRegNum());
+  const GPRRegister Reg = Traits::getEncodedGPR(VarReg->getRegNum());
   Asm->xadd(Ty, Addr, Reg, this->Locked);
 }
 
-template <class Machine>
-void InstX86Xadd<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Xadd::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -2976,8 +2693,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86Xchg<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Xchg::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -2988,38 +2705,31 @@
   this->getSrc(0)->emit(Func);
 }
 
-template <class Machine>
-void InstX86Xchg<Machine>::emitIAS(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Xchg::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Type Ty = this->getSrc(0)->getType();
   const auto *VarReg1 = llvm::cast<Variable>(this->getSrc(1));
   assert(VarReg1->hasReg());
-  const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg1 =
-      InstX86Base<Machine>::Traits::getEncodedGPR(VarReg1->getRegNum());
+  const GPRRegister Reg1 = Traits::getEncodedGPR(VarReg1->getRegNum());
 
   if (const auto *VarReg0 = llvm::dyn_cast<Variable>(this->getSrc(0))) {
     assert(VarReg0->hasReg());
-    const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg0 =
-        InstX86Base<Machine>::Traits::getEncodedGPR(VarReg0->getRegNum());
+    const GPRRegister Reg0 = Traits::getEncodedGPR(VarReg0->getRegNum());
     Asm->xchg(Ty, Reg0, Reg1);
     return;
   }
 
-  const auto *Mem =
-      llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
-          this->getSrc(0));
-  assert(Mem->getSegmentRegister() ==
-         InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
-  auto *Target = InstX86Base<Machine>::getTarget(Func);
-  const typename InstX86Base<Machine>::Traits::Address Addr =
-      Mem->toAsmAddress(Asm, Target);
+  const auto *Mem = llvm::cast<X86OperandMem>(this->getSrc(0));
+  assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
+  auto *Target = InstX86Base::getTarget(Func);
+  const Address Addr = Mem->toAsmAddress(Asm, Target);
   Asm->xchg(Ty, Addr, Reg1);
 }
 
-template <class Machine>
-void InstX86Xchg<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Xchg::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
@@ -3028,8 +2738,8 @@
   this->dumpSources(Func);
 }
 
-template <class Machine>
-void InstX86IacaStart<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86IacaStart::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -3040,23 +2750,22 @@
          "\t.byte 0x64, 0x67, 0x90";
 }
 
-template <class Machine>
-void InstX86IacaStart<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86IacaStart::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Asm->iaca_start();
 }
 
-template <class Machine>
-void InstX86IacaStart<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86IacaStart::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "IACA_START";
 }
 
-template <class Machine>
-void InstX86IacaEnd<Machine>::emit(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86IacaEnd::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -3067,22 +2776,21 @@
          "\t.byte 0x0F, 0x0B";
 }
 
-template <class Machine>
-void InstX86IacaEnd<Machine>::emitIAS(const Cfg *Func) const {
-  typename InstX86Base<Machine>::Traits::Assembler *Asm =
-      Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86IacaEnd::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
   Asm->iaca_end();
 }
 
-template <class Machine>
-void InstX86IacaEnd<Machine>::dump(const Cfg *Func) const {
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86IacaEnd::dump(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "IACA_END";
 }
 
-} // end of namespace X86Internal
+} // end of namespace X86NAMESPACE
 
 } // end of namespace Ice
 
diff --git a/src/IceRegistersARM32.h b/src/IceRegistersARM32.h
index d601e73..b08b5b9 100644
--- a/src/IceRegistersARM32.h
+++ b/src/IceRegistersARM32.h
@@ -21,6 +21,7 @@
 #include "IceTypes.h"
 
 namespace Ice {
+namespace ARM32 {
 
 class RegARM32 {
 public:
@@ -136,6 +137,7 @@
 // Extend enum RegClass with ARM32-specific register classes (if any).
 enum RegClassARM32 : uint8_t { RCARM32_NUM = RC_Target };
 
+} // end of namespace ARM32
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICEREGISTERSARM32_H
diff --git a/src/IceRegistersMIPS32.h b/src/IceRegistersMIPS32.h
index 1dcceb7..8d519d7 100644
--- a/src/IceRegistersMIPS32.h
+++ b/src/IceRegistersMIPS32.h
@@ -21,7 +21,7 @@
 #include "IceTypes.h"
 
 namespace Ice {
-
+namespace MIPS32 {
 namespace RegMIPS32 {
 
 /// An enum of every register. The enum value may not match the encoding
@@ -63,6 +63,7 @@
 // Extend enum RegClass with MIPS32-specific register classes (if any).
 enum RegClassMIPS32 : uint8_t { RCMIPS32_NUM = RC_Target };
 
+} // end of namespace MIPS32
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICEREGISTERSMIPS32_H
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index d83530d..c0a2f57 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -37,23 +37,24 @@
 
 namespace ARM32 {
 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
-  return ::Ice::TargetARM32::create(Func);
+  return ::Ice::ARM32::TargetARM32::create(Func);
 }
 
 std::unique_ptr<::Ice::TargetDataLowering>
 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetDataARM32::create(Ctx);
+  return ::Ice::ARM32::TargetDataARM32::create(Ctx);
 }
 
 std::unique_ptr<::Ice::TargetHeaderLowering>
 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetHeaderARM32::create(Ctx);
+  return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
 }
 
-void staticInit() { ::Ice::TargetARM32::staticInit(); }
+void staticInit() { ::Ice::ARM32::TargetARM32::staticInit(); }
 } // end of namespace ARM32
 
 namespace Ice {
+namespace ARM32 {
 
 namespace {
 
@@ -6447,4 +6448,5 @@
 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
 llvm::SmallBitVector TargetARM32::ScratchRegs;
 
+} // end of namespace ARM32
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index fef162d..5263bc2 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallBitVector.h"
 
 namespace Ice {
+namespace ARM32 {
 
 // Class encapsulating ARM cpu features / instruction set.
 class TargetARM32Features {
@@ -1206,6 +1207,7 @@
   TargetARM32Features CPUFeatures;
 };
 
+} // end of namespace ARM32
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 9b9b19c..7ec0363 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -32,23 +32,24 @@
 
 namespace MIPS32 {
 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
-  return ::Ice::TargetMIPS32::create(Func);
+  return ::Ice::MIPS32::TargetMIPS32::create(Func);
 }
 
 std::unique_ptr<::Ice::TargetDataLowering>
 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetDataMIPS32::create(Ctx);
+  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
 }
 
 std::unique_ptr<::Ice::TargetHeaderLowering>
 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetHeaderMIPS32::create(Ctx);
+  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
 }
 
-void staticInit() { ::Ice::TargetMIPS32::staticInit(); }
+void staticInit() { ::Ice::MIPS32::TargetMIPS32::staticInit(); }
 } // end of namespace MIPS32
 
 namespace Ice {
+namespace MIPS32 {
 
 using llvm::isInt;
 
@@ -1100,4 +1101,5 @@
 llvm::SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
 llvm::SmallBitVector TargetMIPS32::ScratchRegs;
 
+} // end of namespace MIPS32
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 1c6627e..8a242bd 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -23,6 +23,7 @@
 #include "IceTargetLowering.h"
 
 namespace Ice {
+namespace MIPS32 {
 
 class TargetMIPS32 : public TargetLowering {
   TargetMIPS32() = delete;
@@ -310,6 +311,7 @@
   ~TargetHeaderMIPS32() = default;
 };
 
+} // end of namespace MIPS32
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICETARGETLOWERINGMIPS32_H
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index cc51f46..b145948 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -16,27 +16,27 @@
 #include "IceTargetLoweringX8632.h"
 
 #include "IceTargetLoweringX8632Traits.h"
-#include "IceTargetLoweringX86Base.h"
 
 namespace X8632 {
 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
-  return ::Ice::TargetX8632::create(Func);
+  return ::Ice::X8632::TargetX8632::create(Func);
 }
 
 std::unique_ptr<::Ice::TargetDataLowering>
 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetDataX8632::create(Ctx);
+  return ::Ice::X8632::TargetDataX8632::create(Ctx);
 }
 
 std::unique_ptr<::Ice::TargetHeaderLowering>
 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetHeaderX8632::create(Ctx);
+  return ::Ice::X8632::TargetHeaderX8632::create(Ctx);
 }
 
-void staticInit() { ::Ice::TargetX8632::staticInit(); }
+void staticInit() { ::Ice::X8632::TargetX8632::staticInit(); }
 } // end of namespace X8632
 
 namespace Ice {
+namespace X8632 {
 
 //------------------------------------------------------------------------------
 //      ______   ______     ______     __     ______   ______
@@ -46,51 +46,46 @@
 //         \/_/   \/_/ /_/   \/_/\/_/   \/_/     \/_/   \/_____/
 //
 //------------------------------------------------------------------------------
-namespace X86Internal {
-const MachineTraits<TargetX8632>::TableFcmpType
-    MachineTraits<TargetX8632>::TableFcmp[] = {
+const TargetX8632Traits::TableFcmpType TargetX8632Traits::TableFcmp[] = {
 #define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
   {                                                                            \
     dflt, swapS, X8632::Traits::Cond::C1, X8632::Traits::Cond::C2, swapV,      \
         X8632::Traits::Cond::pred                                              \
   }                                                                            \
   ,
-        FCMPX8632_TABLE
+    FCMPX8632_TABLE
 #undef X
 };
 
-const size_t MachineTraits<TargetX8632>::TableFcmpSize =
-    llvm::array_lengthof(TableFcmp);
+const size_t TargetX8632Traits::TableFcmpSize = llvm::array_lengthof(TableFcmp);
 
-const MachineTraits<TargetX8632>::TableIcmp32Type
-    MachineTraits<TargetX8632>::TableIcmp32[] = {
+const TargetX8632Traits::TableIcmp32Type TargetX8632Traits::TableIcmp32[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
   { X8632::Traits::Cond::C_32 }                                                \
   ,
-        ICMPX8632_TABLE
+    ICMPX8632_TABLE
 #undef X
 };
 
-const size_t MachineTraits<TargetX8632>::TableIcmp32Size =
+const size_t TargetX8632Traits::TableIcmp32Size =
     llvm::array_lengthof(TableIcmp32);
 
-const MachineTraits<TargetX8632>::TableIcmp64Type
-    MachineTraits<TargetX8632>::TableIcmp64[] = {
+const TargetX8632Traits::TableIcmp64Type TargetX8632Traits::TableIcmp64[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
   {                                                                            \
     X8632::Traits::Cond::C1_64, X8632::Traits::Cond::C2_64,                    \
         X8632::Traits::Cond::C3_64                                             \
   }                                                                            \
   ,
-        ICMPX8632_TABLE
+    ICMPX8632_TABLE
 #undef X
 };
 
-const size_t MachineTraits<TargetX8632>::TableIcmp64Size =
+const size_t TargetX8632Traits::TableIcmp64Size =
     llvm::array_lengthof(TableIcmp64);
 
-const MachineTraits<TargetX8632>::TableTypeX8632AttributesType
-    MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = {
+const TargetX8632Traits::TableTypeX8632AttributesType
+    TargetX8632Traits::TableTypeX8632Attributes[] = {
 #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)             \
   { IceType_##elementty }                                                      \
   ,
@@ -98,26 +93,24 @@
 #undef X
 };
 
-const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize =
+const size_t TargetX8632Traits::TableTypeX8632AttributesSize =
     llvm::array_lengthof(TableTypeX8632Attributes);
 
-const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
-const char *MachineTraits<TargetX8632>::TargetName = "X8632";
+const uint32_t TargetX8632Traits::X86_STACK_ALIGNMENT_BYTES = 16;
+const char *TargetX8632Traits::TargetName = "X8632";
 
 template <>
 std::array<llvm::SmallBitVector, RCX86_NUM>
-    TargetX86Base<TargetX8632>::TypeToRegisterSet = {{}};
+    TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}};
 
 template <>
 std::array<llvm::SmallBitVector,
-           TargetX86Base<TargetX8632>::Traits::RegisterSet::Reg_NUM>
-    TargetX86Base<TargetX8632>::RegisterAliases = {{}};
+           TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM>
+    TargetX86Base<X8632::Traits>::RegisterAliases = {{}};
 
 template <>
 llvm::SmallBitVector
-    TargetX86Base<TargetX8632>::ScratchRegs = llvm::SmallBitVector();
-
-} // end of namespace X86Internal
+    TargetX86Base<X8632::Traits>::ScratchRegs = llvm::SmallBitVector();
 
 //------------------------------------------------------------------------------
 //     __      ______  __     __  ______  ______  __  __   __  ______
@@ -1018,4 +1011,5 @@
 } // end of namespace dummy3
 } // end of anonymous namespace
 
+} // end of namespace X8632
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 51b033e..f7ac52e 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -18,16 +18,18 @@
 
 #include "IceAssemblerX8632.h"
 #include "IceDefs.h"
-#include "IceInstX8632.h"
 #include "IceRegistersX8632.h"
 #include "IceTargetLowering.h"
-#include "IceTargetLoweringX8632Traits.h"
+#include "IceInstX8632.h"
+#define X86NAMESPACE X8632
 #include "IceTargetLoweringX86Base.h"
+#undef X86NAMESPACE
+#include "IceTargetLoweringX8632Traits.h"
 
 namespace Ice {
+namespace X8632 {
 
-class TargetX8632 final
-    : public ::Ice::X86Internal::TargetX86Base<TargetX8632> {
+class TargetX8632 final : public ::Ice::X8632::TargetX86Base<X8632::Traits> {
   TargetX8632() = delete;
   TargetX8632(const TargetX8632 &) = delete;
   TargetX8632 &operator=(const TargetX8632 &) = delete;
@@ -55,7 +57,7 @@
 
 private:
   ENABLE_MAKE_UNIQUE;
-  friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>;
+  friend class X8632::TargetX86Base<X8632::Traits>;
 
   Operand *createNaClReadTPSrcOperand() {
     Constant *Zero = Ctx->getConstantZero(IceType_i32);
@@ -108,6 +110,7 @@
   ~TargetHeaderX8632() = default;
 };
 
+} // end of namespace X8632
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8632_H
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 49a0115..dbd4565 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -30,19 +30,16 @@
 
 namespace Ice {
 
-class TargetX8632;
-
 namespace X8632 {
-class AssemblerX8632;
-} // end of namespace X8632
-
-namespace X86Internal {
+using namespace ::Ice::X86;
 
 template <class Machine> struct Insts;
-template <class Machine> struct MachineTraits;
 template <class Machine> class TargetX86Base;
+template <class Machine> class AssemblerX86Base;
 
-template <> struct MachineTraits<TargetX8632> {
+class TargetX8632;
+
+struct TargetX8632Traits {
   //----------------------------------------------------------------------------
   //     ______  ______  __    __
   //    /\  __ \/\  ___\/\ "-./  \
@@ -51,6 +48,9 @@
   //      \/_/\/_/\/_____/\/_/  \/_/
   //
   //----------------------------------------------------------------------------
+  static constexpr ::Ice::Assembler::AssemblerKind AsmKind =
+      ::Ice::Assembler::Asm_X8632;
+
   static constexpr bool Is64Bit = false;
   static constexpr bool HasPopa = true;
   static constexpr bool HasPusha = true;
@@ -171,7 +171,7 @@
              ((encoding_[0] & 0x07) == reg); // Register codes match.
     }
 
-    template <class> friend class AssemblerX86Base;
+    friend class AssemblerX86Base<TargetX8632Traits>;
   };
 
   class Address : public Operand {
@@ -752,10 +752,12 @@
   //      \/_/\/_/ \/_/\/_____/  \/_/
   //
   //----------------------------------------------------------------------------
-  using Insts = ::Ice::X86Internal::Insts<TargetX8632>;
+  using Traits = TargetX8632Traits;
+  using Insts = ::Ice::X8632::Insts<Traits>;
 
-  using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>;
-  using Assembler = X8632::AssemblerX8632;
+  using TargetLowering = ::Ice::X8632::TargetX86Base<Traits>;
+  using ConcreteTarget = ::Ice::X8632::TargetX8632;
+  using Assembler = ::Ice::X8632::AssemblerX86Base<Traits>;
 
   /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
   /// and VariableSplit.
@@ -932,10 +934,7 @@
   static uint8_t InstSegmentPrefixes[];
 };
 
-} // end of namespace X86Internal
-
-namespace X8632 {
-using Traits = ::Ice::X86Internal::MachineTraits<TargetX8632>;
+using Traits = ::Ice::X8632::TargetX8632Traits;
 } // end of namespace X8632
 
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 236cf48..2008afa 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -16,27 +16,27 @@
 #include "IceTargetLoweringX8664.h"
 
 #include "IceTargetLoweringX8664Traits.h"
-#include "IceTargetLoweringX86Base.h"
 
 namespace X8664 {
 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
-  return ::Ice::TargetX8664::create(Func);
+  return ::Ice::X8664::TargetX8664::create(Func);
 }
 
 std::unique_ptr<::Ice::TargetDataLowering>
 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetDataX8664::create(Ctx);
+  return ::Ice::X8664::TargetDataX8664::create(Ctx);
 }
 
 std::unique_ptr<::Ice::TargetHeaderLowering>
 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
-  return ::Ice::TargetHeaderX8664::create(Ctx);
+  return ::Ice::X8664::TargetHeaderX8664::create(Ctx);
 }
 
-void staticInit() { ::Ice::TargetX8664::staticInit(); }
+void staticInit() { ::Ice::X8664::TargetX8664::staticInit(); }
 } // end of namespace X8664
 
 namespace Ice {
+namespace X8664 {
 
 //------------------------------------------------------------------------------
 //      ______   ______     ______     __     ______   ______
@@ -46,51 +46,46 @@
 //         \/_/   \/_/ /_/   \/_/\/_/   \/_/     \/_/   \/_____/
 //
 //------------------------------------------------------------------------------
-namespace X86Internal {
-const MachineTraits<TargetX8664>::TableFcmpType
-    MachineTraits<TargetX8664>::TableFcmp[] = {
+const TargetX8664Traits::TableFcmpType TargetX8664Traits::TableFcmp[] = {
 #define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
   {                                                                            \
     dflt, swapS, X8664::Traits::Cond::C1, X8664::Traits::Cond::C2, swapV,      \
         X8664::Traits::Cond::pred                                              \
   }                                                                            \
   ,
-        FCMPX8664_TABLE
+    FCMPX8664_TABLE
 #undef X
 };
 
-const size_t MachineTraits<TargetX8664>::TableFcmpSize =
-    llvm::array_lengthof(TableFcmp);
+const size_t TargetX8664Traits::TableFcmpSize = llvm::array_lengthof(TableFcmp);
 
-const MachineTraits<TargetX8664>::TableIcmp32Type
-    MachineTraits<TargetX8664>::TableIcmp32[] = {
+const TargetX8664Traits::TableIcmp32Type TargetX8664Traits::TableIcmp32[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
   { X8664::Traits::Cond::C_32 }                                                \
   ,
-        ICMPX8664_TABLE
+    ICMPX8664_TABLE
 #undef X
 };
 
-const size_t MachineTraits<TargetX8664>::TableIcmp32Size =
+const size_t TargetX8664Traits::TableIcmp32Size =
     llvm::array_lengthof(TableIcmp32);
 
-const MachineTraits<TargetX8664>::TableIcmp64Type
-    MachineTraits<TargetX8664>::TableIcmp64[] = {
+const TargetX8664Traits::TableIcmp64Type TargetX8664Traits::TableIcmp64[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
   {                                                                            \
     X8664::Traits::Cond::C1_64, X8664::Traits::Cond::C2_64,                    \
         X8664::Traits::Cond::C3_64                                             \
   }                                                                            \
   ,
-        ICMPX8664_TABLE
+    ICMPX8664_TABLE
 #undef X
 };
 
-const size_t MachineTraits<TargetX8664>::TableIcmp64Size =
+const size_t TargetX8664Traits::TableIcmp64Size =
     llvm::array_lengthof(TableIcmp64);
 
-const MachineTraits<TargetX8664>::TableTypeX8664AttributesType
-    MachineTraits<TargetX8664>::TableTypeX8664Attributes[] = {
+const TargetX8664Traits::TableTypeX8664AttributesType
+    TargetX8664Traits::TableTypeX8664Attributes[] = {
 #define X(tag, elementty, cvt, sdss, pdps, spsd, pack, width, fld)             \
   { IceType_##elementty }                                                      \
   ,
@@ -98,26 +93,24 @@
 #undef X
 };
 
-const size_t MachineTraits<TargetX8664>::TableTypeX8664AttributesSize =
+const size_t TargetX8664Traits::TableTypeX8664AttributesSize =
     llvm::array_lengthof(TableTypeX8664Attributes);
 
-const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16;
-const char *MachineTraits<TargetX8664>::TargetName = "X8664";
+const uint32_t TargetX8664Traits::X86_STACK_ALIGNMENT_BYTES = 16;
+const char *TargetX8664Traits::TargetName = "X8664";
 
 template <>
 std::array<llvm::SmallBitVector, RCX86_NUM>
-    TargetX86Base<TargetX8664>::TypeToRegisterSet = {{}};
+    TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}};
 
 template <>
 std::array<llvm::SmallBitVector,
-           TargetX86Base<TargetX8664>::Traits::RegisterSet::Reg_NUM>
-    TargetX86Base<TargetX8664>::RegisterAliases = {{}};
+           TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM>
+    TargetX86Base<X8664::Traits>::RegisterAliases = {{}};
 
 template <>
 llvm::SmallBitVector
-    TargetX86Base<TargetX8664>::ScratchRegs = llvm::SmallBitVector();
-
-} // end of namespace X86Internal
+    TargetX86Base<X8664::Traits>::ScratchRegs = llvm::SmallBitVector();
 
 //------------------------------------------------------------------------------
 //     __      ______  __     __  ______  ______  __  __   __  ______
@@ -1050,4 +1043,5 @@
 } // end of namespace dummy3
 } // end of anonymous namespace
 
+} // end of namespace X8664
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX8664.h b/src/IceTargetLoweringX8664.h
index 14a1d25..1cc5c87 100644
--- a/src/IceTargetLoweringX8664.h
+++ b/src/IceTargetLoweringX8664.h
@@ -21,13 +21,15 @@
 #include "IceGlobalContext.h"
 #include "IceInstX8664.h"
 #include "IceTargetLowering.h"
-#include "IceTargetLoweringX8664Traits.h"
+#define X86NAMESPACE X8664
 #include "IceTargetLoweringX86Base.h"
+#undef X86NAMESPACE
+#include "IceTargetLoweringX8664Traits.h"
 
 namespace Ice {
+namespace X8664 {
 
-class TargetX8664 final
-    : public ::Ice::X86Internal::TargetX86Base<TargetX8664> {
+class TargetX8664 final : public X8664::TargetX86Base<X8664::Traits> {
   TargetX8664() = delete;
   TargetX8664(const TargetX8664 &) = delete;
   TargetX8664 &operator=(const TargetX8664 &) = delete;
@@ -55,10 +57,10 @@
 
 private:
   ENABLE_MAKE_UNIQUE;
-  friend class ::Ice::X86Internal::TargetX86Base<TargetX8664>;
+  friend class X8664::TargetX86Base<X8664::Traits>;
 
   explicit TargetX8664(Cfg *Func)
-      : ::Ice::X86Internal::TargetX86Base<TargetX8664>(Func) {}
+      : ::Ice::X8664::TargetX86Base<X8664::Traits>(Func) {}
 
   Operand *createNaClReadTPSrcOperand() {
     Variable *TDB = makeReg(IceType_i32);
@@ -111,6 +113,7 @@
   explicit TargetHeaderX8664(GlobalContext *Ctx) : TargetHeaderLowering(Ctx) {}
 };
 
+} // end of namespace X8664
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8664_H
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 2822e6a..d64b607 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -31,19 +31,16 @@
 
 namespace Ice {
 
+namespace X8664 {
+using namespace ::Ice::X86;
+
+template <class TraitsType> class AssemblerX86Base;
+template <class TraitsType> struct Insts;
+template <class TraitsType> class TargetX86Base;
+
 class TargetX8664;
 
-namespace X8664 {
-class AssemblerX8664;
-} // end of namespace X8664
-
-namespace X86Internal {
-
-template <class Machine> struct Insts;
-template <class Machine> struct MachineTraits;
-template <class Machine> class TargetX86Base;
-
-template <> struct MachineTraits<TargetX8664> {
+struct TargetX8664Traits {
   //----------------------------------------------------------------------------
   //     ______  ______  __    __
   //    /\  __ \/\  ___\/\ "-./  \
@@ -52,6 +49,9 @@
   //      \/_/\/_/\/_____/\/_/  \/_/
   //
   //----------------------------------------------------------------------------
+  static constexpr ::Ice::Assembler::AssemblerKind AsmKind =
+      ::Ice::Assembler::Asm_X8664;
+
   static constexpr bool Is64Bit = true;
   static constexpr bool HasPopa = false;
   static constexpr bool HasPusha = false;
@@ -191,7 +191,7 @@
              (rm() == reg); // Register codes match.
     }
 
-    template <class> friend class AssemblerX86Base;
+    friend class AssemblerX86Base<TargetX8664Traits>;
   };
 
   class Address : public Operand {
@@ -801,10 +801,12 @@
   //      \/_/\/_/ \/_/\/_____/  \/_/
   //
   //----------------------------------------------------------------------------
-  using Insts = ::Ice::X86Internal::Insts<TargetX8664>;
+  using Traits = TargetX8664Traits;
+  using Insts = ::Ice::X8664::Insts<Traits>;
 
-  using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>;
-  using Assembler = X8664::AssemblerX8664;
+  using TargetLowering = ::Ice::X8664::TargetX86Base<Traits>;
+  using ConcreteTarget = ::Ice::X8664::TargetX8664;
+  using Assembler = ::Ice::X8664::AssemblerX86Base<Traits>;
 
   /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
   /// and VariableSplit.
@@ -972,10 +974,7 @@
   } TypeAttributes[];
 };
 
-} // end of namespace X86Internal
-
-namespace X8664 {
-using Traits = ::Ice::X86Internal::MachineTraits<TargetX8664>;
+using Traits = ::Ice::X8664::TargetX8664Traits;
 } // end of namespace X8664
 
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 1074ae4..3c24cd3 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -27,12 +27,16 @@
 #include <type_traits>
 #include <utility>
 
+#ifndef X86NAMESPACE
+#error "You must define the X86 Target namespace."
+#endif
+
 namespace Ice {
-namespace X86Internal {
+namespace X86NAMESPACE {
 
-template <class MachineTraits> class BoolFolding;
+using namespace ::Ice::X86;
 
-template <class Machine> struct MachineTraits {};
+template <typename Traits> class BoolFolding;
 
 /// TargetX86Base is a template for all X86 Targets, and it relies on the CRT
 /// pattern for generating code, delegating to actual backends target-specific
@@ -44,18 +48,34 @@
 ///
 /// Note: Ideally, we should be able to
 ///
-///  static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value);
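+///  static_assert(std::is_base_of<TargetX86Base<TraitsType>,
+///                typename TraitsType::ConcreteTarget>::value);
 ///
-/// but that does not work: the compiler does not know that Machine inherits
-/// from TargetX86Base at this point in translation.
+/// but that does not work: the compiler does not know that ConcreteTarget
+/// inherits from TargetX86Base at this point in translation.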
-template <class Machine> class TargetX86Base : public TargetLowering {
+template <typename TraitsType> class TargetX86Base : public TargetLowering {
   TargetX86Base() = delete;
   TargetX86Base(const TargetX86Base &) = delete;
   TargetX86Base &operator=(const TargetX86Base &) = delete;
 
 public:
-  using Traits = MachineTraits<Machine>;
-  using BoolFolding = ::Ice::X86Internal::BoolFolding<Traits>;
+  using Traits = TraitsType;
+  using BoolFolding = BoolFolding<Traits>;
+  using ConcreteTarget = typename Traits::ConcreteTarget;
+  using InstructionSetEnum = typename Traits::InstructionSet;
+
+  using BrCond = typename Traits::Cond::BrCond;
+  using CmppsCond = typename Traits::Cond::CmppsCond;
+
+  using X86Address = typename Traits::Address;
+  using X86Operand = typename Traits::X86Operand;
+  using X86OperandMem = typename Traits::X86OperandMem;
+  using SegmentRegisters = typename Traits::X86OperandMem::SegmentRegisters;
+  using SpillVariable = typename Traits::SpillVariable;
+
+  using InstX86Br = typename Traits::Insts::Br;
+  using InstX86FakeRMW = typename Traits::Insts::FakeRMW;
+  using InstX86Label = typename Traits::Insts::Label;
 
   ~TargetX86Base() override = default;
 
@@ -156,11 +176,9 @@
   void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                               size_t BasicFrameOffset, size_t StackAdjBytes,
                               size_t &InArgsSizeBytes);
-  typename Traits::Address stackVarToAsmOperand(const Variable *Var) const;
+  X86Address stackVarToAsmOperand(const Variable *Var) const;
 
-  typename Traits::InstructionSet getInstructionSet() const {
-    return InstructionSet;
-  }
+  InstructionSetEnum getInstructionSet() const { return InstructionSet; }
   Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
 
 protected:
@@ -186,7 +204,7 @@
   void lowerSwitch(const InstSwitch *Inst) override;
   void lowerUnreachable(const InstUnreachable *Inst) override;
   void lowerOther(const Inst *Instr) override;
-  void lowerRMW(const typename Traits::Insts::FakeRMW *RMW);
+  void lowerRMW(const InstX86FakeRMW *RMW);
   void prelowerPhis() override;
   uint32_t getCallStackArgumentsSizeBytes(const std::vector<Type> &ArgTypes,
                                           Type ReturnType);
@@ -277,8 +295,8 @@
   /// Turn a pointer operand into a memory operand that can be used by a real
   /// load/store operation. Legalizes the operand as well. This is a nop if the
   /// operand is already a legal memory operand.
-  typename Traits::X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
-                                                    bool DoLegalize = true);
+  X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
+                                   bool DoLegalize = true);
 
   Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
   static Type stackSlotType();
@@ -316,8 +334,8 @@
   /// @}
 
   /// Return a memory operand corresponding to a stack allocated Variable.
-  typename Traits::X86OperandMem *
-  getMemoryOperandForStackSlot(Type Ty, Variable *Slot, uint32_t Offset = 0);
+  X86OperandMem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
+                                              uint32_t Offset = 0);
 
   void
   makeRandomRegisterPermutation(llvm::SmallVectorImpl<int32_t> &Permutation,
@@ -330,13 +348,13 @@
   void _adc(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Adc>(Dest, Src0);
   }
-  void _adc_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+  void _adc_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
     Context.insert<typename Traits::Insts::AdcRMW>(DestSrc0, Src1);
   }
   void _add(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Add>(Dest, Src0);
   }
-  void _add_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+  void _add_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
     Context.insert<typename Traits::Insts::AddRMW>(DestSrc0, Src1);
   }
   void _addps(Variable *Dest, Operand *Src0) {
@@ -354,28 +372,25 @@
   void _andps(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Andps>(Dest, Src0);
   }
-  void _and_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+  void _and_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
     Context.insert<typename Traits::Insts::AndRMW>(DestSrc0, Src1);
   }
   void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
     Context.insert<typename Traits::Insts::Blendvps>(Dest, Src0, Src1);
   }
-  void _br(typename Traits::Cond::BrCond Condition, CfgNode *TargetTrue,
-           CfgNode *TargetFalse) {
-    Context.insert<typename Traits::Insts::Br>(
-        TargetTrue, TargetFalse, Condition, Traits::Insts::Br::Far);
+  void _br(BrCond Condition, CfgNode *TargetTrue, CfgNode *TargetFalse) {
+    Context.insert<InstX86Br>(TargetTrue, TargetFalse, Condition,
+                              InstX86Br::Far);
   }
   void _br(CfgNode *Target) {
-    Context.insert<typename Traits::Insts::Br>(Target, Traits::Insts::Br::Far);
+    Context.insert<InstX86Br>(Target, InstX86Br::Far);
   }
-  void _br(typename Traits::Cond::BrCond Condition, CfgNode *Target) {
-    Context.insert<typename Traits::Insts::Br>(Target, Condition,
-                                               Traits::Insts::Br::Far);
+  void _br(BrCond Condition, CfgNode *Target) {
+    Context.insert<InstX86Br>(Target, Condition, InstX86Br::Far);
   }
-  void _br(typename Traits::Cond::BrCond Condition,
-           typename Traits::Insts::Label *Label,
-           typename Traits::Insts::Br::Mode Kind = Traits::Insts::Br::Near) {
-    Context.insert<typename Traits::Insts::Br>(Label, Condition, Kind);
+  void _br(BrCond Condition, InstX86Label *Label,
+           typename InstX86Br::Mode Kind = InstX86Br::Near) {
+    Context.insert<InstX86Br>(Label, Condition, Kind);
   }
   void _bsf(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Bsf>(Dest, Src0);
@@ -389,15 +404,13 @@
   void _cbwdq(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Cbwdq>(Dest, Src0);
   }
-  void _cmov(Variable *Dest, Operand *Src0,
-             typename Traits::Cond::BrCond Condition) {
+  void _cmov(Variable *Dest, Operand *Src0, BrCond Condition) {
     Context.insert<typename Traits::Insts::Cmov>(Dest, Src0, Condition);
   }
   void _cmp(Operand *Src0, Operand *Src1) {
     Context.insert<typename Traits::Insts::Icmp>(Src0, Src1);
   }
-  void _cmpps(Variable *Dest, Operand *Src0,
-              typename Traits::Cond::CmppsCond Condition) {
+  void _cmpps(Variable *Dest, Operand *Src0, CmppsCond Condition) {
     Context.insert<typename Traits::Insts::Cmpps>(Dest, Src0, Condition);
   }
   void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
@@ -409,8 +422,8 @@
     _set_dest_redefined();
     Context.insert<InstFakeUse>(Eax);
   }
-  void _cmpxchg8b(typename Traits::X86OperandMem *Addr, Variable *Edx,
-                  Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked) {
+  void _cmpxchg8b(X86OperandMem *Addr, Variable *Edx, Variable *Eax,
+                  Variable *Ecx, Variable *Ebx, bool Locked) {
     Context.insert<typename Traits::Insts::Cmpxchg8b>(Addr, Edx, Eax, Ecx, Ebx,
                                                       Locked);
     // Mark edx, and eax as possibly modified by cmpxchg8b.
@@ -533,7 +546,7 @@
   void _orps(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Orps>(Dest, Src0);
   }
-  void _or_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+  void _or_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
     Context.insert<typename Traits::Insts::OrRMW>(DestSrc0, Src1);
   }
   void _padd(Variable *Dest, Operand *Src0) {
@@ -605,10 +618,10 @@
   void _sbb(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Sbb>(Dest, Src0);
   }
-  void _sbb_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+  void _sbb_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
     Context.insert<typename Traits::Insts::SbbRMW>(DestSrc0, Src1);
   }
-  void _setcc(Variable *Dest, typename Traits::Cond::BrCond Condition) {
+  void _setcc(Variable *Dest, BrCond Condition) {
     Context.insert<typename Traits::Insts::Setcc>(Dest, Condition);
   }
   void _shl(Variable *Dest, Operand *Src0) {
@@ -629,19 +642,19 @@
   void _sqrtss(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Sqrtss>(Dest, Src0);
   }
-  void _store(Operand *Value, typename Traits::X86Operand *Mem) {
+  void _store(Operand *Value, X86Operand *Mem) {
     Context.insert<typename Traits::Insts::Store>(Value, Mem);
   }
-  void _storep(Variable *Value, typename Traits::X86OperandMem *Mem) {
+  void _storep(Variable *Value, X86OperandMem *Mem) {
     Context.insert<typename Traits::Insts::StoreP>(Value, Mem);
   }
-  void _storeq(Variable *Value, typename Traits::X86OperandMem *Mem) {
+  void _storeq(Variable *Value, X86OperandMem *Mem) {
     Context.insert<typename Traits::Insts::StoreQ>(Value, Mem);
   }
   void _sub(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Sub>(Dest, Src0);
   }
-  void _sub_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+  void _sub_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
     Context.insert<typename Traits::Insts::SubRMW>(DestSrc0, Src1);
   }
   void _subps(Variable *Dest, Operand *Src0) {
@@ -679,7 +692,7 @@
   void _xorps(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Xorps>(Dest, Src0);
   }
-  void _xor_rmw(typename Traits::X86OperandMem *DestSrc0, Operand *Src1) {
+  void _xor_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
     Context.insert<typename Traits::Insts::XorRMW>(DestSrc0, Src1);
   }
 
@@ -717,8 +730,7 @@
   bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
   void findRMW();
 
-  typename Traits::InstructionSet InstructionSet =
-      Traits::InstructionSet::Begin;
+  InstructionSetEnum InstructionSet = Traits::InstructionSet::Begin;
   bool IsEbpBasedFrame = false;
   bool NeedsStackAlignment = false;
   size_t SpillAreaSizeBytes = 0;
@@ -736,8 +748,8 @@
   /// Randomize a given immediate operand
   Operand *randomizeOrPoolImmediate(Constant *Immediate,
                                     int32_t RegNum = Variable::NoRegister);
-  typename Traits::X86OperandMem *
-  randomizeOrPoolImmediate(typename Traits::X86OperandMem *MemOperand,
+  X86OperandMem *
+  randomizeOrPoolImmediate(X86OperandMem *MemOperand,
                            int32_t RegNum = Variable::NoRegister);
   bool RandomizationPoolingPaused = false;
 
@@ -755,13 +767,15 @@
   /// Method **exactly** (i.e., no argument promotion is performed.)
   template <typename Ret, typename... Args>
   typename std::enable_if<!std::is_void<Ret>::value, Ret>::type
-  dispatchToConcrete(Ret (Machine::*Method)(Args...), Args &&... args) {
-    return (static_cast<Machine *>(this)->*Method)(std::forward<Args>(args)...);
+  dispatchToConcrete(Ret (ConcreteTarget::*Method)(Args...), Args &&... args) {
+    return (static_cast<ConcreteTarget *>(this)->*Method)(
+        std::forward<Args>(args)...);
   }
 
   template <typename... Args>
-  void dispatchToConcrete(void (Machine::*Method)(Args...), Args &&... args) {
-    (static_cast<Machine *>(this)->*Method)(std::forward<Args>(args)...);
+  void dispatchToConcrete(void (ConcreteTarget::*Method)(Args...),
+                          Args &&... args) {
+    (static_cast<ConcreteTarget *>(this)->*Method)(std::forward<Args>(args)...);
   }
 
   void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi,
@@ -775,8 +789,7 @@
 
   /// Emit a setcc instruction if Consumer == nullptr; otherwise emit a
   /// specialized version of Consumer.
-  void setccOrConsumer(typename Traits::Cond::BrCond Condition, Variable *Dest,
-                       const Inst *Consumer);
+  void setccOrConsumer(BrCond Condition, Variable *Dest, const Inst *Consumer);
 
   /// Emit a mov [1|0] instruction if Consumer == nullptr; otherwise emit a
   /// specialized version of Consumer.
@@ -788,10 +801,10 @@
   void lowerSelectVector(const InstSelect *Inst);
 
   /// Helpers for select lowering.
-  void lowerSelectMove(Variable *Dest, typename Traits::Cond::BrCond Cond,
-                       Operand *SrcT, Operand *SrcF);
-  void lowerSelectIntMove(Variable *Dest, typename Traits::Cond::BrCond Cond,
-                          Operand *SrcT, Operand *SrcF);
+  void lowerSelectMove(Variable *Dest, BrCond Cond, Operand *SrcT,
+                       Operand *SrcF);
+  void lowerSelectIntMove(Variable *Dest, BrCond Cond, Operand *SrcT,
+                          Operand *SrcF);
   /// Generic helper to move an arbitrary type from Src to Dest.
   void lowerMove(Variable *Dest, Operand *Src, bool IsRedefinition);
 
@@ -813,7 +826,7 @@
 
   BoolFolding FoldingInfo;
 };
-} // end of namespace X86Internal
+} // end of namespace X86NAMESPACE
 } // end of namespace Ice
 
 #include "IceTargetLoweringX86BaseImpl.h"
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index a169203..04d9cdc 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -27,12 +27,13 @@
 #include "IceOperand.h"
 #include "IcePhiLoweringImpl.h"
 #include "IceUtils.h"
+#include "IceInstX86Base.h"
 #include "llvm/Support/MathExtras.h"
 
 #include <stack>
 
 namespace Ice {
-namespace X86Internal {
+namespace X86NAMESPACE {
 
 /// A helper class to ease the settings of RandomizationPoolingPause to disable
 /// constant blinding or pooling for some translation phases.
@@ -50,7 +51,7 @@
   bool &Flag;
 };
 
-template <class MachineTraits> class BoolFoldingEntry {
+template <typename Traits> class BoolFoldingEntry {
   BoolFoldingEntry(const BoolFoldingEntry &) = delete;
 
 public:
@@ -73,7 +74,7 @@
   uint32_t NumUses = 0;
 };
 
-template <class MachineTraits> class BoolFolding {
+template <typename Traits> class BoolFolding {
 public:
   enum BoolFoldingProducerKind {
     PK_None,
@@ -113,25 +114,25 @@
   }
   void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
   /// Producers maps Variable::Number to a BoolFoldingEntry.
-  std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
+  std::unordered_map<SizeT, BoolFoldingEntry<Traits>> Producers;
 };
 
-template <class MachineTraits>
-BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
-    : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
+template <typename Traits>
+BoolFoldingEntry<Traits>::BoolFoldingEntry(Inst *I)
+    : Instr(I), IsComplex(BoolFolding<Traits>::hasComplexLowering(I)) {}
 
-template <class MachineTraits>
-typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
-BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
+template <typename Traits>
+typename BoolFolding<Traits>::BoolFoldingProducerKind
+BoolFolding<Traits>::getProducerKind(const Inst *Instr) {
   if (llvm::isa<InstIcmp>(Instr)) {
-    if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
+    if (Traits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
       return PK_Icmp32;
     return PK_Icmp64;
   }
   if (llvm::isa<InstFcmp>(Instr))
     return PK_Fcmp;
   if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
-    if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
+    if (Traits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
       switch (Arith->getOp()) {
       default:
         return PK_None;
@@ -154,9 +155,9 @@
   return PK_None;
 }
 
-template <class MachineTraits>
-typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
-BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
+template <typename Traits>
+typename BoolFolding<Traits>::BoolFoldingConsumerKind
+BoolFolding<Traits>::getConsumerKind(const Inst *Instr) {
   if (llvm::isa<InstBr>(Instr))
     return CK_Br;
   if (llvm::isa<InstSelect>(Instr))
@@ -181,23 +182,23 @@
 /// conditional branch, namely 64-bit integer compares and some floating-point
 /// compares. When this is true, and there is more than one consumer, we prefer
 /// to disable the folding optimization because it minimizes branches.
-template <class MachineTraits>
-bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
+template <typename Traits>
+bool BoolFolding<Traits>::hasComplexLowering(const Inst *Instr) {
   switch (getProducerKind(Instr)) {
   default:
     return false;
   case PK_Icmp64:
-    return !MachineTraits::Is64Bit;
+    return !Traits::Is64Bit;
   case PK_Fcmp:
-    return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
-               .C2 != MachineTraits::Cond::Br_None;
+    return Traits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
+           Traits::Cond::Br_None;
   }
 }
 
-template <class MachineTraits>
-bool BoolFolding<MachineTraits>::isValidFolding(
-    typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,
-    typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {
+template <typename Traits>
+bool BoolFolding<Traits>::isValidFolding(
+    typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind,
+    typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind) {
   switch (ProducerKind) {
   default:
     return false;
@@ -210,8 +211,7 @@
   }
 }
 
-template <class MachineTraits>
-void BoolFolding<MachineTraits>::init(CfgNode *Node) {
+template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
   Producers.clear();
   for (Inst &Instr : Node->getInsts()) {
     // Check whether Instr is a valid producer.
@@ -220,7 +220,7 @@
         && Var             // only instructions with an actual dest var
         && Var->getType() == IceType_i1          // only bool-type dest vars
         && getProducerKind(&Instr) != PK_None) { // white-listed instructions
-      Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
+      Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
     }
     // Check each src variable against the map.
     FOREACH_VAR_IN_INST(Var, Instr) {
@@ -233,14 +233,14 @@
         continue;
       }
       // Consumer instructions must be white-listed
-      typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
-          ConsumerKind = getConsumerKind(&Instr);
+      typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind =
+          getConsumerKind(&Instr);
       if (ConsumerKind == CK_None) {
         setInvalid(VarNum);
         continue;
       }
-      typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
-          ProducerKind = getProducerKind(Producers[VarNum].Instr);
+      typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind =
+          getProducerKind(Producers[VarNum].Instr);
       if (!isValidFolding(ProducerKind, ConsumerKind)) {
         setInvalid(VarNum);
         continue;
@@ -273,9 +273,8 @@
   }
 }
 
-template <class MachineTraits>
-const Inst *
-BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
+template <typename Traits>
+const Inst *BoolFolding<Traits>::getProducerFor(const Operand *Opnd) const {
   auto *Var = llvm::dyn_cast<const Variable>(Opnd);
   if (Var == nullptr)
     return nullptr;
@@ -286,8 +285,8 @@
   return Element->second.Instr;
 }
 
-template <class MachineTraits>
-void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
+template <typename Traits>
+void BoolFolding<Traits>::dump(const Cfg *Func) const {
   if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
     return;
   OstreamLocker L(Func->getContext());
@@ -301,14 +300,14 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) {
   FoldingInfo.init(Node);
   FoldingInfo.dump(Func);
 }
 
-template <class Machine>
-TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
+template <typename TraitsType>
+TargetX86Base<TraitsType>::TargetX86Base(Cfg *Func)
     : TargetLowering(Func) {
   static_assert(
       (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
@@ -317,18 +316,18 @@
       "Traits::InstructionSet range different from TargetInstructionSet");
   if (Func->getContext()->getFlags().getTargetInstructionSet() !=
       TargetInstructionSet::BaseInstructionSet) {
-    InstructionSet = static_cast<typename Traits::InstructionSet>(
+    InstructionSet = static_cast<InstructionSetEnum>(
         (Func->getContext()->getFlags().getTargetInstructionSet() -
          TargetInstructionSet::X86InstructionSet_Begin) +
         Traits::InstructionSet::Begin);
   }
 }
 
-template <class Machine> void TargetX86Base<Machine>::staticInit() {
+template <typename TraitsType> void TargetX86Base<TraitsType>::staticInit() {
   Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);
 }
 
-template <class Machine> void TargetX86Base<Machine>::translateO2() {
+template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
   TimerMarker T(TimerStack::TT_O2, Func);
 
   genTargetHelperCalls();
@@ -454,7 +453,7 @@
     Func->markNodesForSandboxing();
 }
 
-template <class Machine> void TargetX86Base<Machine>::translateOm1() {
+template <typename TraitsType> void TargetX86Base<TraitsType>::translateOm1() {
   TimerMarker T(TimerStack::TT_Om1, Func);
 
   genTargetHelperCalls();
@@ -531,14 +530,16 @@
   }
 }
 
-template <class Machine>
+template <typename TraitsType>
 bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
   if (A == B)
     return true;
-  if (auto *MemA = llvm::dyn_cast<
-          typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
-    if (auto *MemB = llvm::dyn_cast<
-            typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
+  if (auto *MemA =
+          llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
+              A)) {
+    if (auto *MemB =
+            llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
+                B)) {
       return MemA->getBase() == MemB->getBase() &&
              MemA->getOffset() == MemB->getOffset() &&
              MemA->getIndex() == MemB->getIndex() &&
@@ -549,7 +550,7 @@
   return false;
 }
 
-template <class Machine> void TargetX86Base<Machine>::findRMW() {
+template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() {
   Func->dump("Before RMW");
   if (Func->isVerbose(IceV_RMW))
     Func->getContext()->lockStr();
@@ -603,8 +604,8 @@
       // trigger, resulting in two loads and one store, which is worse than the
       // original one load and one store.  However, this is probably rare, and
       // caching probably keeps it just as fast.
-      if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
-                                            Store->getAddr()))
+      if (!isSameMemAddressOperand<TraitsType>(Load->getSourceAddress(),
+                                               Store->getAddr()))
         continue;
       Operand *ArithSrcFromLoad = Arith->getSrc(0);
       Operand *ArithSrcOther = Arith->getSrc(1);
@@ -632,8 +633,8 @@
       Store->setRmwBeacon(Beacon);
       auto *BeaconDef = InstFakeDef::create(Func, Beacon);
       Node->getInsts().insert(I3, BeaconDef);
-      auto *RMW = Traits::Insts::FakeRMW::create(
-          Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
+      auto *RMW = InstX86FakeRMW::create(Func, ArithSrcOther, Store->getAddr(),
+                                         Beacon, Arith->getOp());
       Node->getInsts().insert(I3, RMW);
     }
   }
@@ -666,7 +667,7 @@
   return false;
 }
 
-template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
+template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() {
   for (CfgNode *Node : Func->getNodes()) {
     Context.init(Node);
     while (!Context.atEnd()) {
@@ -755,16 +756,17 @@
   Func->dump("After load optimization");
 }
 
-template <class Machine>
-bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
-  if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
+template <typename TraitsType>
+bool TargetX86Base<TraitsType>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
+  if (auto *Br = llvm::dyn_cast<InstX86Br>(I)) {
     return Br->optimizeBranch(NextNode);
   }
   return false;
 }
 
-template <class Machine>
-Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::getPhysicalRegister(SizeT RegNum,
+                                                         Type Ty) {
   if (Ty == IceType_void)
     Ty = IceType_i32;
   if (PhysicalRegisters[Ty].empty())
@@ -786,13 +788,13 @@
   return Reg;
 }
 
-template <class Machine>
-IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
+template <typename TraitsType>
+IceString TargetX86Base<TraitsType>::getRegName(SizeT RegNum, Type Ty) const {
   return Traits::getRegName(Traits::getGprForType(Ty, RegNum));
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::emitVariable(const Variable *Var) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Ctx->getStrEmit();
@@ -823,9 +825,9 @@
   Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
 }
 
-template <class Machine>
-typename TargetX86Base<Machine>::Traits::Address
-TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
+template <typename TraitsType>
+typename TargetX86Base<TraitsType>::X86Address
+TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const {
   if (Var->hasReg())
     llvm_unreachable("Stack Variable has a register assigned");
   if (Var->mustHaveReg()) {
@@ -835,8 +837,8 @@
   int32_t BaseRegNum = Var->getBaseRegNum();
   if (Var->getBaseRegNum() == Variable::NoRegister)
     BaseRegNum = getFrameOrStackReg();
-  return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset,
-                                  AssemblerFixup::NoFixup);
+  return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset,
+                    AssemblerFixup::NoFixup);
 }
 
 /// Helper function for addProlog().
@@ -847,12 +849,10 @@
 /// recursively on the components, taking care to handle Lo first because of the
 /// little-endian architecture. Lastly, this function generates an instruction
 /// to copy Arg into its assigned register if applicable.
-template <class Machine>
-void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
-                                                    Variable *FramePtr,
-                                                    size_t BasicFrameOffset,
-                                                    size_t StackAdjBytes,
-                                                    size_t &InArgsSizeBytes) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::finishArgumentLowering(
+    Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset,
+    size_t StackAdjBytes, size_t &InArgsSizeBytes) {
   if (!Traits::Is64Bit) {
     if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
       Variable *Lo = Arg64On32->getLo();
@@ -872,7 +872,7 @@
   InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
   if (Arg->hasReg()) {
     assert(Ty != IceType_i64 || Traits::Is64Bit);
-    auto *Mem = Traits::X86OperandMem::create(
+    auto *Mem = X86OperandMem::create(
         Func, Ty, FramePtr,
         Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));
     if (isVectorType(Arg->getType())) {
@@ -880,21 +880,21 @@
     } else {
       _mov(Arg, Mem);
     }
-    // This argument-copying instruction uses an explicit Traits::X86OperandMem
+    // This argument-copying instruction uses an explicit X86OperandMem
     // operand instead of a Variable, so its fill-from-stack operation has to
     // be tracked separately for statistics.
     Ctx->statsUpdateFills();
   }
 }
 
-template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
+template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
   return Traits::WordType;
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T>
 typename std::enable_if<!T::Is64Bit, Operand>::type *
-TargetX86Base<Machine>::loOperand(Operand *Operand) {
+TargetX86Base<TraitsType>::loOperand(Operand *Operand) {
   assert(Operand->getType() == IceType_i64 ||
          Operand->getType() == IceType_f64);
   if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -907,8 +907,8 @@
     // Check if we need to blind/pool the constant.
     return legalize(ConstInt);
   }
-  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
-    auto *MemOperand = Traits::X86OperandMem::create(
+  if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
+    auto *MemOperand = X86OperandMem::create(
         Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
         Mem->getShift(), Mem->getSegmentRegister());
     // Test if we should randomize or pool the offset, if so randomize it or
@@ -920,10 +920,10 @@
   return nullptr;
 }
 
-template <class Machine>
+template <typename TraitsType>
 template <typename T>
 typename std::enable_if<!T::Is64Bit, Operand>::type *
-TargetX86Base<Machine>::hiOperand(Operand *Operand) {
+TargetX86Base<TraitsType>::hiOperand(Operand *Operand) {
   assert(Operand->getType() == IceType_i64 ||
          Operand->getType() == IceType_f64);
   if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -936,7 +936,7 @@
     // Check if we need to blind/pool the constant.
     return legalize(ConstInt);
   }
-  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
+  if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
     Constant *Offset = Mem->getOffset();
     if (Offset == nullptr) {
       Offset = Ctx->getConstantInt32(4);
@@ -948,7 +948,7 @@
           Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
                               SymOffset->getSuppressMangling());
     }
-    auto *MemOperand = Traits::X86OperandMem::create(
+    auto *MemOperand = X86OperandMem::create(
         Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
         Mem->getShift(), Mem->getSegmentRegister());
     // Test if the Offset is an eligible i32 constants for randomization and
@@ -960,15 +960,15 @@
   return nullptr;
 }
 
-template <class Machine>
+template <typename TraitsType>
 llvm::SmallBitVector
-TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
-                                       RegSetMask Exclude) const {
+TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include,
+                                          RegSetMask Exclude) const {
   return Traits::getRegisterSet(Include, Exclude);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Inst) {
   // Conservatively require the stack to be aligned. Some stack adjustment
   // operations implemented below assume that the stack is aligned before the
   // alloca. All the alloca code ensures that the stack alignment is preserved
@@ -1034,10 +1034,8 @@
   uint32_t OutArgsSize = maxOutArgsSizeBytes();
   if (OutArgsSize > 0) {
     Variable *T = makeReg(IceType_i32);
-    typename Traits::X86OperandMem *CalculateOperand =
-        Traits::X86OperandMem::create(
-            Func, IceType_i32, esp,
-            Ctx->getConstantInt(IceType_i32, OutArgsSize));
+    auto *CalculateOperand = X86OperandMem::create(
+        Func, IceType_i32, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize));
     _lea(T, CalculateOperand);
     _mov(Dest, T);
   } else {
@@ -1050,9 +1048,9 @@
 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
 /// lea-based multiplies by 5, combined with left-shifting by 2.
-template <class Machine>
-bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
-                                               int32_t Src1) {
+template <typename TraitsType>
+bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
+                                                  int32_t Src1) {
   // Disable this optimization for Om1 and O0, just to keep things simple
   // there.
   if (Ctx->getFlags().getOptLevel() < Opt_1)
@@ -1119,18 +1117,15 @@
   Constant *Zero = Ctx->getConstantZero(IceType_i32);
   for (uint32_t i = 0; i < Count9; ++i) {
     constexpr uint16_t Shift = 3; // log2(9-1)
-    _lea(T,
-         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
+    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
   }
   for (uint32_t i = 0; i < Count5; ++i) {
     constexpr uint16_t Shift = 2; // log2(5-1)
-    _lea(T,
-         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
+    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
   }
   for (uint32_t i = 0; i < Count3; ++i) {
     constexpr uint16_t Shift = 1; // log2(3-1)
-    _lea(T,
-         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
+    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
   }
   if (Count2) {
     _shl(T, Ctx->getConstantInt(Ty, Count2));
@@ -1141,11 +1136,11 @@
   return true;
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
-                                          Operand *Src0Lo, Operand *Src0Hi,
-                                          Operand *Src1Lo, Variable *DestLo,
-                                          Variable *DestHi) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,
+                                             Operand *Src0Lo, Operand *Src0Hi,
+                                             Operand *Src1Lo, Variable *DestLo,
+                                             Variable *DestHi) {
   // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
   Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
   Constant *Zero = Ctx->getConstantZero(IceType_i32);
@@ -1274,8 +1269,7 @@
   } else {
     // NON-CONSTANT CASES.
     Constant *BitTest = Ctx->getConstantInt32(0x20);
-    typename Traits::Insts::Label *Label =
-        Traits::Insts::Label::create(Func, this);
+    InstX86Label *Label = InstX86Label::create(Func, this);
     // COMMON PREFIX OF: a=b SHIFT_OP c ==>
     //   t1:ecx = c.lo & 0xff
     //   t2 = b.lo
@@ -1355,8 +1349,8 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
   Variable *Dest = Inst->getDest();
   if (Dest->isRematerializable()) {
     Context.insert<InstFakeDef>(Dest);
@@ -1508,7 +1502,7 @@
   if (isVectorType(Ty)) {
     // TODO: Trap on integer divide and integer modulo by zero. See:
     // https://code.google.com/p/nativeclient/issues/detail?id=3899
-    if (llvm::isa<typename Traits::X86OperandMem>(Src1))
+    if (llvm::isa<X86OperandMem>(Src1))
       Src1 = legalizeToReg(Src1);
     switch (Inst->getOp()) {
     case InstArithmetic::_num:
@@ -1927,8 +1921,8 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerAssign(const InstAssign *Inst) {
   Variable *Dest = Inst->getDest();
   if (Dest->isRematerializable()) {
     Context.insert<InstFakeDef>(Dest);
@@ -1939,8 +1933,8 @@
   lowerMove(Dest, Src, false);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerBr(const InstBr *Br) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) {
   if (Br->isUnconditional()) {
     _br(Br->getTargetUnconditional());
     return;
@@ -1974,8 +1968,8 @@
   _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) {
   // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
   InstCast::OpKind CastKind = Inst->getCastKind();
   Variable *Dest = Inst->getDest();
@@ -2153,7 +2147,7 @@
       assert(DestTy == IceType_v4i32 &&
              Inst->getSrc(0)->getType() == IceType_v4f32);
       Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
-      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
+      if (llvm::isa<X86OperandMem>(Src0RM))
         Src0RM = legalizeToReg(Src0RM);
       Variable *T = makeReg(DestTy);
       _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
@@ -2219,7 +2213,7 @@
       assert(DestTy == IceType_v4f32 &&
              Inst->getSrc(0)->getType() == IceType_v4i32);
       Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
-      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
+      if (llvm::isa<X86OperandMem>(Src0RM))
         Src0RM = legalizeToReg(Src0RM);
       Variable *T = makeReg(DestTy);
       _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
@@ -2304,8 +2298,7 @@
       Variable *T = nullptr;
       // TODO: Should be able to force a spill setup by calling legalize() with
       // Legal_Mem and not Legal_Reg or Legal_Imm.
-      typename Traits::SpillVariable *SpillVar =
-          Func->makeVariable<typename Traits::SpillVariable>(SrcType);
+      SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
       SpillVar->setLinkedTo(Dest);
       Variable *Spill = SpillVar;
       Spill->setMustNotHaveReg();
@@ -2330,8 +2323,8 @@
         //   a_hi.i32 = t_hi.i32
         Operand *SpillLo, *SpillHi;
         if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
-          typename Traits::SpillVariable *SpillVar =
-              Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+          SpillVariable *SpillVar =
+              Func->makeVariable<SpillVariable>(IceType_f64);
           SpillVar->setLinkedTo(Src0Var);
           Variable *Spill = SpillVar;
           Spill->setMustNotHaveReg();
@@ -2365,7 +2358,7 @@
         _mov(Dest, T);
       } else {
         Src0 = legalize(Src0);
-        if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
+        if (llvm::isa<X86OperandMem>(Src0)) {
           Variable *T = Func->makeVariable(DestTy);
           _movq(T, Src0);
           _movq(Dest, T);
@@ -2378,8 +2371,8 @@
         //   t_hi.i32 = b_hi.i32
         //   hi(s.f64) = t_hi.i32
         //   a.f64 = s.f64
-        typename Traits::SpillVariable *SpillVar =
-            Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+        SpillVariable *SpillVar =
+            Func->makeVariable<SpillVariable>(IceType_f64);
         SpillVar->setLinkedTo(Dest);
         Variable *Spill = SpillVar;
         Spill->setMustNotHaveReg();
@@ -2418,8 +2411,8 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerExtractElement(
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerExtractElement(
     const InstExtractElement *Inst) {
   Operand *SourceVectNotLegalized = Inst->getSrc(0);
   ConstantInteger32 *ElementIndex =
@@ -2481,7 +2474,7 @@
 
     // Compute the location of the element in memory.
     unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
-    typename Traits::X86OperandMem *Loc =
+    X86OperandMem *Loc =
         getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
     _mov(ExtractedElementR, Loc);
   }
@@ -2500,8 +2493,8 @@
   _mov(Dest, ExtractedElementR);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerFcmp(const InstFcmp *Fcmp) {
   Variable *Dest = Fcmp->getDest();
 
   if (isVectorType(Dest->getType())) {
@@ -2512,9 +2505,9 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
-                                                  const Inst *Consumer) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
+                                                     const Inst *Consumer) {
   Operand *Src0 = Fcmp->getSrc(0);
   Operand *Src1 = Fcmp->getSrc(1);
   Variable *Dest = Fcmp->getDest();
@@ -2566,8 +2559,7 @@
     Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
     _mov(Dest, Default);
     if (HasC1) {
-      typename Traits::Insts::Label *Label =
-          Traits::Insts::Label::create(Func, this);
+      InstX86Label *Label = InstX86Label::create(Func, this);
       _br(Traits::TableFcmp[Index].C1, Label);
       if (HasC2) {
         _br(Traits::TableFcmp[Index].C2, Label);
@@ -2602,8 +2594,7 @@
       std::swap(SrcT, SrcF);
     lowerMove(SelectDest, SrcF, false);
     if (HasC1) {
-      typename Traits::Insts::Label *Label =
-          Traits::Insts::Label::create(Func, this);
+      InstX86Label *Label = InstX86Label::create(Func, this);
       _br(Traits::TableFcmp[Index].C1, Label);
       if (HasC2) {
         _br(Traits::TableFcmp[Index].C2, Label);
@@ -2617,8 +2608,8 @@
   llvm::report_fatal_error("Unexpected consumer type");
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerFcmpVector(const InstFcmp *Fcmp) {
   Operand *Src0 = Fcmp->getSrc(0);
   Operand *Src1 = Fcmp->getSrc(1);
   Variable *Dest = Fcmp->getDest();
@@ -2643,13 +2634,12 @@
   } else {
     Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
     Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
-    if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+    if (llvm::isa<X86OperandMem>(Src1RM))
       Src1RM = legalizeToReg(Src1RM);
 
     switch (Condition) {
     default: {
-      typename Traits::Cond::CmppsCond Predicate =
-          Traits::TableFcmp[Index].Predicate;
+      CmppsCond Predicate = Traits::TableFcmp[Index].Predicate;
       assert(Predicate != Traits::Cond::Cmpps_Invalid);
       T = makeReg(Src0RM->getType());
       _movp(T, Src0RM);
@@ -2691,9 +2681,9 @@
   return false;
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
-                                                  const Inst *Consumer) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
+                                                     const Inst *Consumer) {
   Operand *Src0 = legalize(Icmp->getSrc(0));
   Operand *Src1 = legalize(Icmp->getSrc(1));
   Variable *Dest = Icmp->getDest();
@@ -2725,8 +2715,8 @@
                   Consumer);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerIcmpVector(const InstIcmp *Icmp) {
   Operand *Src0 = legalize(Icmp->getSrc(0));
   Operand *Src1 = legalize(Icmp->getSrc(1));
   Variable *Dest = Icmp->getDest();
@@ -2788,13 +2778,13 @@
     llvm_unreachable("unexpected condition");
     break;
   case InstIcmp::Eq: {
-    if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+    if (llvm::isa<X86OperandMem>(Src1RM))
       Src1RM = legalizeToReg(Src1RM);
     _movp(T, Src0RM);
     _pcmpeq(T, Src1RM);
   } break;
   case InstIcmp::Ne: {
-    if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+    if (llvm::isa<X86OperandMem>(Src1RM))
       Src1RM = legalizeToReg(Src1RM);
     _movp(T, Src0RM);
     _pcmpeq(T, Src1RM);
@@ -2803,7 +2793,7 @@
   } break;
   case InstIcmp::Ugt:
   case InstIcmp::Sgt: {
-    if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+    if (llvm::isa<X86OperandMem>(Src1RM))
       Src1RM = legalizeToReg(Src1RM);
     _movp(T, Src0RM);
     _pcmpgt(T, Src1RM);
@@ -2811,7 +2801,7 @@
   case InstIcmp::Uge:
   case InstIcmp::Sge: {
     // !(Src1RM > Src0RM)
-    if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
+    if (llvm::isa<X86OperandMem>(Src0RM))
       Src0RM = legalizeToReg(Src0RM);
     _movp(T, Src1RM);
     _pcmpgt(T, Src0RM);
@@ -2820,7 +2810,7 @@
   } break;
   case InstIcmp::Ult:
   case InstIcmp::Slt: {
-    if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
+    if (llvm::isa<X86OperandMem>(Src0RM))
       Src0RM = legalizeToReg(Src0RM);
     _movp(T, Src1RM);
     _pcmpgt(T, Src0RM);
@@ -2828,7 +2818,7 @@
   case InstIcmp::Ule:
   case InstIcmp::Sle: {
     // !(Src0RM > Src1RM)
-    if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
+    if (llvm::isa<X86OperandMem>(Src1RM))
       Src1RM = legalizeToReg(Src1RM);
     _movp(T, Src0RM);
     _pcmpgt(T, Src1RM);
@@ -2841,11 +2831,11 @@
   eliminateNextVectorSextInstruction(Dest);
 }
 
-template <typename Machine>
+template <typename TraitsType>
 template <typename T>
 typename std::enable_if<!T::Is64Bit, void>::type
-TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp,
-                                    const Inst *Consumer) {
+TargetX86Base<TraitsType>::lowerIcmp64(const InstIcmp *Icmp,
+                                       const Inst *Consumer) {
   // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
   Operand *Src0 = legalize(Icmp->getSrc(0));
   Operand *Src1 = legalize(Icmp->getSrc(1));
@@ -2940,10 +2930,8 @@
   if (Consumer == nullptr) {
     Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
     Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
-    typename Traits::Insts::Label *LabelFalse =
-        Traits::Insts::Label::create(Func, this);
-    typename Traits::Insts::Label *LabelTrue =
-        Traits::Insts::Label::create(Func, this);
+    InstX86Label *LabelFalse = InstX86Label::create(Func, this);
+    InstX86Label *LabelTrue = InstX86Label::create(Func, this);
     _mov(Dest, One);
     _cmp(Src0HiRM, Src1HiRI);
     if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
@@ -2972,10 +2960,8 @@
     Operand *SrcT = Select->getTrueOperand();
     Operand *SrcF = Select->getFalseOperand();
     Variable *SelectDest = Select->getDest();
-    typename Traits::Insts::Label *LabelFalse =
-        Traits::Insts::Label::create(Func, this);
-    typename Traits::Insts::Label *LabelTrue =
-        Traits::Insts::Label::create(Func, this);
+    InstX86Label *LabelFalse = InstX86Label::create(Func, this);
+    InstX86Label *LabelTrue = InstX86Label::create(Func, this);
     lowerMove(SelectDest, SrcT, false);
     _cmp(Src0HiRM, Src1HiRI);
     if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
@@ -2993,10 +2979,10 @@
   llvm::report_fatal_error("Unexpected consumer type");
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::setccOrConsumer(
-    typename Traits::Cond::BrCond Condition, Variable *Dest,
-    const Inst *Consumer) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::setccOrConsumer(BrCond Condition,
+                                                Variable *Dest,
+                                                const Inst *Consumer) {
   if (Consumer == nullptr) {
     _setcc(Dest, Condition);
     return;
@@ -3015,9 +3001,9 @@
   llvm::report_fatal_error("Unexpected consumer type");
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest,
-                                           const Inst *Consumer) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::movOrConsumer(bool IcmpResult, Variable *Dest,
+                                              const Inst *Consumer) {
   if (Consumer == nullptr) {
     _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
     return;
@@ -3045,9 +3031,9 @@
   llvm::report_fatal_error("Unexpected consumer type");
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith,
-                                                   const Inst *Consumer) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerArithAndConsumer(
+    const InstArithmetic *Arith, const Inst *Consumer) {
   Variable *T = nullptr;
   Operand *Src0 = legalize(Arith->getSrc(0));
   Operand *Src1 = legalize(Arith->getSrc(1));
@@ -3084,8 +3070,9 @@
   llvm::report_fatal_error("Unexpected consumer type");
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerInsertElement(
+    const InstInsertElement *Inst) {
   Operand *SourceVectNotLegalized = Inst->getSrc(0);
   Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
   ConstantInteger32 *ElementIndex =
@@ -3123,7 +3110,8 @@
     } else {
       // For the pinsrb and pinsrw instructions, when the source operand is a
       // register, it must be a full r32 register like eax, and not ax/al/ah.
-      // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use
+      // For filetype=asm, InstX86Pinsr<TraitsType>::emit() compensates for
+      // the use
       // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
       // validates that the original and base register encodings are the same.
       if (ElementRM->getType() == IceType_i8 &&
@@ -3208,7 +3196,7 @@
 
     // Compute the location of the position to insert in memory.
     unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
-    typename Traits::X86OperandMem *Loc =
+    X86OperandMem *Loc =
         getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
     _store(legalizeToReg(ElementToInsertNotLegalized), Loc);
 
@@ -3218,8 +3206,8 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerIntrinsicCall(
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerIntrinsicCall(
     const InstIntrinsicCall *Instr) {
   switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
   case Intrinsics::AtomicCmpxchg: {
@@ -3299,8 +3287,7 @@
         // anyway, since this is x86-32 and integer arithmetic only happens on
         // 32-bit quantities.
         Variable *T = makeReg(IceType_f64);
-        typename Traits::X86OperandMem *Addr =
-            formMemoryOperand(Instr->getArg(0), IceType_f64);
+        X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
         _movq(T, Addr);
         // Then cast the bits back out of the XMM register to the i64 Dest.
         auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
@@ -3350,8 +3337,7 @@
       auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
       lowerCast(Cast);
       // Then store XMM w/ a movq.
-      typename Traits::X86OperandMem *Addr =
-          formMemoryOperand(Ptr, IceType_f64);
+      X86OperandMem *Addr = formMemoryOperand(Ptr, IceType_f64);
       _storeq(T, Addr);
       _mfence();
       return;
@@ -3495,7 +3481,7 @@
     // The pand instruction operates on an m128 memory operand, so if Src is an
     // f32 or f64, we need to make sure it's in a register.
     if (isVectorType(Ty)) {
-      if (llvm::isa<typename Traits::X86OperandMem>(Src))
+      if (llvm::isa<X86OperandMem>(Src))
         Src = legalizeToReg(Src);
     } else {
       Src = legalizeToReg(Src);
@@ -3528,7 +3514,8 @@
   }
   case Intrinsics::NaClReadTP: {
     if (Ctx->getFlags().getUseSandboxing()) {
-      Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);
+      Operand *Src =
+          dispatchToConcrete(&ConcreteTarget::createNaClReadTPSrcOperand);
       Variable *Dest = Instr->getDest();
       Variable *T = nullptr;
       _mov(T, Src);
@@ -3578,10 +3565,11 @@
   return;
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
-                                                Operand *Ptr, Operand *Expected,
-                                                Operand *Desired) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerAtomicCmpxchg(Variable *DestPrev,
+                                                   Operand *Ptr,
+                                                   Operand *Expected,
+                                                   Operand *Desired) {
   Type Ty = Expected->getType();
   if (!Traits::Is64Bit && Ty == IceType_i64) {
     // Reserve the pre-colored registers first, before adding any more
@@ -3594,7 +3582,7 @@
     _mov(T_edx, hiOperand(Expected));
     _mov(T_ebx, loOperand(Desired));
     _mov(T_ecx, hiOperand(Desired));
-    typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
+    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
     constexpr bool Locked = true;
     _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
     auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
@@ -3622,18 +3610,18 @@
   }
   Variable *T_eax = makeReg(Ty, Eax);
   _mov(T_eax, Expected);
-  typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
+  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
   Variable *DesiredReg = legalizeToReg(Desired);
   constexpr bool Locked = true;
   _cmpxchg(Addr, T_eax, DesiredReg, Locked);
   _mov(DestPrev, T_eax);
 }
 
-template <class Machine>
-bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
-                                                      Operand *PtrToMem,
-                                                      Operand *Expected,
-                                                      Operand *Desired) {
+template <typename TraitsType>
+bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
+                                                         Operand *PtrToMem,
+                                                         Operand *Expected,
+                                                         Operand *Desired) {
   if (Ctx->getFlags().getOptLevel() == Opt_m1)
     return false;
   // Peek ahead a few instructions and see how Dest is used.
@@ -3705,9 +3693,10 @@
   return false;
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
-                                            Operand *Ptr, Operand *Val) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerAtomicRMW(Variable *Dest,
+                                               uint32_t Operation, Operand *Ptr,
+                                               Operand *Val) {
   bool NeedsCmpxchg = false;
   LowerBinOp Op_Lo = nullptr;
   LowerBinOp Op_Hi = nullptr;
@@ -3720,12 +3709,11 @@
       // All the fall-through paths must set this to true, but use this
       // for asserting.
       NeedsCmpxchg = true;
-      Op_Lo = &TargetX86Base<Machine>::_add;
-      Op_Hi = &TargetX86Base<Machine>::_adc;
+      Op_Lo = &TargetX86Base<TraitsType>::_add;
+      Op_Hi = &TargetX86Base<TraitsType>::_adc;
       break;
     }
-    typename Traits::X86OperandMem *Addr =
-        formMemoryOperand(Ptr, Dest->getType());
+    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
     constexpr bool Locked = true;
     Variable *T = nullptr;
     _mov(T, Val);
@@ -3736,12 +3724,11 @@
   case Intrinsics::AtomicSub: {
     if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
       NeedsCmpxchg = true;
-      Op_Lo = &TargetX86Base<Machine>::_sub;
-      Op_Hi = &TargetX86Base<Machine>::_sbb;
+      Op_Lo = &TargetX86Base<TraitsType>::_sub;
+      Op_Hi = &TargetX86Base<TraitsType>::_sbb;
       break;
     }
-    typename Traits::X86OperandMem *Addr =
-        formMemoryOperand(Ptr, Dest->getType());
+    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
     constexpr bool Locked = true;
     Variable *T = nullptr;
     _mov(T, Val);
@@ -3757,18 +3744,18 @@
     // xadd is probably fine vs lock add for add, and xchg is fine
     // vs an atomic store.
     NeedsCmpxchg = true;
-    Op_Lo = &TargetX86Base<Machine>::_or;
-    Op_Hi = &TargetX86Base<Machine>::_or;
+    Op_Lo = &TargetX86Base<TraitsType>::_or;
+    Op_Hi = &TargetX86Base<TraitsType>::_or;
     break;
   case Intrinsics::AtomicAnd:
     NeedsCmpxchg = true;
-    Op_Lo = &TargetX86Base<Machine>::_and;
-    Op_Hi = &TargetX86Base<Machine>::_and;
+    Op_Lo = &TargetX86Base<TraitsType>::_and;
+    Op_Hi = &TargetX86Base<TraitsType>::_and;
     break;
   case Intrinsics::AtomicXor:
     NeedsCmpxchg = true;
-    Op_Lo = &TargetX86Base<Machine>::_xor;
-    Op_Hi = &TargetX86Base<Machine>::_xor;
+    Op_Lo = &TargetX86Base<TraitsType>::_xor;
+    Op_Hi = &TargetX86Base<TraitsType>::_xor;
     break;
   case Intrinsics::AtomicExchange:
     if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
@@ -3779,8 +3766,7 @@
       Op_Hi = nullptr;
       break;
     }
-    typename Traits::X86OperandMem *Addr =
-        formMemoryOperand(Ptr, Dest->getType());
+    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
     Variable *T = nullptr;
     _mov(T, Val);
     _xchg(Addr, T);
@@ -3793,12 +3779,12 @@
   expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
-                                                      LowerBinOp Op_Hi,
-                                                      Variable *Dest,
-                                                      Operand *Ptr,
-                                                      Operand *Val) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
+                                                         LowerBinOp Op_Hi,
+                                                         Variable *Dest,
+                                                         Operand *Ptr,
+                                                         Operand *Val) {
   // Expand a more complex RMW operation as a cmpxchg loop:
   // For 64-bit:
   //   mov     eax, [ptr]
@@ -3828,13 +3814,12 @@
   if (!Traits::Is64Bit && Ty == IceType_i64) {
     Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
     Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
-    typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
+    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
     _mov(T_eax, loOperand(Addr));
     _mov(T_edx, hiOperand(Addr));
     Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
     Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
-    typename Traits::Insts::Label *Label =
-        Traits::Insts::Label::create(Func, this);
+    InstX86Label *Label = InstX86Label::create(Func, this);
     const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
     if (!IsXchg8b) {
       Context.insert(Label);
@@ -3876,7 +3861,7 @@
     _mov(DestHi, T_edx);
     return;
   }
-  typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
+  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
   int32_t Eax;
   switch (Ty) {
   default:
@@ -3896,7 +3881,7 @@
   }
   Variable *T_eax = makeReg(Ty, Eax);
   _mov(T_eax, Addr);
-  auto *Label = Context.insert<typename Traits::Insts::Label>(this);
+  auto *Label = Context.insert<InstX86Label>(this);
   // We want to pick a different register for T than Eax, so don't use
   // _mov(T == nullptr, T_eax).
   Variable *T = makeReg(Ty);
@@ -3920,10 +3905,11 @@
 ///
 /// We could do constant folding here, but that should have
 /// been done by the front-end/middle-end optimizations.
-template <class Machine>
-void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
-                                             Operand *FirstVal,
-                                             Operand *SecondVal) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty,
+                                                Variable *Dest,
+                                                Operand *FirstVal,
+                                                Operand *SecondVal) {
   // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
   // Then the instructions will handle the Val == 0 case much more simply
   // and won't require conversion from bit position to number of zeros.
@@ -3995,10 +3981,10 @@
   _mov(DestHi, Ctx->getConstantZero(IceType_i32));
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base,
-                                       Constant *Offset) {
-  auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,
+                                          Variable *Base, Constant *Offset) {
+  auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
 
   if (isVectorType(Ty))
     _movp(Dest, Mem);
@@ -4008,10 +3994,10 @@
     _mov(Dest, Mem);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::typedStore(Type Ty, Variable *Value,
-                                        Variable *Base, Constant *Offset) {
-  auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::typedStore(Type Ty, Variable *Value,
+                                           Variable *Base, Constant *Offset) {
+  auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
 
   if (isVectorType(Ty))
     _storep(Value, Mem);
@@ -4021,9 +4007,9 @@
     _store(Value, Mem);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::copyMemory(Type Ty, Variable *Dest, Variable *Src,
-                                        int32_t OffsetAmt) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::copyMemory(Type Ty, Variable *Dest,
+                                           Variable *Src, int32_t OffsetAmt) {
   Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
   // TODO(ascull): this or add nullptr test to _movp, _movq
   Variable *Data = makeReg(Ty);
@@ -4032,9 +4018,9 @@
   typedStore(Ty, Data, Dest, Offset);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerMemcpy(Operand *Dest, Operand *Src,
-                                         Operand *Count) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerMemcpy(Operand *Dest, Operand *Src,
+                                            Operand *Count) {
   // There is a load and store for each chunk in the unroll
   constexpr uint32_t BytesPerStorep = 16;
 
@@ -4086,9 +4072,9 @@
   lowerCall(Call);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerMemmove(Operand *Dest, Operand *Src,
-                                          Operand *Count) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerMemmove(Operand *Dest, Operand *Src,
+                                             Operand *Count) {
   // There is a load and store for each chunk in the unroll
   constexpr uint32_t BytesPerStorep = 16;
 
@@ -4158,9 +4144,9 @@
   lowerCall(Call);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val,
-                                         Operand *Count) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerMemset(Operand *Dest, Operand *Val,
+                                            Operand *Count) {
   constexpr uint32_t BytesPerStorep = 16;
   constexpr uint32_t BytesPerStoreq = 8;
   constexpr uint32_t BytesPerStorei32 = 4;
@@ -4193,7 +4179,7 @@
       Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
 
       // TODO(ascull): is 64-bit better with vector or scalar movq?
-      auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
+      auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
       if (isVectorType(Ty)) {
         assert(VecReg != nullptr);
         _storep(VecReg, Mem);
@@ -4267,8 +4253,8 @@
   lowerCall(Call);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerIndirectJump(Variable *JumpTarget) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerIndirectJump(Variable *JumpTarget) {
   const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
   if (Traits::Is64Bit) {
     Variable *T = makeReg(IceType_i64);
@@ -4665,11 +4651,11 @@
 ///
 /// Also note that we don't need to add a bounds check to a dereference of a
 /// simple global variable address.
-template <class Machine>
-void TargetX86Base<Machine>::doMockBoundsCheck(Operand *Opnd) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::doMockBoundsCheck(Operand *Opnd) {
   if (!Ctx->getFlags().getMockBoundsCheck())
     return;
-  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd)) {
+  if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd)) {
     if (Mem->getIndex()) {
       llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg");
     }
@@ -4686,7 +4672,7 @@
   if (Var->getRegNum() == static_cast<int32_t>(getStackReg()))
     return;
 
-  auto *Label = Traits::Insts::Label::create(Func, this);
+  auto *Label = InstX86Label::create(Func, this);
   _cmp(Opnd, Ctx->getConstantZero(IceType_i32));
   _br(Traits::Cond::Br_e, Label);
   _cmp(Opnd, Ctx->getConstantInt32(1));
@@ -4694,12 +4680,12 @@
   Context.insert(Label);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {
   // A Load instruction can be treated the same as an Assign instruction, after
-  // the source operand is transformed into an Traits::X86OperandMem operand.
+  // the source operand is transformed into an X86OperandMem operand.
   // Note that the address mode optimization already creates an
-  // Traits::X86OperandMem operand, so it doesn't need another level of
+  // X86OperandMem operand, so it doesn't need another level of
   // transformation.
   Variable *DestLoad = Load->getDest();
   Type Ty = DestLoad->getType();
@@ -4709,7 +4695,8 @@
   lowerAssign(Assign);
 }
 
-template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::doAddressOptLoad() {
   Inst *Inst = Context.getCur();
   Variable *Dest = Inst->getDest();
   Operand *Addr = Inst->getSrc(0);
@@ -4719,10 +4706,9 @@
   int32_t Offset = 0;
   // Vanilla ICE load instructions should not use the segment registers, and
   // computeAddressOpt only works at the level of Variables and Constants, not
-  // other Traits::X86OperandMem, so there should be no mention of segment
+  // other X86OperandMem, so there should be no mention of segment
   // registers there either.
-  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
-      Traits::X86OperandMem::DefaultSegment;
+  const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment;
   auto *Base = llvm::dyn_cast<Variable>(Addr);
   if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
     Inst->setDeleted();
@@ -4734,28 +4720,28 @@
                                      Relocatable->getName(),
                                      Relocatable->getSuppressMangling());
     }
-    Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
-                                         Index, Shift, SegmentReg);
+    Addr = X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, Index,
+                                 Shift, SegmentReg);
     Context.insert<InstLoad>(Dest, Addr);
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::randomlyInsertNop(float Probability,
-                                               RandomNumberGenerator &RNG) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::randomlyInsertNop(float Probability,
+                                                  RandomNumberGenerator &RNG) {
   RandomNumberGeneratorWrapper RNGW(RNG);
   if (RNGW.getTrueWithProbability(Probability)) {
     _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) {
   Func->setError("Phi found in regular instruction list");
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
   Variable *Dest = Select->getDest();
 
   if (isVectorType(Dest->getType())) {
@@ -4787,20 +4773,19 @@
   _cmp(CmpResult, Zero);
   Operand *SrcT = Select->getTrueOperand();
   Operand *SrcF = Select->getFalseOperand();
-  const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
+  const BrCond Cond = Traits::Cond::Br_ne;
   lowerSelectMove(Dest, Cond, SrcT, SrcF);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest,
-                                             typename Traits::Cond::BrCond Cond,
-                                             Operand *SrcT, Operand *SrcF) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,
+                                                Operand *SrcT, Operand *SrcF) {
   Type DestTy = Dest->getType();
   if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
     // The cmov instruction doesn't allow 8-bit or FP operands, so we need
     // explicit control flow.
     // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
-    auto *Label = Traits::Insts::Label::create(Func, this);
+    auto *Label = InstX86Label::create(Func, this);
     SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
     _mov(Dest, SrcT);
     _br(Cond, Label);
@@ -4815,7 +4800,7 @@
   // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
   if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
     std::swap(SrcT, SrcF);
-    Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
+    Cond = InstImpl<TraitsType>::InstX86Base::getOppositeCondition(Cond);
   }
   if (!Traits::Is64Bit && DestTy == IceType_i64) {
     SrcT = legalizeUndef(SrcT);
@@ -4834,10 +4819,10 @@
   lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerSelectIntMove(
-    Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT,
-    Operand *SrcF) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerSelectIntMove(Variable *Dest, BrCond Cond,
+                                                   Operand *SrcT,
+                                                   Operand *SrcF) {
   Variable *T = nullptr;
   SrcF = legalize(SrcF);
   _mov(T, SrcF);
@@ -4846,9 +4831,9 @@
   _mov(Dest, T);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src,
-                                       bool IsRedefinition) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerMove(Variable *Dest, Operand *Src,
+                                          bool IsRedefinition) {
   assert(Dest->getType() == Src->getType());
   assert(!Dest->isRematerializable());
   if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
@@ -4882,9 +4867,9 @@
   }
 }
 
-template <class Machine>
-bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp,
-                                                     const InstSelect *Select) {
+template <typename TraitsType>
+bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect(
+    const InstFcmp *Fcmp, const InstSelect *Select) {
   Operand *CmpSrc0 = Fcmp->getSrc(0);
   Operand *CmpSrc1 = Fcmp->getSrc(1);
   Operand *SelectSrcT = Select->getTrueOperand();
@@ -4911,8 +4896,8 @@
   return false;
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerIcmp(const InstIcmp *Icmp) {
   Variable *Dest = Icmp->getDest();
   if (isVectorType(Dest->getType())) {
     lowerIcmpVector(Icmp);
@@ -4922,8 +4907,8 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Inst) {
   Variable *Dest = Inst->getDest();
   Type DestTy = Dest->getType();
   Operand *SrcT = Inst->getTrueOperand();
@@ -4990,12 +4975,11 @@
   return;
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) {
   Operand *Value = Inst->getData();
   Operand *Addr = Inst->getAddr();
-  typename Traits::X86OperandMem *NewAddr =
-      formMemoryOperand(Addr, Value->getType());
+  X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType());
   doMockBoundsCheck(NewAddr);
   Type Ty = NewAddr->getType();
 
@@ -5003,10 +4987,8 @@
     Value = legalizeUndef(Value);
     Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
     Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
-    _store(ValueHi,
-           llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
-    _store(ValueLo,
-           llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
+    _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
+    _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
   } else if (isVectorType(Ty)) {
     _storep(legalizeToReg(Value), NewAddr);
   } else {
@@ -5015,7 +4997,8 @@
   }
 }
 
-template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::doAddressOptStore() {
   auto *Inst = llvm::cast<InstStore>(Context.getCur());
   Operand *Data = Inst->getData();
   Operand *Addr = Inst->getAddr();
@@ -5026,10 +5009,9 @@
   auto *Base = llvm::dyn_cast<Variable>(Addr);
   // Vanilla ICE store instructions should not use the segment registers, and
   // computeAddressOpt only works at the level of Variables and Constants, not
-  // other Traits::X86OperandMem, so there should be no mention of segment
+  // other X86OperandMem, so there should be no mention of segment
   // registers there either.
-  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
-      Traits::X86OperandMem::DefaultSegment;
+  const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment;
   if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
     Inst->setDeleted();
     Constant *OffsetOp = nullptr;
@@ -5040,17 +5022,17 @@
                                      Relocatable->getName(),
                                      Relocatable->getSuppressMangling());
     }
-    Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
-                                         Index, Shift, SegmentReg);
+    Addr = X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, Index,
+                                 Shift, SegmentReg);
     auto *NewStore = Context.insert<InstStore>(Data, Addr);
     if (Inst->getDest())
       NewStore->setRmwBeacon(Inst->getRmwBeacon());
   }
 }
 
-template <class Machine>
-Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
-                                               uint64_t Min, uint64_t Max) {
+template <typename TraitsType>
+Operand *TargetX86Base<TraitsType>::lowerCmpRange(Operand *Comparison,
+                                                  uint64_t Min, uint64_t Max) {
   // TODO(ascull): 64-bit should not reach here but only because it is not
   // implemented yet. This should be able to handle the 64-bit case.
   assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
@@ -5068,19 +5050,20 @@
   return Comparison;
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
-                                              Operand *Comparison, bool DoneCmp,
-                                              CfgNode *DefaultTarget) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerCaseCluster(const CaseCluster &Case,
+                                                 Operand *Comparison,
+                                                 bool DoneCmp,
+                                                 CfgNode *DefaultTarget) {
   switch (Case.getKind()) {
   case CaseCluster::JumpTable: {
-    typename Traits::Insts::Label *SkipJumpTable;
+    InstX86Label *SkipJumpTable;
 
     Operand *RangeIndex =
         lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
     if (DefaultTarget == nullptr) {
       // Skip over jump table logic if comparison not in range and no default
-      SkipJumpTable = Traits::Insts::Label::create(Func, this);
+      SkipJumpTable = InstX86Label::create(Func, this);
       _br(Traits::Cond::Br_a, SkipJumpTable);
     } else {
       _br(Traits::Cond::Br_a, DefaultTarget);
@@ -5107,7 +5090,7 @@
     Constant *Offset = nullptr;
     uint16_t Shift = typeWidthInBytesLog2(getPointerType());
     // TODO(ascull): remove need for legalize by allowing null base in memop
-    auto *TargetInMemory = Traits::X86OperandMem::create(
+    auto *TargetInMemory = X86OperandMem::create(
         Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift);
     Variable *Target = nullptr;
     _mov(Target, TargetInMemory);
@@ -5143,8 +5126,8 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Inst) {
   // Group cases together and navigate through them with a binary search
   CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
   Operand *Src0 = Inst->getComparison();
@@ -5170,8 +5153,7 @@
       for (SizeT I = 0; I < NumCases; ++I) {
         Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
         Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
-        typename Traits::Insts::Label *Label =
-            Traits::Insts::Label::create(Func, this);
+        InstX86Label *Label = InstX86Label::create(Func, this);
         _cmp(Src0Lo, ValueLo);
         _br(Traits::Cond::Br_ne, Label);
         _cmp(Src0Hi, ValueHi);
@@ -5206,12 +5188,12 @@
 
   // A span is over the clusters
   struct SearchSpan {
-    SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label)
+    SearchSpan(SizeT Begin, SizeT Size, InstX86Label *Label)
         : Begin(Begin), Size(Size), Label(Label) {}
 
     SizeT Begin;
     SizeT Size;
-    typename Traits::Insts::Label *Label;
+    InstX86Label *Label;
   };
   // The stack will only grow to the height of the tree so 12 should be plenty
   std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
@@ -5265,11 +5247,10 @@
       SizeT PivotIndex = Span.Begin + (Span.Size / 2);
       const CaseCluster &Pivot = CaseClusters[PivotIndex];
       Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
-      typename Traits::Insts::Label *Label =
-          Traits::Insts::Label::create(Func, this);
+      InstX86Label *Label = InstX86Label::create(Func, this);
       _cmp(Comparison, Value);
       // TODO(ascull): does it alway have to be far?
-      _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far);
+      _br(Traits::Cond::Br_b, Label, InstX86Br::Far);
       // Lower the left and (pivot+right) sides, falling through to the right
       SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
       SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
@@ -5281,10 +5262,11 @@
   _br(DefaultTarget);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
-                                                 Variable *Dest, Operand *Src0,
-                                                 Operand *Src1) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
+                                                    Variable *Dest,
+                                                    Operand *Src0,
+                                                    Operand *Src1) {
   assert(isVectorType(Dest->getType()));
   Type Ty = Dest->getType();
   Type ElementTy = typeElementType(Ty);
@@ -5323,8 +5305,8 @@
 /// We can eliminate the sext operation by copying the result of pcmpeqd,
 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
 /// sext operation.
-template <class Machine>
-void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::eliminateNextVectorSextInstruction(
     Variable *SignExtendedResult) {
   if (auto *NextCast =
           llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
@@ -5338,8 +5320,8 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerUnreachable(
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerUnreachable(
     const InstUnreachable * /*Inst*/) {
   _ud2();
   // Add a fake use of esp to make sure esp adjustments after the unreachable
@@ -5347,9 +5329,8 @@
   keepEspLiveAtExit();
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerRMW(
-    const typename Traits::Insts::FakeRMW *RMW) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerRMW(const InstX86FakeRMW *RMW) {
   // If the beacon variable's live range does not end in this instruction, then
   // it must end in the modified Store instruction that follows. This means
   // that the original Store instruction is still there, either because the
@@ -5360,16 +5341,14 @@
     return;
   Operand *Src = RMW->getData();
   Type Ty = Src->getType();
-  typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
+  X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
   doMockBoundsCheck(Addr);
   if (!Traits::Is64Bit && Ty == IceType_i64) {
     Src = legalizeUndef(Src);
     Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
     Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
-    typename Traits::X86OperandMem *AddrLo =
-        llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
-    typename Traits::X86OperandMem *AddrHi =
-        llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
+    X86OperandMem *AddrLo = llvm::cast<X86OperandMem>(loOperand(Addr));
+    X86OperandMem *AddrHi = llvm::cast<X86OperandMem>(hiOperand(Addr));
     switch (RMW->getOp()) {
     default:
       // TODO(stichnot): Implement other arithmetic operators.
@@ -5427,10 +5406,9 @@
   llvm::report_fatal_error("Couldn't lower RMW instruction");
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
-  if (const auto *RMW =
-          llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerOther(const Inst *Instr) {
+  if (const auto *RMW = llvm::dyn_cast<InstX86FakeRMW>(Instr)) {
     lowerRMW(RMW);
   } else {
     TargetLowering::lowerOther(Instr);
@@ -5440,7 +5418,7 @@
 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
 /// integrity of liveness analysis. Undef values are also turned into zeroes,
 /// since loOperand() and hiOperand() don't expect Undef input.
-template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
+template <typename TraitsType> void TargetX86Base<TraitsType>::prelowerPhis() {
   if (Traits::Is64Bit) {
     // On x86-64 we don't need to prelower phis -- the architecture can handle
     // 64-bit integer natively.
@@ -5450,12 +5428,12 @@
   // Pause constant blinding or pooling, blinding or pooling will be done later
   // during phi lowering assignments
   BoolFlagSaver B(RandomizationPoolingPaused, true);
-  PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
+  PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(
       this, Context.getNode(), Func);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
   uint32_t StackArgumentsSize = 0;
   if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
     const char *HelperName = nullptr;
@@ -5680,8 +5658,8 @@
   updateMaxOutArgsSizeBytes(StackArgumentsSize);
 }
 
-template <class Machine>
-uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(
+template <typename TraitsType>
+uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
     const std::vector<Type> &ArgTypes, Type ReturnType) {
   uint32_t OutArgumentsSizeBytes = 0;
   uint32_t XmmArgCount = 0;
@@ -5715,9 +5693,9 @@
   return OutArgumentsSizeBytes;
 }
 
-template <class Machine>
-uint32_t
-TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) {
+template <typename TraitsType>
+uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
+    const InstCall *Instr) {
   // Build a vector of the arguments' types.
   std::vector<Type> ArgTypes;
   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
@@ -5732,8 +5710,9 @@
   return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
 }
 
-template <class Machine>
-Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,
+                                                        int32_t RegNum) {
   Variable *Reg = makeReg(Ty, RegNum);
   switch (Ty) {
   case IceType_i1:
@@ -5766,14 +5745,15 @@
 // TODO(wala): Add limited support for vector constants so that complex
 // initialization in registers is unnecessary.
 
-template <class Machine>
-Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::makeVectorOfZeros(Type Ty,
+                                                       int32_t RegNum) {
   return makeZeroedRegister(Ty, RegNum);
 }
 
-template <class Machine>
-Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
-                                                        int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,
+                                                           int32_t RegNum) {
   Variable *MinusOnes = makeReg(Ty, RegNum);
   // Insert a FakeDef so the live range of MinusOnes is not overestimated.
   Context.insert<InstFakeDef>(MinusOnes);
@@ -5781,17 +5761,17 @@
   return MinusOnes;
 }
 
-template <class Machine>
-Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
   Variable *Dest = makeVectorOfZeros(Ty, RegNum);
   Variable *MinusOne = makeVectorOfMinusOnes(Ty);
   _psub(Dest, MinusOne);
   return Dest;
 }
 
-template <class Machine>
-Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
-                                                            int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::makeVectorOfHighOrderBits(Type Ty,
+                                                               int32_t RegNum) {
   assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
          Ty == IceType_v16i8);
   if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
@@ -5817,18 +5797,18 @@
 /// ones logically right shifted one bit.
 // TODO(stichnot): Fix the wala
 // TODO: above, to represent vector constants in memory.
-template <class Machine>
-Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
-                                                       int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::makeVectorOfFabsMask(Type Ty,
+                                                          int32_t RegNum) {
   Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
   _psrl(Reg, Ctx->getConstantInt8(1));
   return Reg;
 }
 
-template <class Machine>
-typename TargetX86Base<Machine>::Traits::X86OperandMem *
-TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
-                                                     uint32_t Offset) {
+template <typename TraitsType>
+typename TargetX86Base<TraitsType>::X86OperandMem *
+TargetX86Base<TraitsType>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
+                                                        uint32_t Offset) {
   // Ensure that Loc is a stack slot.
   assert(Slot->mustNotHaveReg());
   assert(Slot->getRegNum() == Variable::NoRegister);
@@ -5840,7 +5820,7 @@
   Variable *Loc = makeReg(PointerType);
   _lea(Loc, Slot);
   Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
-  return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
+  return X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
 }
 
 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
@@ -5871,8 +5851,8 @@
 /// Variable with the RCX86_IsTrunc8Rcvr register class.  As such, this helper
 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
 /// to the pinsrb instruction.
-template <class Machine>
-Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::copyToReg8(Operand *Src, int32_t RegNum) {
   Type Ty = Src->getType();
   assert(isScalarIntegerType(Ty));
   assert(Ty != IceType_i1);
@@ -5906,8 +5886,8 @@
 
 /// Helper for legalize() to emit the right code to lower an operand to a
 /// register of the appropriate type.
-template <class Machine>
-Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::copyToReg(Operand *Src, int32_t RegNum) {
   Type Ty = Src->getType();
   Variable *Reg = makeReg(Ty, RegNum);
   if (isVectorType(Ty)) {
@@ -5918,9 +5898,9 @@
   return Reg;
 }
 
-template <class Machine>
-Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
-                                          int32_t RegNum) {
+template <typename TraitsType>
+Operand *TargetX86Base<TraitsType>::legalize(Operand *From, LegalMask Allowed,
+                                             int32_t RegNum) {
   Type Ty = From->getType();
   // Assert that a physical register is allowed. To date, all calls to
   // legalize() allow a physical register. If a physical register needs to be
@@ -5950,7 +5930,7 @@
     }
   }
 
-  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
+  if (auto *Mem = llvm::dyn_cast<X86OperandMem>(From)) {
     // Before doing anything with a Mem operand, we need to ensure that the
     // Base and Index components are in physical registers.
     Variable *Base = Mem->getBase();
@@ -5966,9 +5946,8 @@
           legalize(Index, Legal_Reg | Legal_Rematerializable));
     }
     if (Base != RegBase || Index != RegIndex) {
-      Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
-                                          RegIndex, Mem->getShift(),
-                                          Mem->getSegmentRegister());
+      Mem = X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex,
+                                  Mem->getShift(), Mem->getSegmentRegister());
     }
 
     // For all Memory Operands, we do randomization/pooling here
@@ -6025,7 +6004,7 @@
       llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
       llvm::cast<Constant>(From)->setShouldBePooled(true);
       Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
-      From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
+      From = X86OperandMem::create(Func, Ty, Base, Offset);
     }
     bool NeedsReg = false;
     if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
@@ -6057,7 +6036,7 @@
       // Since Var is rematerializable, the offset will be added when the lea is
       // emitted.
       constexpr Constant *NoOffset = nullptr;
-      auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset);
+      auto *Mem = X86OperandMem::create(Func, Ty, Var, NoOffset);
       _lea(NewVar, Mem);
       From = NewVar;
     } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
@@ -6072,14 +6051,16 @@
 }
 
 /// Provide a trivial wrapper to legalize() for this common usage.
-template <class Machine>
-Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::legalizeToReg(Operand *From,
+                                                   int32_t RegNum) {
   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
 }
 
 /// Legalize undef values to concrete values.
-template <class Machine>
-Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) {
+template <typename TraitsType>
+Operand *TargetX86Base<TraitsType>::legalizeUndef(Operand *From,
+                                                  int32_t RegNum) {
   Type Ty = From->getType();
   if (llvm::isa<ConstantUndef>(From)) {
     // Lower undefs to zero.  Another option is to lower undefs to an
@@ -6105,9 +6086,9 @@
 /// copied into a physical register. (Actually, either Src0 or Src1 can be
 /// chosen for the physical register, but unfortunately we have to commit to one
 /// or the other before register allocation.)
-template <class Machine>
-Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
-                                                    Operand *Src1) {
+template <typename TraitsType>
+Operand *TargetX86Base<TraitsType>::legalizeSrc0ForCmp(Operand *Src0,
+                                                       Operand *Src1) {
   bool IsSrc1ImmOrReg = false;
   if (llvm::isa<Constant>(Src1)) {
     IsSrc1ImmOrReg = true;
@@ -6118,13 +6099,13 @@
   return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
 }
 
-template <class Machine>
-typename TargetX86Base<Machine>::Traits::X86OperandMem *
-TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
-                                          bool DoLegalize) {
-  auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
+template <typename TraitsType>
+typename TargetX86Base<TraitsType>::X86OperandMem *
+TargetX86Base<TraitsType>::formMemoryOperand(Operand *Opnd, Type Ty,
+                                             bool DoLegalize) {
+  auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd);
   // It may be the case that address mode optimization already creates an
-  // Traits::X86OperandMem, so in that case it wouldn't need another level of
+  // X86OperandMem, so in that case it wouldn't need another level of
   // transformation.
   if (!Mem) {
     auto *Base = llvm::dyn_cast<Variable>(Opnd);
@@ -6144,16 +6125,16 @@
       assert(llvm::isa<ConstantInteger32>(Offset) ||
              llvm::isa<ConstantRelocatable>(Offset));
     }
-    Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
+    Mem = X86OperandMem::create(Func, Ty, Base, Offset);
   }
   // Do legalization, which contains randomization/pooling or do
   // randomization/pooling.
-  return llvm::cast<typename Traits::X86OperandMem>(
-      DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
+  return llvm::cast<X86OperandMem>(DoLegalize ? legalize(Mem)
+                                              : randomizeOrPoolImmediate(Mem));
 }
 
-template <class Machine>
-Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
+template <typename TraitsType>
+Variable *TargetX86Base<TraitsType>::makeReg(Type Type, int32_t RegNum) {
   // There aren't any 64-bit integer registers for x86-32.
   assert(Traits::Is64Bit || Type != IceType_i64);
   Variable *Reg = Func->makeVariable(Type);
@@ -6164,12 +6145,12 @@
   return Reg;
 }
 
-template <class Machine>
-const Type TargetX86Base<Machine>::TypeForSize[] = {
+template <typename TraitsType>
+const Type TargetX86Base<TraitsType>::TypeForSize[] = {
     IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};
-template <class Machine>
-Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size,
-                                               uint32_t MaxSize) {
+template <typename TraitsType>
+Type TargetX86Base<TraitsType>::largestTypeInSize(uint32_t Size,
+                                                  uint32_t MaxSize) {
   assert(Size != 0);
   uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
   uint32_t MaxIndex = MaxSize == NoSizeLimit
@@ -6178,9 +6159,9 @@
   return TypeForSize[std::min(TyIndex, MaxIndex)];
 }
 
-template <class Machine>
-Type TargetX86Base<Machine>::firstTypeThatFitsSize(uint32_t Size,
-                                                   uint32_t MaxSize) {
+template <typename TraitsType>
+Type TargetX86Base<TraitsType>::firstTypeThatFitsSize(uint32_t Size,
+                                                      uint32_t MaxSize) {
   assert(Size != 0);
   uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
   if (!llvm::isPowerOf2_32(Size))
@@ -6191,31 +6172,31 @@
   return TypeForSize[std::min(TyIndex, MaxIndex)];
 }
 
-template <class Machine> void TargetX86Base<Machine>::postLower() {
+template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {
   if (Ctx->getFlags().getOptLevel() == Opt_m1)
     return;
   markRedefinitions();
   Context.availabilityUpdate();
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::makeRandomRegisterPermutation(
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(
     llvm::SmallVectorImpl<int32_t> &Permutation,
     const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
   Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
                                         ExcludeRegisters, Salt);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::emit(const ConstantInteger32 *C) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Ctx->getStrEmit();
   Str << getConstantPrefix() << C->getValue();
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::emit(const ConstantInteger64 *C) const {
   if (!Traits::Is64Bit) {
     llvm::report_fatal_error("Not expecting to emit 64-bit integers");
   } else {
@@ -6226,31 +6207,32 @@
   }
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::emit(const ConstantFloat *C) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Ctx->getStrEmit();
   C->emitPoolLabel(Str, Ctx);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::emit(const ConstantDouble *C) const {
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Ctx->getStrEmit();
   C->emitPoolLabel(Str, Ctx);
 }
 
-template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::emit(const ConstantUndef *) const {
   llvm::report_fatal_error("undef value encountered by emitter.");
 }
 
 /// Randomize or pool an Immediate.
-template <class Machine>
-Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
-                                                          int32_t RegNum) {
+template <typename TraitsType>
+Operand *
+TargetX86Base<TraitsType>::randomizeOrPoolImmediate(Constant *Immediate,
+                                                    int32_t RegNum) {
   assert(llvm::isa<ConstantInteger32>(Immediate) ||
          llvm::isa<ConstantRelocatable>(Immediate));
   if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
@@ -6280,8 +6262,8 @@
       uint32_t Cookie = Func->getConstantBlindingCookie();
       _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
       Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
-      _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
-                                              nullptr, 0));
+      _lea(Reg,
+           X86OperandMem::create(Func, IceType_i32, Reg, Offset, nullptr, 0));
       if (Immediate->getType() != IceType_i32) {
         Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
         _mov(TruncReg, Reg);
@@ -6310,9 +6292,8 @@
       constexpr bool SuppressMangling = true;
       Constant *Symbol =
           Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
-      typename Traits::X86OperandMem *MemOperand =
-          Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
-                                        Symbol);
+      X86OperandMem *MemOperand =
+          X86OperandMem::create(Func, Immediate->getType(), nullptr, Symbol);
       _mov(Reg, MemOperand);
       return Reg;
     }
@@ -6322,10 +6303,10 @@
   return Immediate;
 }
 
-template <class Machine>
-typename TargetX86Base<Machine>::Traits::X86OperandMem *
-TargetX86Base<Machine>::randomizeOrPoolImmediate(
-    typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
+template <typename TraitsType>
+typename TargetX86Base<TraitsType>::X86OperandMem *
+TargetX86Base<TraitsType>::randomizeOrPoolImmediate(X86OperandMem *MemOperand,
+                                                    int32_t RegNum) {
   assert(MemOperand);
   if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
       RandomizationPoolingPaused == true) {
@@ -6359,9 +6340,8 @@
         Constant *Mask2 =
             Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
 
-        typename Traits::X86OperandMem *TempMemOperand =
-            Traits::X86OperandMem::create(Func, MemOperand->getType(),
-                                          MemOperand->getBase(), Mask1);
+        X86OperandMem *TempMemOperand = X86OperandMem::create(
+            Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
         // If we have already assigned a physical register, we must come from
         // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
         // the assigned register as this assignment is that start of its
@@ -6369,11 +6349,9 @@
         Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
         _lea(RegTemp, TempMemOperand);
 
-        typename Traits::X86OperandMem *NewMemOperand =
-            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
-                                          Mask2, MemOperand->getIndex(),
-                                          MemOperand->getShift(),
-                                          MemOperand->getSegmentRegister());
+        X86OperandMem *NewMemOperand = X86OperandMem::create(
+            Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
+            MemOperand->getShift(), MemOperand->getSegmentRegister());
 
         // Label this memory operand as randomized, so we won't randomize it
         // again in case we call legalize() multiple times on this memory
@@ -6407,25 +6385,22 @@
         constexpr bool SuppressMangling = true;
         Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
                                                SuppressMangling);
-        typename Traits::X86OperandMem *SymbolOperand =
-            Traits::X86OperandMem::create(
-                Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
+        X86OperandMem *SymbolOperand = X86OperandMem::create(
+            Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
         _mov(RegTemp, SymbolOperand);
         // If we have a base variable here, we should add the lea instruction
         // to add the value of the base variable to RegTemp. If there is no
         // base variable, we won't need this lea instruction.
         if (MemOperand->getBase()) {
-          typename Traits::X86OperandMem *CalculateOperand =
-              Traits::X86OperandMem::create(
-                  Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
-                  RegTemp, 0, MemOperand->getSegmentRegister());
+          X86OperandMem *CalculateOperand = X86OperandMem::create(
+              Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
+              RegTemp, 0, MemOperand->getSegmentRegister());
           _lea(RegTemp, CalculateOperand);
         }
-        typename Traits::X86OperandMem *NewMemOperand =
-            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
-                                          nullptr, MemOperand->getIndex(),
-                                          MemOperand->getShift(),
-                                          MemOperand->getSegmentRegister());
+        X86OperandMem *NewMemOperand = X86OperandMem::create(
+            Func, MemOperand->getType(), RegTemp, nullptr,
+            MemOperand->getIndex(), MemOperand->getShift(),
+            MemOperand->getSegmentRegister());
         return NewMemOperand;
       }
       assert("Unsupported -randomize-pool-immediates option" && false);
@@ -6436,7 +6411,7 @@
   return MemOperand;
 }
 
-} // end of namespace X86Internal
+} // end of namespace X86NAMESPACE
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
diff --git a/src/IceTargetLoweringX86RegClass.h b/src/IceTargetLoweringX86RegClass.h
index 5ffb975..3aee63b 100644
--- a/src/IceTargetLoweringX86RegClass.h
+++ b/src/IceTargetLoweringX86RegClass.h
@@ -18,7 +18,7 @@
 #include "IceOperand.h" // RC_Target
 
 namespace Ice {
-namespace X86Internal {
+namespace X86 {
 
 // Extend enum RegClass with x86-specific register classes.
 enum RegClassX86 : uint8_t {
@@ -30,7 +30,7 @@
   RCX86_NUM
 };
 
-} // end of namespace X86Internal
+} // end of namespace X86
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86REGCLASS_H
diff --git a/unittest/AssemblerX8632/TestUtil.h b/unittest/AssemblerX8632/TestUtil.h
index 6adf27b..a22c637 100644
--- a/unittest/AssemblerX8632/TestUtil.h
+++ b/unittest/AssemblerX8632/TestUtil.h
@@ -15,6 +15,7 @@
 #define ASSEMBLERX8632_TESTUTIL_H_
 
 #include "IceAssemblerX8632.h"
+#include "IceDefs.h"
 
 #include "gtest/gtest.h"
 
@@ -31,14 +32,14 @@
   using Cond = AssemblerX8632::Traits::Cond;
   using GPRRegister = AssemblerX8632::Traits::GPRRegister;
   using ByteRegister = AssemblerX8632::Traits::ByteRegister;
-  using Label = ::Ice::X86Internal::Label;
+  using Label = ::Ice::X8632::Label;
   using Traits = AssemblerX8632::Traits;
   using XmmRegister = AssemblerX8632::Traits::XmmRegister;
   using X87STRegister = AssemblerX8632::Traits::X87STRegister;
 
   AssemblerX8632TestBase() { reset(); }
 
-  void reset() { Assembler.reset(new AssemblerX8632()); }
+  void reset() { Assembler = makeUnique<AssemblerX8632>(); }
 
   AssemblerX8632 *assembler() const { return Assembler.get(); }
 
diff --git a/unittest/AssemblerX8664/TestUtil.h b/unittest/AssemblerX8664/TestUtil.h
index 2f3b070..1c68131 100644
--- a/unittest/AssemblerX8664/TestUtil.h
+++ b/unittest/AssemblerX8664/TestUtil.h
@@ -135,7 +135,7 @@
 
   AssemblerX8664TestBase() { reset(); }
 
-  void reset() { Assembler.reset(new AssemblerX8664()); }
+  void reset() { Assembler = makeUnique<AssemblerX8664>(); }
 
   AssemblerX8664 *assembler() const { return Assembler.get(); }