Move X8632-specific Assembler stuff to Machine Traits.

As part of the refactoring, this moves MachineTraits<TargetX8632> to a separate header.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/1216033004.
diff --git a/Makefile.standalone b/Makefile.standalone
index ad8889c..b4c33b7 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -175,7 +175,6 @@
 
 SRCS = \
 	IceAssembler.cpp \
-	IceAssemblerX8632.cpp \
 	IceAssemblerX8664.cpp \
 	IceBrowserCompileServer.cpp \
 	IceCfg.cpp \
diff --git a/src/IceAssemblerX8632.cpp b/src/IceAssemblerX8632.cpp
deleted file mode 100644
index 7d217cf..0000000
--- a/src/IceAssemblerX8632.cpp
+++ /dev/null
@@ -1,2558 +0,0 @@
-//===- subzero/src/IceAssemblerX8632.cpp - Assembler for x86-32  ----------===//
-// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-//
-// Modified by the Subzero authors.
-//
-//===----------------------------------------------------------------------===//
-//
-//                        The Subzero Code Generator
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Assembler class for x86-32.
-//
-//===----------------------------------------------------------------------===//
-
-#include "IceAssemblerX8632.h"
-
-#include "IceCfg.h"
-#include "IceOperand.h"
-
-namespace Ice {
-namespace X8632 {
-
-Address Address::ofConstPool(Assembler *Asm, const Constant *Imm) {
-  AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Imm);
-  const RelocOffsetT Offset = 0;
-  return Address::Absolute(Offset, Fixup);
-}
-
-AssemblerX8632::~AssemblerX8632() {
-  if (BuildDefs::asserts()) {
-    for (const Label *Label : CfgNodeLabels) {
-      Label->FinalCheck();
-    }
-    for (const Label *Label : LocalLabels) {
-      Label->FinalCheck();
-    }
-  }
-}
-
-void AssemblerX8632::alignFunction() {
-  SizeT Align = 1 << getBundleAlignLog2Bytes();
-  SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
-  const SizeT HltSize = 1;
-  while (BytesNeeded > 0) {
-    hlt();
-    BytesNeeded -= HltSize;
-  }
-}
-
-Label *AssemblerX8632::GetOrCreateLabel(SizeT Number, LabelVector &Labels) {
-  Label *L = nullptr;
-  if (Number == Labels.size()) {
-    L = new (this->allocate<Label>()) Label();
-    Labels.push_back(L);
-    return L;
-  }
-  if (Number > Labels.size()) {
-    Labels.resize(Number + 1);
-  }
-  L = Labels[Number];
-  if (!L) {
-    L = new (this->allocate<Label>()) Label();
-    Labels[Number] = L;
-  }
-  return L;
-}
-
-Label *AssemblerX8632::GetOrCreateCfgNodeLabel(SizeT NodeNumber) {
-  return GetOrCreateLabel(NodeNumber, CfgNodeLabels);
-}
-
-Label *AssemblerX8632::GetOrCreateLocalLabel(SizeT Number) {
-  return GetOrCreateLabel(Number, LocalLabels);
-}
-
-void AssemblerX8632::bindCfgNodeLabel(SizeT NodeNumber) {
-  assert(!getPreliminary());
-  Label *L = GetOrCreateCfgNodeLabel(NodeNumber);
-  this->bind(L);
-}
-
-void AssemblerX8632::BindLocalLabel(SizeT Number) {
-  Label *L = GetOrCreateLocalLabel(Number);
-  if (!getPreliminary())
-    this->bind(L);
-}
-
-void AssemblerX8632::call(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xFF);
-  emitRegisterOperand(2, reg);
-}
-
-void AssemblerX8632::call(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xFF);
-  emitOperand(2, address);
-}
-
-void AssemblerX8632::call(const ConstantRelocatable *label) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  intptr_t call_start = Buffer.getPosition();
-  emitUint8(0xE8);
-  emitFixup(this->createFixup(llvm::ELF::R_386_PC32, label));
-  emitInt32(-4);
-  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
-  (void)call_start;
-}
-
-void AssemblerX8632::call(const Immediate &abs_address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  intptr_t call_start = Buffer.getPosition();
-  emitUint8(0xE8);
-  emitFixup(
-      this->createFixup(llvm::ELF::R_386_PC32, AssemblerFixup::NullSymbol));
-  emitInt32(abs_address.value() - 4);
-  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
-  (void)call_start;
-}
-
-void AssemblerX8632::pushl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x50 + reg);
-}
-
-void AssemblerX8632::popl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x58 + reg);
-}
-
-void AssemblerX8632::popl(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x8F);
-  emitOperand(0, address);
-}
-
-void AssemblerX8632::pushal() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x60);
-}
-
-void AssemblerX8632::popal() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x61);
-}
-
-void AssemblerX8632::setcc(CondX86::BrCond condition, ByteRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x90 + condition);
-  emitUint8(0xC0 + dst);
-}
-
-void AssemblerX8632::setcc(CondX86::BrCond condition, const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x90 + condition);
-  emitOperand(0, address);
-}
-
-void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (isByteSizedType(Ty)) {
-    emitUint8(0xB0 + dst);
-    emitUint8(imm.value() & 0xFF);
-    return;
-  }
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0xB8 + dst);
-  emitImmediate(Ty, imm);
-}
-
-void AssemblerX8632::mov(Type Ty, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty)) {
-    emitUint8(0x88);
-  } else {
-    emitUint8(0x89);
-  }
-  emitRegisterOperand(src, dst);
-}
-
-void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty)) {
-    emitUint8(0x8A);
-  } else {
-    emitUint8(0x8B);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::mov(Type Ty, const Address &dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty)) {
-    emitUint8(0x88);
-  } else {
-    emitUint8(0x89);
-  }
-  emitOperand(src, dst);
-}
-
-void AssemblerX8632::mov(Type Ty, const Address &dst, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty)) {
-    emitUint8(0xC6);
-    emitOperand(0, dst);
-    emitUint8(imm.value() & 0xFF);
-  } else {
-    emitUint8(0xC7);
-    emitOperand(0, dst);
-    emitImmediate(Ty, imm);
-  }
-}
-
-void AssemblerX8632::movzx(Type SrcTy, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  assert(ByteSized || SrcTy == IceType_i16);
-  emitUint8(0x0F);
-  emitUint8(ByteSized ? 0xB6 : 0xB7);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movzx(Type SrcTy, GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  assert(ByteSized || SrcTy == IceType_i16);
-  emitUint8(0x0F);
-  emitUint8(ByteSized ? 0xB6 : 0xB7);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::movsx(Type SrcTy, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  assert(ByteSized || SrcTy == IceType_i16);
-  emitUint8(0x0F);
-  emitUint8(ByteSized ? 0xBE : 0xBF);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movsx(Type SrcTy, GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  assert(ByteSized || SrcTy == IceType_i16);
-  emitUint8(0x0F);
-  emitUint8(ByteSized ? 0xBE : 0xBF);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::lea(Type Ty, GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x8D);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
-                          GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  else
-    assert(Ty == IceType_i32);
-  emitUint8(0x0F);
-  emitUint8(0x40 + cond);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
-                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  else
-    assert(Ty == IceType_i32);
-  emitUint8(0x0F);
-  emitUint8(0x40 + cond);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::rep_movsb() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitUint8(0xA4);
-}
-
-void AssemblerX8632::movss(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x10);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::movss(Type Ty, const Address &dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x11);
-  emitOperand(src, dst);
-}
-
-void AssemblerX8632::movss(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x11);
-  emitXmmRegisterOperand(src, dst);
-}
-
-void AssemblerX8632::movd(XmmRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x6E);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movd(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x6E);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::movd(GPRRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitRegisterOperand(src, dst);
-}
-
-void AssemblerX8632::movd(const Address &dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitOperand(src, dst);
-}
-
-void AssemblerX8632::movq(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movq(const Address &dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xD6);
-  emitOperand(src, dst);
-}
-
-void AssemblerX8632::movq(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::addss(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::addss(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::subss(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::subss(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::mulss(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::mulss(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::divss(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::divss(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::fld(Type Ty, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
-  emitOperand(0, src);
-}
-
-void AssemblerX8632::fstp(Type Ty, const Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
-  emitOperand(3, dst);
-}
-
-void AssemblerX8632::fstp(X87STRegister st) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDD);
-  emitUint8(0xD8 + st);
-}
-
-void AssemblerX8632::movaps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x28);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movups(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x10);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movups(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x10);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::movups(const Address &dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x11);
-  emitOperand(src, dst);
-}
-
-void AssemblerX8632::padd(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xFC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xFD);
-  } else {
-    emitUint8(0xFE);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::padd(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xFC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xFD);
-  } else {
-    emitUint8(0xFE);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::pand(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xDB);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pand(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xDB);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::pandn(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xDF);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pandn(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xDF);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::pmull(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD5);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x38);
-    emitUint8(0x40);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pmull(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD5);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x38);
-    emitUint8(0x40);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::pmuludq(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xF4);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pmuludq(Type /* Ty */, XmmRegister dst,
-                             const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xF4);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::por(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xEB);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::por(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xEB);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::psub(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xF8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xF9);
-  } else {
-    emitUint8(0xFA);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::psub(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xF8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xF9);
-  } else {
-    emitUint8(0xFA);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::pxor(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xEF);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pxor(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xEF);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::psll(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xF1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xF2);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::psll(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xF1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xF2);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::psll(Type Ty, XmmRegister dst, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_int8());
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0x71);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x72);
-  }
-  emitRegisterOperand(6, dst);
-  emitUint8(imm.value() & 0xFF);
-}
-
-void AssemblerX8632::psra(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xE1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xE2);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::psra(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xE1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xE2);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::psra(Type Ty, XmmRegister dst, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_int8());
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0x71);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x72);
-  }
-  emitRegisterOperand(4, dst);
-  emitUint8(imm.value() & 0xFF);
-}
-
-void AssemblerX8632::psrl(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD1);
-  } else if (Ty == IceType_f64) {
-    emitUint8(0xD3);
-  } else {
-    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
-    emitUint8(0xD2);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::psrl(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD1);
-  } else if (Ty == IceType_f64) {
-    emitUint8(0xD3);
-  } else {
-    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
-    emitUint8(0xD2);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::psrl(Type Ty, XmmRegister dst, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_int8());
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0x71);
-  } else if (Ty == IceType_f64) {
-    emitUint8(0x73);
-  } else {
-    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
-    emitUint8(0x72);
-  }
-  emitRegisterOperand(2, dst);
-  emitUint8(imm.value() & 0xFF);
-}
-
-// {add,sub,mul,div}ps are given a Ty parameter for consistency with
-// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows
-// addpd, etc., we can use the Ty parameter to decide on adding
-// a 0x66 prefix.
-void AssemblerX8632::addps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::addps(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::subps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::subps(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::divps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::divps(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::mulps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::mulps(Type /* Ty */, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::minps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5D);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::maxps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5F);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::andps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x54);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::andps(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x54);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::orps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x56);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::blendvps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x14);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::blendvps(Type /* Ty */, XmmRegister dst,
-                              const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x14);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::pblendvb(Type /* Ty */, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x10);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pblendvb(Type /* Ty */, XmmRegister dst,
-                              const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x10);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::cmpps(XmmRegister dst, XmmRegister src,
-                           CondX86::CmppsCond CmpCondition) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xC2);
-  emitXmmRegisterOperand(dst, src);
-  emitUint8(CmpCondition);
-}
-
-void AssemblerX8632::cmpps(XmmRegister dst, const Address &src,
-                           CondX86::CmppsCond CmpCondition) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xC2);
-  emitOperand(dst, src);
-  emitUint8(CmpCondition);
-}
-
-void AssemblerX8632::sqrtps(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-void AssemblerX8632::rsqrtps(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x52);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-void AssemblerX8632::reciprocalps(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x53);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-void AssemblerX8632::movhlps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x12);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movlhps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x16);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::unpcklps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x14);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::unpckhps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x15);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::unpcklpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x14);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::unpckhpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x15);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::set1ps(XmmRegister dst, GPRRegister tmp1,
-                            const Immediate &imm) {
-  // Load 32-bit immediate value into tmp1.
-  mov(IceType_i32, tmp1, imm);
-  // Move value from tmp1 into dst.
-  movd(dst, tmp1);
-  // Broadcast low lane into other three lanes.
-  shufps(dst, dst, Immediate(0x0));
-}
-
-void AssemblerX8632::shufps(XmmRegister dst, XmmRegister src,
-                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xC6);
-  emitXmmRegisterOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::pshufd(Type /* Ty */, XmmRegister dst, XmmRegister src,
-                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x70);
-  emitXmmRegisterOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::pshufd(Type /* Ty */, XmmRegister dst, const Address &src,
-                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x70);
-  emitOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::shufps(Type /* Ty */, XmmRegister dst, XmmRegister src,
-                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xC6);
-  emitXmmRegisterOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::shufps(Type /* Ty */, XmmRegister dst, const Address &src,
-                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xC6);
-  emitOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::minpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x5D);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::maxpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x5F);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::sqrtpd(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-void AssemblerX8632::shufpd(XmmRegister dst, XmmRegister src,
-                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0xC6);
-  emitXmmRegisterOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
-                              XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
-                              const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::cvttps2dq(Type /* Ignore */, XmmRegister dst,
-                               XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::cvttps2dq(Type /* Ignore */, XmmRegister dst,
-                               const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::cvtsi2ss(Type DestTy, XmmRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x2A);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::cvtsi2ss(Type DestTy, XmmRegister dst,
-                              const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x2A);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::cvtfloat2float(Type SrcTy, XmmRegister dst,
-                                    XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // ss2sd or sd2ss
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x5A);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::cvtfloat2float(Type SrcTy, XmmRegister dst,
-                                    const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x5A);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::cvttss2si(Type SrcTy, GPRRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x2C);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::cvttss2si(Type SrcTy, GPRRegister dst,
-                               const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x2C);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::ucomiss(Type Ty, XmmRegister a, XmmRegister b) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_f64)
-    emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x2E);
-  emitXmmRegisterOperand(a, b);
-}
-
-void AssemblerX8632::ucomiss(Type Ty, XmmRegister a, const Address &b) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_f64)
-    emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x2E);
-  emitOperand(a, b);
-}
-
-void AssemblerX8632::movmskpd(GPRRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x50);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::movmskps(GPRRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x50);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::sqrtss(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::sqrtss(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::xorpd(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x57);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::xorpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x57);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::orpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x56);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::xorps(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x57);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::xorps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x57);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::andpd(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x54);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::andpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x54);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::insertps(Type Ty, XmmRegister dst, XmmRegister src,
-                              const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  assert(isVectorFloatingType(Ty));
-  (void)Ty;
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x3A);
-  emitUint8(0x21);
-  emitXmmRegisterOperand(dst, src);
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::insertps(Type Ty, XmmRegister dst, const Address &src,
-                              const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  assert(isVectorFloatingType(Ty));
-  (void)Ty;
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x3A);
-  emitUint8(0x21);
-  emitOperand(dst, src);
-  emitUint8(imm.value());
-}
-
-void AssemblerX8632::pinsr(Type Ty, XmmRegister dst, GPRRegister src,
-                           const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  if (Ty == IceType_i16) {
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0xC4);
-    emitXmmRegisterOperand(dst, XmmRegister(src));
-    emitUint8(imm.value());
-  } else {
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0x3A);
-    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
-    emitXmmRegisterOperand(dst, XmmRegister(src));
-    emitUint8(imm.value());
-  }
-}
-
-void AssemblerX8632::pinsr(Type Ty, XmmRegister dst, const Address &src,
-                           const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  if (Ty == IceType_i16) {
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0xC4);
-    emitOperand(dst, src);
-    emitUint8(imm.value());
-  } else {
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0x3A);
-    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
-    emitOperand(dst, src);
-    emitUint8(imm.value());
-  }
-}
-
-void AssemblerX8632::pextr(Type Ty, GPRRegister dst, XmmRegister src,
-                           const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  if (Ty == IceType_i16) {
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0xC5);
-    emitXmmRegisterOperand(XmmRegister(dst), src);
-    emitUint8(imm.value());
-  } else {
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0x3A);
-    emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
-    // SSE 4.1 versions are "MRI" because dst can be mem, while
-    // pextrw (SSE2) is RMI because dst must be reg.
-    emitXmmRegisterOperand(src, XmmRegister(dst));
-    emitUint8(imm.value());
-  }
-}
-
-void AssemblerX8632::pmovsxdq(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x25);
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pcmpeq(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x74);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x75);
-  } else {
-    emitUint8(0x76);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pcmpeq(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x74);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x75);
-  } else {
-    emitUint8(0x76);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::pcmpgt(Type Ty, XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x64);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x65);
-  } else {
-    emitUint8(0x66);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::pcmpgt(Type Ty, XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x64);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x65);
-  } else {
-    emitUint8(0x66);
-  }
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::roundsd(XmmRegister dst, XmmRegister src,
-                             RoundingMode mode) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitUint8(0x0F);
-  emitUint8(0x3A);
-  emitUint8(0x0B);
-  emitXmmRegisterOperand(dst, src);
-  // Mask precision exeption.
-  emitUint8(static_cast<uint8_t>(mode) | 0x8);
-}
-
-void AssemblerX8632::fnstcw(const Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitOperand(7, dst);
-}
-
-void AssemblerX8632::fldcw(const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitOperand(5, src);
-}
-
-void AssemblerX8632::fistpl(const Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDF);
-  emitOperand(7, dst);
-}
-
-void AssemblerX8632::fistps(const Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDB);
-  emitOperand(3, dst);
-}
-
-void AssemblerX8632::fildl(const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDF);
-  emitOperand(5, src);
-}
-
-void AssemblerX8632::filds(const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDB);
-  emitOperand(0, src);
-}
-
-void AssemblerX8632::fincstp() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitUint8(0xF7);
-}
-
-template <uint32_t Tag>
-void AssemblerX8632::arith_int(Type Ty, GPRRegister reg, const Immediate &imm) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (isByteSizedType(Ty)) {
-    emitComplexI8(Tag, Operand(reg), imm);
-    return;
-  }
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitComplex(Ty, Tag, Operand(reg), imm);
-}
-
-template <uint32_t Tag>
-void AssemblerX8632::arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty))
-    emitUint8(Tag * 8 + 2);
-  else
-    emitUint8(Tag * 8 + 3);
-  emitRegisterOperand(reg0, reg1);
-}
-
-template <uint32_t Tag>
-void AssemblerX8632::arith_int(Type Ty, GPRRegister reg,
-                               const Address &address) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty))
-    emitUint8(Tag * 8 + 2);
-  else
-    emitUint8(Tag * 8 + 3);
-  emitOperand(reg, address);
-}
-
-template <uint32_t Tag>
-void AssemblerX8632::arith_int(Type Ty, const Address &address,
-                               GPRRegister reg) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty))
-    emitUint8(Tag * 8 + 0);
-  else
-    emitUint8(Tag * 8 + 1);
-  emitOperand(reg, address);
-}
-
-template <uint32_t Tag>
-void AssemblerX8632::arith_int(Type Ty, const Address &address,
-                               const Immediate &imm) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (isByteSizedType(Ty)) {
-    emitComplexI8(Tag, address, imm);
-    return;
-  }
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitComplex(Ty, Tag, address, imm);
-}
-
-void AssemblerX8632::cmp(Type Ty, GPRRegister reg, const Immediate &imm) {
-  arith_int<7>(Ty, reg, imm);
-}
-
-void AssemblerX8632::cmp(Type Ty, GPRRegister reg0, GPRRegister reg1) {
-  arith_int<7>(Ty, reg0, reg1);
-}
-
-void AssemblerX8632::cmp(Type Ty, GPRRegister reg, const Address &address) {
-  arith_int<7>(Ty, reg, address);
-}
-
-void AssemblerX8632::cmp(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<7>(Ty, address, reg);
-}
-
-void AssemblerX8632::cmp(Type Ty, const Address &address,
-                         const Immediate &imm) {
-  arith_int<7>(Ty, address, imm);
-}
-
-void AssemblerX8632::test(Type Ty, GPRRegister reg1, GPRRegister reg2) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty))
-    emitUint8(0x84);
-  else
-    emitUint8(0x85);
-  emitRegisterOperand(reg1, reg2);
-}
-
-void AssemblerX8632::test(Type Ty, const Address &addr, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedType(Ty))
-    emitUint8(0x84);
-  else
-    emitUint8(0x85);
-  emitOperand(reg, addr);
-}
-
-void AssemblerX8632::test(Type Ty, GPRRegister reg,
-                          const Immediate &immediate) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
-  // we only test the byte register to keep the encoding short.
-  // This is legal even if the register had high bits set since
-  // this only sets flags registers based on the "AND" of the two operands,
-  // and the immediate had zeros at those high bits.
-  if (immediate.is_uint8() && reg < 4) {
-    // Use zero-extended 8-bit immediate.
-    if (reg == RegX8632::Encoded_Reg_eax) {
-      emitUint8(0xA8);
-    } else {
-      emitUint8(0xF6);
-      emitUint8(0xC0 + reg);
-    }
-    emitUint8(immediate.value() & 0xFF);
-  } else if (reg == RegX8632::Encoded_Reg_eax) {
-    // Use short form if the destination is EAX.
-    if (Ty == IceType_i16)
-      emitOperandSizeOverride();
-    emitUint8(0xA9);
-    emitImmediate(Ty, immediate);
-  } else {
-    if (Ty == IceType_i16)
-      emitOperandSizeOverride();
-    emitUint8(0xF7);
-    emitRegisterOperand(0, reg);
-    emitImmediate(Ty, immediate);
-  }
-}
-
-void AssemblerX8632::test(Type Ty, const Address &addr,
-                          const Immediate &immediate) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // If the immediate is short, we only test the byte addr to keep the
-  // encoding short.
-  if (immediate.is_uint8()) {
-    // Use zero-extended 8-bit immediate.
-    emitUint8(0xF6);
-    emitOperand(0, addr);
-    emitUint8(immediate.value() & 0xFF);
-  } else {
-    if (Ty == IceType_i16)
-      emitOperandSizeOverride();
-    emitUint8(0xF7);
-    emitOperand(0, addr);
-    emitImmediate(Ty, immediate);
-  }
-}
-
-void AssemblerX8632::And(Type Ty, GPRRegister dst, GPRRegister src) {
-  arith_int<4>(Ty, dst, src);
-}
-
-void AssemblerX8632::And(Type Ty, GPRRegister dst, const Address &address) {
-  arith_int<4>(Ty, dst, address);
-}
-
-void AssemblerX8632::And(Type Ty, GPRRegister dst, const Immediate &imm) {
-  arith_int<4>(Ty, dst, imm);
-}
-
-void AssemblerX8632::And(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<4>(Ty, address, reg);
-}
-
-void AssemblerX8632::And(Type Ty, const Address &address,
-                         const Immediate &imm) {
-  arith_int<4>(Ty, address, imm);
-}
-
-void AssemblerX8632::Or(Type Ty, GPRRegister dst, GPRRegister src) {
-  arith_int<1>(Ty, dst, src);
-}
-
-void AssemblerX8632::Or(Type Ty, GPRRegister dst, const Address &address) {
-  arith_int<1>(Ty, dst, address);
-}
-
-void AssemblerX8632::Or(Type Ty, GPRRegister dst, const Immediate &imm) {
-  arith_int<1>(Ty, dst, imm);
-}
-
-void AssemblerX8632::Or(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<1>(Ty, address, reg);
-}
-
-void AssemblerX8632::Or(Type Ty, const Address &address, const Immediate &imm) {
-  arith_int<1>(Ty, address, imm);
-}
-
-void AssemblerX8632::Xor(Type Ty, GPRRegister dst, GPRRegister src) {
-  arith_int<6>(Ty, dst, src);
-}
-
-void AssemblerX8632::Xor(Type Ty, GPRRegister dst, const Address &address) {
-  arith_int<6>(Ty, dst, address);
-}
-
-void AssemblerX8632::Xor(Type Ty, GPRRegister dst, const Immediate &imm) {
-  arith_int<6>(Ty, dst, imm);
-}
-
-void AssemblerX8632::Xor(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<6>(Ty, address, reg);
-}
-
-void AssemblerX8632::Xor(Type Ty, const Address &address,
-                         const Immediate &imm) {
-  arith_int<6>(Ty, address, imm);
-}
-
-void AssemblerX8632::add(Type Ty, GPRRegister dst, GPRRegister src) {
-  arith_int<0>(Ty, dst, src);
-}
-
-void AssemblerX8632::add(Type Ty, GPRRegister reg, const Address &address) {
-  arith_int<0>(Ty, reg, address);
-}
-
-void AssemblerX8632::add(Type Ty, GPRRegister reg, const Immediate &imm) {
-  arith_int<0>(Ty, reg, imm);
-}
-
-void AssemblerX8632::add(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<0>(Ty, address, reg);
-}
-
-void AssemblerX8632::add(Type Ty, const Address &address,
-                         const Immediate &imm) {
-  arith_int<0>(Ty, address, imm);
-}
-
-void AssemblerX8632::adc(Type Ty, GPRRegister dst, GPRRegister src) {
-  arith_int<2>(Ty, dst, src);
-}
-
-void AssemblerX8632::adc(Type Ty, GPRRegister dst, const Address &address) {
-  arith_int<2>(Ty, dst, address);
-}
-
-void AssemblerX8632::adc(Type Ty, GPRRegister reg, const Immediate &imm) {
-  arith_int<2>(Ty, reg, imm);
-}
-
-void AssemblerX8632::adc(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<2>(Ty, address, reg);
-}
-
-void AssemblerX8632::adc(Type Ty, const Address &address,
-                         const Immediate &imm) {
-  arith_int<2>(Ty, address, imm);
-}
-
-void AssemblerX8632::sub(Type Ty, GPRRegister dst, GPRRegister src) {
-  arith_int<5>(Ty, dst, src);
-}
-
-void AssemblerX8632::sub(Type Ty, GPRRegister reg, const Address &address) {
-  arith_int<5>(Ty, reg, address);
-}
-
-void AssemblerX8632::sub(Type Ty, GPRRegister reg, const Immediate &imm) {
-  arith_int<5>(Ty, reg, imm);
-}
-
-void AssemblerX8632::sub(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<5>(Ty, address, reg);
-}
-
-void AssemblerX8632::sub(Type Ty, const Address &address,
-                         const Immediate &imm) {
-  arith_int<5>(Ty, address, imm);
-}
-
-void AssemblerX8632::sbb(Type Ty, GPRRegister dst, GPRRegister src) {
-  arith_int<3>(Ty, dst, src);
-}
-
-void AssemblerX8632::sbb(Type Ty, GPRRegister dst, const Address &address) {
-  arith_int<3>(Ty, dst, address);
-}
-
-void AssemblerX8632::sbb(Type Ty, GPRRegister reg, const Immediate &imm) {
-  arith_int<3>(Ty, reg, imm);
-}
-
-void AssemblerX8632::sbb(Type Ty, const Address &address, GPRRegister reg) {
-  arith_int<3>(Ty, address, reg);
-}
-
-void AssemblerX8632::sbb(Type Ty, const Address &address,
-                         const Immediate &imm) {
-  arith_int<3>(Ty, address, imm);
-}
-
-void AssemblerX8632::cbw() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitOperandSizeOverride();
-  emitUint8(0x98);
-}
-
-void AssemblerX8632::cwd() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitOperandSizeOverride();
-  emitUint8(0x99);
-}
-
-void AssemblerX8632::cdq() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x99);
-}
-
-void AssemblerX8632::div(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(6, reg);
-}
-
-void AssemblerX8632::div(Type Ty, const Address &addr) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(6, addr);
-}
-
-void AssemblerX8632::idiv(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(7, reg);
-}
-
-void AssemblerX8632::idiv(Type Ty, const Address &addr) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(7, addr);
-}
-
-void AssemblerX8632::imul(Type Ty, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xAF);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::imul(Type Ty, GPRRegister reg, const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xAF);
-  emitOperand(reg, address);
-}
-
-void AssemblerX8632::imul(Type Ty, GPRRegister reg, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (imm.is_int8()) {
-    emitUint8(0x6B);
-    emitRegisterOperand(reg, reg);
-    emitUint8(imm.value() & 0xFF);
-  } else {
-    emitUint8(0x69);
-    emitRegisterOperand(reg, reg);
-    emitImmediate(Ty, imm);
-  }
-}
-
-void AssemblerX8632::imul(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(5, reg);
-}
-
-void AssemblerX8632::imul(Type Ty, const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(5, address);
-}
-
-void AssemblerX8632::mul(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(4, reg);
-}
-
-void AssemblerX8632::mul(Type Ty, const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(4, address);
-}
-
-void AssemblerX8632::incl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x40 + reg);
-}
-
-void AssemblerX8632::incl(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xFF);
-  emitOperand(0, address);
-}
-
-void AssemblerX8632::decl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x48 + reg);
-}
-
-void AssemblerX8632::decl(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xFF);
-  emitOperand(1, address);
-}
-
-void AssemblerX8632::rol(Type Ty, GPRRegister reg, const Immediate &imm) {
-  emitGenericShift(0, Ty, reg, imm);
-}
-
-void AssemblerX8632::rol(Type Ty, GPRRegister operand, GPRRegister shifter) {
-  emitGenericShift(0, Ty, Operand(operand), shifter);
-}
-
-void AssemblerX8632::rol(Type Ty, const Address &operand, GPRRegister shifter) {
-  emitGenericShift(0, Ty, operand, shifter);
-}
-
-void AssemblerX8632::shl(Type Ty, GPRRegister reg, const Immediate &imm) {
-  emitGenericShift(4, Ty, reg, imm);
-}
-
-void AssemblerX8632::shl(Type Ty, GPRRegister operand, GPRRegister shifter) {
-  emitGenericShift(4, Ty, Operand(operand), shifter);
-}
-
-void AssemblerX8632::shl(Type Ty, const Address &operand, GPRRegister shifter) {
-  emitGenericShift(4, Ty, operand, shifter);
-}
-
-void AssemblerX8632::shr(Type Ty, GPRRegister reg, const Immediate &imm) {
-  emitGenericShift(5, Ty, reg, imm);
-}
-
-void AssemblerX8632::shr(Type Ty, GPRRegister operand, GPRRegister shifter) {
-  emitGenericShift(5, Ty, Operand(operand), shifter);
-}
-
-void AssemblerX8632::shr(Type Ty, const Address &operand, GPRRegister shifter) {
-  emitGenericShift(5, Ty, operand, shifter);
-}
-
-void AssemblerX8632::sar(Type Ty, GPRRegister reg, const Immediate &imm) {
-  emitGenericShift(7, Ty, reg, imm);
-}
-
-void AssemblerX8632::sar(Type Ty, GPRRegister operand, GPRRegister shifter) {
-  emitGenericShift(7, Ty, Operand(operand), shifter);
-}
-
-void AssemblerX8632::sar(Type Ty, const Address &address, GPRRegister shifter) {
-  emitGenericShift(7, Ty, address, shifter);
-}
-
-void AssemblerX8632::shld(Type Ty, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xA5);
-  emitRegisterOperand(src, dst);
-}
-
-void AssemblerX8632::shld(Type Ty, GPRRegister dst, GPRRegister src,
-                          const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  assert(imm.is_int8());
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xA4);
-  emitRegisterOperand(src, dst);
-  emitUint8(imm.value() & 0xFF);
-}
-
-void AssemblerX8632::shld(Type Ty, const Address &operand, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xA5);
-  emitOperand(src, operand);
-}
-
-void AssemblerX8632::shrd(Type Ty, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xAD);
-  emitRegisterOperand(src, dst);
-}
-
-void AssemblerX8632::shrd(Type Ty, GPRRegister dst, GPRRegister src,
-                          const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  assert(imm.is_int8());
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xAC);
-  emitRegisterOperand(src, dst);
-  emitUint8(imm.value() & 0xFF);
-}
-
-void AssemblerX8632::shrd(Type Ty, const Address &dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xAD);
-  emitOperand(src, dst);
-}
-
-void AssemblerX8632::neg(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(3, reg);
-}
-
-void AssemblerX8632::neg(Type Ty, const Address &addr) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(3, addr);
-}
-
-void AssemblerX8632::notl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF7);
-  emitUint8(0xD0 | reg);
-}
-
-void AssemblerX8632::bswap(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i32);
-  (void)Ty;
-  emitUint8(0x0F);
-  emitUint8(0xC8 | reg);
-}
-
-void AssemblerX8632::bsf(Type Ty, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xBC);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::bsf(Type Ty, GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xBC);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::bsr(Type Ty, GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xBD);
-  emitRegisterOperand(dst, src);
-}
-
-void AssemblerX8632::bsr(Type Ty, GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(0x0F);
-  emitUint8(0xBD);
-  emitOperand(dst, src);
-}
-
-void AssemblerX8632::bt(GPRRegister base, GPRRegister offset) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xA3);
-  emitRegisterOperand(offset, base);
-}
-
-void AssemblerX8632::ret() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xC3);
-}
-
-void AssemblerX8632::ret(const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xC2);
-  assert(imm.is_uint16());
-  emitUint8(imm.value() & 0xFF);
-  emitUint8((imm.value() >> 8) & 0xFF);
-}
-
-void AssemblerX8632::nop(int size) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // There are nops up to size 15, but for now just provide up to size 8.
-  assert(0 < size && size <= MAX_NOP_SIZE);
-  switch (size) {
-  case 1:
-    emitUint8(0x90);
-    break;
-  case 2:
-    emitUint8(0x66);
-    emitUint8(0x90);
-    break;
-  case 3:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x00);
-    break;
-  case 4:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x40);
-    emitUint8(0x00);
-    break;
-  case 5:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x44);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  case 6:
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x44);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  case 7:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x80);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  case 8:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x84);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  default:
-    llvm_unreachable("Unimplemented");
-  }
-}
-
-void AssemblerX8632::int3() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xCC);
-}
-
-void AssemblerX8632::hlt() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF4);
-}
-
-void AssemblerX8632::ud2() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x0B);
-}
-
-void AssemblerX8632::j(CondX86::BrCond condition, Label *label, bool near) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (label->IsBound()) {
-    static const int kShortSize = 2;
-    static const int kLongSize = 6;
-    intptr_t offset = label->Position() - Buffer.size();
-    assert(offset <= 0);
-    if (Utils::IsInt(8, offset - kShortSize)) {
-      // TODO(stichnot): Here and in jmp(), we may need to be more
-      // conservative about the backward branch distance if the branch
-      // instruction is within a bundle_lock sequence, because the
-      // distance may increase when padding is added.  This isn't an
-      // issue for branches outside a bundle_lock, because if padding
-      // is added, the retry may change it to a long backward branch
-      // without affecting any of the bookkeeping.
-      emitUint8(0x70 + condition);
-      emitUint8((offset - kShortSize) & 0xFF);
-    } else {
-      emitUint8(0x0F);
-      emitUint8(0x80 + condition);
-      emitInt32(offset - kLongSize);
-    }
-  } else if (near) {
-    emitUint8(0x70 + condition);
-    emitNearLabelLink(label);
-  } else {
-    emitUint8(0x0F);
-    emitUint8(0x80 + condition);
-    emitLabelLink(label);
-  }
-}
-
-void AssemblerX8632::j(CondX86::BrCond condition,
-                       const ConstantRelocatable *label) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x80 + condition);
-  emitFixup(this->createFixup(llvm::ELF::R_386_PC32, label));
-  emitInt32(-4);
-}
-
-void AssemblerX8632::jmp(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xFF);
-  emitRegisterOperand(4, reg);
-}
-
-void AssemblerX8632::jmp(Label *label, bool near) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (label->IsBound()) {
-    static const int kShortSize = 2;
-    static const int kLongSize = 5;
-    intptr_t offset = label->Position() - Buffer.size();
-    assert(offset <= 0);
-    if (Utils::IsInt(8, offset - kShortSize)) {
-      emitUint8(0xEB);
-      emitUint8((offset - kShortSize) & 0xFF);
-    } else {
-      emitUint8(0xE9);
-      emitInt32(offset - kLongSize);
-    }
-  } else if (near) {
-    emitUint8(0xEB);
-    emitNearLabelLink(label);
-  } else {
-    emitUint8(0xE9);
-    emitLabelLink(label);
-  }
-}
-
-void AssemblerX8632::jmp(const ConstantRelocatable *label) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xE9);
-  emitFixup(this->createFixup(llvm::ELF::R_386_PC32, label));
-  emitInt32(-4);
-}
-
-void AssemblerX8632::mfence() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xAE);
-  emitUint8(0xF0);
-}
-
-void AssemblerX8632::lock() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF0);
-}
-
-void AssemblerX8632::cmpxchg(Type Ty, const Address &address, GPRRegister reg,
-                             bool Locked) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (Locked)
-    emitUint8(0xF0);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xB0);
-  else
-    emitUint8(0xB1);
-  emitOperand(reg, address);
-}
-
-void AssemblerX8632::cmpxchg8b(const Address &address, bool Locked) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Locked)
-    emitUint8(0xF0);
-  emitUint8(0x0F);
-  emitUint8(0xC7);
-  emitOperand(1, address);
-}
-
-void AssemblerX8632::xadd(Type Ty, const Address &addr, GPRRegister reg,
-                          bool Locked) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (Locked)
-    emitUint8(0xF0);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xC0);
-  else
-    emitUint8(0xC1);
-  emitOperand(reg, addr);
-}
-
-void AssemblerX8632::xchg(Type Ty, const Address &addr, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (isByteSizedArithType(Ty))
-    emitUint8(0x86);
-  else
-    emitUint8(0x87);
-  emitOperand(reg, addr);
-}
-
-void AssemblerX8632::emitSegmentOverride(uint8_t prefix) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(prefix);
-}
-
-void AssemblerX8632::align(intptr_t alignment, intptr_t offset) {
-  assert(llvm::isPowerOf2_32(alignment));
-  intptr_t pos = offset + Buffer.getPosition();
-  intptr_t mod = pos & (alignment - 1);
-  if (mod == 0) {
-    return;
-  }
-  intptr_t bytes_needed = alignment - mod;
-  while (bytes_needed > MAX_NOP_SIZE) {
-    nop(MAX_NOP_SIZE);
-    bytes_needed -= MAX_NOP_SIZE;
-  }
-  if (bytes_needed) {
-    nop(bytes_needed);
-  }
-  assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0);
-}
-
-void AssemblerX8632::bind(Label *label) {
-  intptr_t bound = Buffer.size();
-  assert(!label->IsBound()); // Labels can only be bound once.
-  while (label->IsLinked()) {
-    intptr_t position = label->LinkPosition();
-    intptr_t next = Buffer.load<int32_t>(position);
-    Buffer.store<int32_t>(position, bound - (position + 4));
-    label->position_ = next;
-  }
-  while (label->HasNear()) {
-    intptr_t position = label->NearPosition();
-    intptr_t offset = bound - (position + 1);
-    assert(Utils::IsInt(8, offset));
-    Buffer.store<int8_t>(position, offset);
-  }
-  label->BindTo(bound);
-}
-
-void AssemblerX8632::emitOperand(int rm, const Operand &operand) {
-  assert(rm >= 0 && rm < 8);
-  const intptr_t length = operand.length_;
-  assert(length > 0);
-  // Emit the ModRM byte updated with the given RM value.
-  assert((operand.encoding_[0] & 0x38) == 0);
-  emitUint8(operand.encoding_[0] + (rm << 3));
-  if (operand.fixup()) {
-    emitFixup(operand.fixup());
-  }
-  // Emit the rest of the encoded operand.
-  for (intptr_t i = 1; i < length; i++) {
-    emitUint8(operand.encoding_[i]);
-  }
-}
-
-void AssemblerX8632::emitImmediate(Type Ty, const Immediate &imm) {
-  if (Ty == IceType_i16) {
-    assert(!imm.fixup());
-    emitInt16(imm.value());
-  } else {
-    if (imm.fixup()) {
-      emitFixup(imm.fixup());
-    }
-    emitInt32(imm.value());
-  }
-}
-
-void AssemblerX8632::emitComplexI8(int rm, const Operand &operand,
-                                   const Immediate &immediate) {
-  assert(rm >= 0 && rm < 8);
-  assert(immediate.is_int8());
-  if (operand.IsRegister(RegX8632::Encoded_Reg_eax)) {
-    // Use short form if the destination is al.
-    emitUint8(0x04 + (rm << 3));
-    emitUint8(immediate.value() & 0xFF);
-  } else {
-    // Use sign-extended 8-bit immediate.
-    emitUint8(0x80);
-    emitOperand(rm, operand);
-    emitUint8(immediate.value() & 0xFF);
-  }
-}
-
-void AssemblerX8632::emitComplex(Type Ty, int rm, const Operand &operand,
-                                 const Immediate &immediate) {
-  assert(rm >= 0 && rm < 8);
-  if (immediate.is_int8()) {
-    // Use sign-extended 8-bit immediate.
-    emitUint8(0x83);
-    emitOperand(rm, operand);
-    emitUint8(immediate.value() & 0xFF);
-  } else if (operand.IsRegister(RegX8632::Encoded_Reg_eax)) {
-    // Use short form if the destination is eax.
-    emitUint8(0x05 + (rm << 3));
-    emitImmediate(Ty, immediate);
-  } else {
-    emitUint8(0x81);
-    emitOperand(rm, operand);
-    emitImmediate(Ty, immediate);
-  }
-}
-
-void AssemblerX8632::emitLabel(Label *label, intptr_t instruction_size) {
-  if (label->IsBound()) {
-    intptr_t offset = label->Position() - Buffer.size();
-    assert(offset <= 0);
-    emitInt32(offset - instruction_size);
-  } else {
-    emitLabelLink(label);
-  }
-}
-
-void AssemblerX8632::emitLabelLink(Label *Label) {
-  assert(!Label->IsBound());
-  intptr_t Position = Buffer.size();
-  emitInt32(Label->position_);
-  if (!getPreliminary())
-    Label->LinkTo(Position);
-}
-
-void AssemblerX8632::emitNearLabelLink(Label *label) {
-  assert(!label->IsBound());
-  intptr_t position = Buffer.size();
-  emitUint8(0);
-  if (!getPreliminary())
-    label->NearLinkTo(position);
-}
-
-void AssemblerX8632::emitGenericShift(int rm, Type Ty, GPRRegister reg,
-                                      const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_int8());
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (imm.value() == 1) {
-    emitUint8(isByteSizedArithType(Ty) ? 0xD0 : 0xD1);
-    emitOperand(rm, Operand(reg));
-  } else {
-    emitUint8(isByteSizedArithType(Ty) ? 0xC0 : 0xC1);
-    emitOperand(rm, Operand(reg));
-    emitUint8(imm.value() & 0xFF);
-  }
-}
-
-void AssemblerX8632::emitGenericShift(int rm, Type Ty, const Operand &operand,
-                                      GPRRegister shifter) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(shifter == RegX8632::Encoded_Reg_ecx);
-  (void)shifter;
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitUint8(isByteSizedArithType(Ty) ? 0xD2 : 0xD3);
-  emitOperand(rm, operand);
-}
-
-} // end of namespace X8632
-} // end of namespace Ice
diff --git a/src/IceAssemblerX8632.h b/src/IceAssemblerX8632.h
index 8b13fc6..ad79489 100644
--- a/src/IceAssemblerX8632.h
+++ b/src/IceAssemblerX8632.h
@@ -1,4 +1,4 @@
-//===- subzero/src/assembler_ia32.h - Assembler for x86-32 ------*- C++ -*-===//
+//===- subzero/src/IceAssemblerX8632.h - Assembler for x86-32 ---*- C++ -*-===//
 //
 // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
@@ -23,906 +23,34 @@
 #define SUBZERO_SRC_ICEASSEMBLERX8632_H
 
 #include "IceAssembler.h"
-#include "IceConditionCodesX8632.h"
+#include "IceAssemblerX86Base.h"
 #include "IceDefs.h"
 #include "IceOperand.h"
-#include "IceRegistersX8632.h"
+#include "IceTargetLoweringX8632Traits.h"
 #include "IceTypes.h"
 #include "IceUtils.h"
 
 namespace Ice {
 
-using RegX8632::GPRRegister;
-using RegX8632::XmmRegister;
-using RegX8632::ByteRegister;
-using RegX8632::X87STRegister;
+class TargetX8632;
 
 namespace X8632 {
 
-const int MAX_NOP_SIZE = 8;
-
-enum ScaleFactor { TIMES_1 = 0, TIMES_2 = 1, TIMES_4 = 2, TIMES_8 = 3 };
-
-class Immediate {
-  Immediate(const Immediate &) = delete;
-  Immediate &operator=(const Immediate &) = delete;
-
-public:
-  explicit Immediate(int32_t value) : value_(value) {}
-
-  Immediate(RelocOffsetT offset, AssemblerFixup *fixup)
-      : value_(offset), fixup_(fixup) {
-    // Use the Offset in the "value" for now. If we decide to process fixups,
-    // we'll need to patch that offset with the true value.
-  }
-
-  int32_t value() const { return value_; }
-  AssemblerFixup *fixup() const { return fixup_; }
-
-  bool is_int8() const {
-    // We currently only allow 32-bit fixups, and they usually have value = 0,
-    // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
-    return fixup_ == nullptr && Utils::IsInt(8, value_);
-  }
-  bool is_uint8() const {
-    return fixup_ == nullptr && Utils::IsUint(8, value_);
-  }
-  bool is_uint16() const {
-    return fixup_ == nullptr && Utils::IsUint(16, value_);
-  }
-
-private:
-  const int32_t value_;
-  AssemblerFixup *fixup_ = nullptr;
-};
-
-class Operand {
-public:
-  Operand(const Operand &other) : length_(other.length_), fixup_(other.fixup_) {
-    memmove(&encoding_[0], &other.encoding_[0], other.length_);
-  }
-
-  Operand &operator=(const Operand &other) {
-    length_ = other.length_;
-    fixup_ = other.fixup_;
-    memmove(&encoding_[0], &other.encoding_[0], other.length_);
-    return *this;
-  }
-
-  uint8_t mod() const { return (encoding_at(0) >> 6) & 3; }
-
-  GPRRegister rm() const {
-    return static_cast<GPRRegister>(encoding_at(0) & 7);
-  }
-
-  ScaleFactor scale() const {
-    return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
-  }
-
-  GPRRegister index() const {
-    return static_cast<GPRRegister>((encoding_at(1) >> 3) & 7);
-  }
-
-  GPRRegister base() const {
-    return static_cast<GPRRegister>(encoding_at(1) & 7);
-  }
-
-  int8_t disp8() const {
-    assert(length_ >= 2);
-    return static_cast<int8_t>(encoding_[length_ - 1]);
-  }
-
-  int32_t disp32() const {
-    assert(length_ >= 5);
-    return bit_copy<int32_t>(encoding_[length_ - 4]);
-  }
-
-  AssemblerFixup *fixup() const { return fixup_; }
-
-protected:
-  Operand() : length_(0), fixup_(nullptr) {} // Needed by subclass Address.
-
-  void SetModRM(int mod, GPRRegister rm) {
-    assert((mod & ~3) == 0);
-    encoding_[0] = (mod << 6) | rm;
-    length_ = 1;
-  }
-
-  void SetSIB(ScaleFactor scale, GPRRegister index, GPRRegister base) {
-    assert(length_ == 1);
-    assert((scale & ~3) == 0);
-    encoding_[1] = (scale << 6) | (index << 3) | base;
-    length_ = 2;
-  }
-
-  void SetDisp8(int8_t disp) {
-    assert(length_ == 1 || length_ == 2);
-    encoding_[length_++] = static_cast<uint8_t>(disp);
-  }
-
-  void SetDisp32(int32_t disp) {
-    assert(length_ == 1 || length_ == 2);
-    intptr_t disp_size = sizeof(disp);
-    memmove(&encoding_[length_], &disp, disp_size);
-    length_ += disp_size;
-  }
-
-  void SetFixup(AssemblerFixup *fixup) { fixup_ = fixup; }
-
-private:
-  uint8_t length_;
-  uint8_t encoding_[6];
-  AssemblerFixup *fixup_;
-
-  explicit Operand(GPRRegister reg) : fixup_(nullptr) { SetModRM(3, reg); }
-
-  // Get the operand encoding byte at the given index.
-  uint8_t encoding_at(intptr_t index) const {
-    assert(index >= 0 && index < length_);
-    return encoding_[index];
-  }
-
-  // Returns whether or not this operand is really the given register in
-  // disguise. Used from the assembler to generate better encodings.
-  bool IsRegister(GPRRegister reg) const {
-    return ((encoding_[0] & 0xF8) == 0xC0) // Addressing mode is register only.
-           && ((encoding_[0] & 0x07) == reg); // Register codes match.
-  }
-
-  friend class AssemblerX8632;
-};
-
-class Address : public Operand {
-public:
-  Address(const Address &other) : Operand(other) {}
-
-  Address &operator=(const Address &other) {
-    Operand::operator=(other);
-    return *this;
-  }
-
-  Address(GPRRegister base, int32_t disp) {
-    if (disp == 0 && base != RegX8632::Encoded_Reg_ebp) {
-      SetModRM(0, base);
-      if (base == RegX8632::Encoded_Reg_esp)
-        SetSIB(TIMES_1, RegX8632::Encoded_Reg_esp, base);
-    } else if (Utils::IsInt(8, disp)) {
-      SetModRM(1, base);
-      if (base == RegX8632::Encoded_Reg_esp)
-        SetSIB(TIMES_1, RegX8632::Encoded_Reg_esp, base);
-      SetDisp8(disp);
-    } else {
-      SetModRM(2, base);
-      if (base == RegX8632::Encoded_Reg_esp)
-        SetSIB(TIMES_1, RegX8632::Encoded_Reg_esp, base);
-      SetDisp32(disp);
-    }
-  }
-
-  Address(GPRRegister index, ScaleFactor scale, int32_t disp) {
-    assert(index != RegX8632::Encoded_Reg_esp); // Illegal addressing mode.
-    SetModRM(0, RegX8632::Encoded_Reg_esp);
-    SetSIB(scale, index, RegX8632::Encoded_Reg_ebp);
-    SetDisp32(disp);
-  }
-
-  Address(GPRRegister base, GPRRegister index, ScaleFactor scale,
-          int32_t disp) {
-    assert(index != RegX8632::Encoded_Reg_esp); // Illegal addressing mode.
-    if (disp == 0 && base != RegX8632::Encoded_Reg_ebp) {
-      SetModRM(0, RegX8632::Encoded_Reg_esp);
-      SetSIB(scale, index, base);
-    } else if (Utils::IsInt(8, disp)) {
-      SetModRM(1, RegX8632::Encoded_Reg_esp);
-      SetSIB(scale, index, base);
-      SetDisp8(disp);
-    } else {
-      SetModRM(2, RegX8632::Encoded_Reg_esp);
-      SetSIB(scale, index, base);
-      SetDisp32(disp);
-    }
-  }
-
-  static Address Absolute(const uintptr_t addr) {
-    Address result;
-    result.SetModRM(0, RegX8632::Encoded_Reg_ebp);
-    result.SetDisp32(addr);
-    return result;
-  }
-
-  static Address Absolute(RelocOffsetT Offset, AssemblerFixup *fixup) {
-    Address result;
-    result.SetModRM(0, RegX8632::Encoded_Reg_ebp);
-    // Use the Offset in the displacement for now. If we decide to process
-    // fixups later, we'll need to patch up the emitted displacement.
-    result.SetDisp32(Offset);
-    result.SetFixup(fixup);
-    return result;
-  }
-
-  static Address ofConstPool(Assembler *Asm, const Constant *Imm);
-
-private:
-  Address() = default; // Needed by Address::Absolute.
-};
-
-class Label {
-  Label(const Label &) = delete;
-  Label &operator=(const Label &) = delete;
-
-public:
-  Label() {
-    if (BuildDefs::asserts()) {
-      for (int i = 0; i < kMaxUnresolvedBranches; i++) {
-        unresolved_near_positions_[i] = -1;
-      }
-    }
-  }
-
-  ~Label() = default;
-
-  void FinalCheck() const {
-    // Assert if label is being destroyed with unresolved branches pending.
-    assert(!IsLinked());
-    assert(!HasNear());
-  }
-
-  // TODO(jvoung): why are labels offset by this?
-  static const uint32_t kWordSize = sizeof(uint32_t);
-
-  // Returns the position for bound labels (branches that come after this
-  // are considered backward branches). Cannot be used for unused or linked
-  // labels.
-  intptr_t Position() const {
-    assert(IsBound());
-    return -position_ - kWordSize;
-  }
-
-  // Returns the position of an earlier branch instruction that was linked
-  // to this label (branches that use this are considered forward branches).
-  // The linked instructions form a linked list, of sorts, using the
-  // instruction's displacement field for the location of the next
-  // instruction that is also linked to this label.
-  intptr_t LinkPosition() const {
-    assert(IsLinked());
-    return position_ - kWordSize;
-  }
-
-  // Returns the position of an earlier branch instruction which
-  // assumes that this label is "near", and bumps iterator to the
-  // next near position.
-  intptr_t NearPosition() {
-    assert(HasNear());
-    return unresolved_near_positions_[--num_unresolved_];
-  }
-
-  bool IsBound() const { return position_ < 0; }
-  bool IsLinked() const { return position_ > 0; }
-  bool IsUnused() const { return (position_ == 0) && (num_unresolved_ == 0); }
-  bool HasNear() const { return num_unresolved_ != 0; }
-
-private:
-  void BindTo(intptr_t position) {
-    assert(!IsBound());
-    assert(!HasNear());
-    position_ = -position - kWordSize;
-    assert(IsBound());
-  }
-
-  void LinkTo(intptr_t position) {
-    assert(!IsBound());
-    position_ = position + kWordSize;
-    assert(IsLinked());
-  }
-
-  void NearLinkTo(intptr_t position) {
-    assert(!IsBound());
-    assert(num_unresolved_ < kMaxUnresolvedBranches);
-    unresolved_near_positions_[num_unresolved_++] = position;
-  }
-
-  static constexpr int kMaxUnresolvedBranches = 20;
-
-  intptr_t position_ = 0;
-  intptr_t num_unresolved_ = 0;
-  // TODO(stichnot,jvoung): Can this instead be
-  // llvm::SmallVector<intptr_t, kMaxUnresolvedBranches> ?
-  intptr_t unresolved_near_positions_[kMaxUnresolvedBranches];
-
-  friend class AssemblerX8632;
-};
-
-class AssemblerX8632 : public Assembler {
+class AssemblerX8632 : public X86Internal::AssemblerX86Base<TargetX8632> {
   AssemblerX8632(const AssemblerX8632 &) = delete;
   AssemblerX8632 &operator=(const AssemblerX8632 &) = delete;
 
 public:
   explicit AssemblerX8632(bool use_far_branches = false)
-      : Assembler(Asm_X8632) {
-    // This mode is only needed and implemented for MIPS and ARM.
-    assert(!use_far_branches);
-    (void)use_far_branches;
-  }
-  ~AssemblerX8632() override;
-
-  static const bool kNearJump = true;
-  static const bool kFarJump = false;
-
-  void alignFunction() override;
-
-  SizeT getBundleAlignLog2Bytes() const override { return 5; }
-
-  const char *getNonExecPadDirective() const override { return ".p2align"; }
-
-  llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
-    static const uint8_t Padding[] = {0xF4};
-    return llvm::ArrayRef<uint8_t>(Padding, 1);
-  }
-
-  void padWithNop(intptr_t Padding) override {
-    while (Padding > MAX_NOP_SIZE) {
-      nop(MAX_NOP_SIZE);
-      Padding -= MAX_NOP_SIZE;
-    }
-    if (Padding)
-      nop(Padding);
-  }
-
-  Label *GetOrCreateCfgNodeLabel(SizeT NodeNumber);
-  void bindCfgNodeLabel(SizeT NodeNumber) override;
-  Label *GetOrCreateLocalLabel(SizeT Number);
-  void BindLocalLabel(SizeT Number);
-
-  bool fixupIsPCRel(FixupKind Kind) const override {
-    // Currently assuming this is the only PC-rel relocation type used.
-    return Kind == llvm::ELF::R_386_PC32;
-  }
+      : X86Internal::AssemblerX86Base<TargetX8632>(Asm_X8632,
+                                                   use_far_branches) {}
+  ~AssemblerX8632() override = default;
 
   static bool classof(const Assembler *Asm) {
     return Asm->getKind() == Asm_X8632;
   }
-
-  // Operations to emit GPR instructions (and dispatch on operand type).
-  typedef void (AssemblerX8632::*TypedEmitGPR)(Type, GPRRegister);
-  typedef void (AssemblerX8632::*TypedEmitAddr)(Type, const Address &);
-  struct GPREmitterOneOp {
-    TypedEmitGPR Reg;
-    TypedEmitAddr Addr;
-  };
-
-  typedef void (AssemblerX8632::*TypedEmitGPRGPR)(Type, GPRRegister,
-                                                  GPRRegister);
-  typedef void (AssemblerX8632::*TypedEmitGPRAddr)(Type, GPRRegister,
-                                                   const Address &);
-  typedef void (AssemblerX8632::*TypedEmitGPRImm)(Type, GPRRegister,
-                                                  const Immediate &);
-  struct GPREmitterRegOp {
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRAddr GPRAddr;
-    TypedEmitGPRImm GPRImm;
-  };
-
-  struct GPREmitterShiftOp {
-    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are not.
-    // In practice, we always normalize the Dest to a Register first.
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRImm GPRImm;
-  };
-
-  typedef void (AssemblerX8632::*TypedEmitGPRGPRImm)(Type, GPRRegister,
-                                                     GPRRegister,
-                                                     const Immediate &);
-  struct GPREmitterShiftD {
-    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice
-    // we always normalize Dest to a Register first.
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRGPRImm GPRGPRImm;
-  };
-
-  typedef void (AssemblerX8632::*TypedEmitAddrGPR)(Type, const Address &,
-                                                   GPRRegister);
-  typedef void (AssemblerX8632::*TypedEmitAddrImm)(Type, const Address &,
-                                                   const Immediate &);
-  struct GPREmitterAddrOp {
-    TypedEmitAddrGPR AddrGPR;
-    TypedEmitAddrImm AddrImm;
-  };
-
-  // Operations to emit XMM instructions (and dispatch on operand type).
-  typedef void (AssemblerX8632::*TypedEmitXmmXmm)(Type, XmmRegister,
-                                                  XmmRegister);
-  typedef void (AssemblerX8632::*TypedEmitXmmAddr)(Type, XmmRegister,
-                                                   const Address &);
-  struct XmmEmitterRegOp {
-    TypedEmitXmmXmm XmmXmm;
-    TypedEmitXmmAddr XmmAddr;
-  };
-
-  typedef void (AssemblerX8632::*EmitXmmXmm)(XmmRegister, XmmRegister);
-  typedef void (AssemblerX8632::*EmitXmmAddr)(XmmRegister, const Address &);
-  typedef void (AssemblerX8632::*EmitAddrXmm)(const Address &, XmmRegister);
-  struct XmmEmitterMovOps {
-    EmitXmmXmm XmmXmm;
-    EmitXmmAddr XmmAddr;
-    EmitAddrXmm AddrXmm;
-  };
-
-  typedef void (AssemblerX8632::*TypedEmitXmmImm)(Type, XmmRegister,
-                                                  const Immediate &);
-
-  struct XmmEmitterShiftOp {
-    TypedEmitXmmXmm XmmXmm;
-    TypedEmitXmmAddr XmmAddr;
-    TypedEmitXmmImm XmmImm;
-  };
-
-  // Cross Xmm/GPR cast instructions.
-  template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
-    typedef void (AssemblerX8632::*TypedEmitRegs)(Type, DReg_t, SReg_t);
-    typedef void (AssemblerX8632::*TypedEmitAddr)(Type, DReg_t,
-                                                  const Address &);
-
-    TypedEmitRegs RegReg;
-    TypedEmitAddr RegAddr;
-  };
-
-  // Three operand (potentially) cross Xmm/GPR instructions.
-  // The last operand must be an immediate.
-  template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
-    typedef void (AssemblerX8632::*TypedEmitRegRegImm)(Type, DReg_t, SReg_t,
-                                                       const Immediate &);
-    typedef void (AssemblerX8632::*TypedEmitRegAddrImm)(Type, DReg_t,
-                                                        const Address &,
-                                                        const Immediate &);
-
-    TypedEmitRegRegImm RegRegImm;
-    TypedEmitRegAddrImm RegAddrImm;
-  };
-
-  /*
-   * Emit Machine Instructions.
-   */
-  void call(GPRRegister reg);
-  void call(const Address &address);
-  void call(const ConstantRelocatable *label);
-  void call(const Immediate &abs_address);
-
-  static const intptr_t kCallExternalLabelSize = 5;
-
-  void pushl(GPRRegister reg);
-
-  void popl(GPRRegister reg);
-  void popl(const Address &address);
-
-  void pushal();
-  void popal();
-
-  void setcc(CondX86::BrCond condition, ByteRegister dst);
-  void setcc(CondX86::BrCond condition, const Address &address);
-
-  void mov(Type Ty, GPRRegister dst, const Immediate &src);
-  void mov(Type Ty, GPRRegister dst, GPRRegister src);
-
-  void mov(Type Ty, GPRRegister dst, const Address &src);
-  void mov(Type Ty, const Address &dst, GPRRegister src);
-  void mov(Type Ty, const Address &dst, const Immediate &imm);
-
-  void movzx(Type Ty, GPRRegister dst, GPRRegister src);
-  void movzx(Type Ty, GPRRegister dst, const Address &src);
-  void movsx(Type Ty, GPRRegister dst, GPRRegister src);
-  void movsx(Type Ty, GPRRegister dst, const Address &src);
-
-  void lea(Type Ty, GPRRegister dst, const Address &src);
-
-  void cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst, GPRRegister src);
-  void cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst, const Address &src);
-
-  void rep_movsb();
-
-  void movss(Type Ty, XmmRegister dst, const Address &src);
-  void movss(Type Ty, const Address &dst, XmmRegister src);
-  void movss(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void movd(XmmRegister dst, GPRRegister src);
-  void movd(XmmRegister dst, const Address &src);
-  void movd(GPRRegister dst, XmmRegister src);
-  void movd(const Address &dst, XmmRegister src);
-
-  void movq(XmmRegister dst, XmmRegister src);
-  void movq(const Address &dst, XmmRegister src);
-  void movq(XmmRegister dst, const Address &src);
-
-  void addss(Type Ty, XmmRegister dst, XmmRegister src);
-  void addss(Type Ty, XmmRegister dst, const Address &src);
-  void subss(Type Ty, XmmRegister dst, XmmRegister src);
-  void subss(Type Ty, XmmRegister dst, const Address &src);
-  void mulss(Type Ty, XmmRegister dst, XmmRegister src);
-  void mulss(Type Ty, XmmRegister dst, const Address &src);
-  void divss(Type Ty, XmmRegister dst, XmmRegister src);
-  void divss(Type Ty, XmmRegister dst, const Address &src);
-
-  void movaps(XmmRegister dst, XmmRegister src);
-
-  void movups(XmmRegister dst, XmmRegister src);
-  void movups(XmmRegister dst, const Address &src);
-  void movups(const Address &dst, XmmRegister src);
-
-  void padd(Type Ty, XmmRegister dst, XmmRegister src);
-  void padd(Type Ty, XmmRegister dst, const Address &src);
-  void pand(Type Ty, XmmRegister dst, XmmRegister src);
-  void pand(Type Ty, XmmRegister dst, const Address &src);
-  void pandn(Type Ty, XmmRegister dst, XmmRegister src);
-  void pandn(Type Ty, XmmRegister dst, const Address &src);
-  void pmull(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmull(Type Ty, XmmRegister dst, const Address &src);
-  void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmuludq(Type Ty, XmmRegister dst, const Address &src);
-  void por(Type Ty, XmmRegister dst, XmmRegister src);
-  void por(Type Ty, XmmRegister dst, const Address &src);
-  void psub(Type Ty, XmmRegister dst, XmmRegister src);
-  void psub(Type Ty, XmmRegister dst, const Address &src);
-  void pxor(Type Ty, XmmRegister dst, XmmRegister src);
-  void pxor(Type Ty, XmmRegister dst, const Address &src);
-
-  void psll(Type Ty, XmmRegister dst, XmmRegister src);
-  void psll(Type Ty, XmmRegister dst, const Address &src);
-  void psll(Type Ty, XmmRegister dst, const Immediate &src);
-
-  void psra(Type Ty, XmmRegister dst, XmmRegister src);
-  void psra(Type Ty, XmmRegister dst, const Address &src);
-  void psra(Type Ty, XmmRegister dst, const Immediate &src);
-  void psrl(Type Ty, XmmRegister dst, XmmRegister src);
-  void psrl(Type Ty, XmmRegister dst, const Address &src);
-  void psrl(Type Ty, XmmRegister dst, const Immediate &src);
-
-  void addps(Type Ty, XmmRegister dst, XmmRegister src);
-  void addps(Type Ty, XmmRegister dst, const Address &src);
-  void subps(Type Ty, XmmRegister dst, XmmRegister src);
-  void subps(Type Ty, XmmRegister dst, const Address &src);
-  void divps(Type Ty, XmmRegister dst, XmmRegister src);
-  void divps(Type Ty, XmmRegister dst, const Address &src);
-  void mulps(Type Ty, XmmRegister dst, XmmRegister src);
-  void mulps(Type Ty, XmmRegister dst, const Address &src);
-  void minps(XmmRegister dst, XmmRegister src);
-  void maxps(XmmRegister dst, XmmRegister src);
-  void andps(XmmRegister dst, XmmRegister src);
-  void andps(XmmRegister dst, const Address &src);
-  void orps(XmmRegister dst, XmmRegister src);
-
-  void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
-  void blendvps(Type Ty, XmmRegister dst, const Address &src);
-  void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
-  void pblendvb(Type Ty, XmmRegister dst, const Address &src);
-
-  void cmpps(XmmRegister dst, XmmRegister src, CondX86::CmppsCond CmpCondition);
-  void cmpps(XmmRegister dst, const Address &src,
-             CondX86::CmppsCond CmpCondition);
-
-  void sqrtps(XmmRegister dst);
-  void rsqrtps(XmmRegister dst);
-  void reciprocalps(XmmRegister dst);
-  void movhlps(XmmRegister dst, XmmRegister src);
-  void movlhps(XmmRegister dst, XmmRegister src);
-  void unpcklps(XmmRegister dst, XmmRegister src);
-  void unpckhps(XmmRegister dst, XmmRegister src);
-  void unpcklpd(XmmRegister dst, XmmRegister src);
-  void unpckhpd(XmmRegister dst, XmmRegister src);
-
-  void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
-  void shufps(XmmRegister dst, XmmRegister src, const Immediate &mask);
-
-  void minpd(XmmRegister dst, XmmRegister src);
-  void maxpd(XmmRegister dst, XmmRegister src);
-  void sqrtpd(XmmRegister dst);
-  void shufpd(XmmRegister dst, XmmRegister src, const Immediate &mask);
-
-  void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
-  void pshufd(Type Ty, XmmRegister dst, const Address &src,
-              const Immediate &mask);
-  void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
-  void shufps(Type Ty, XmmRegister dst, const Address &src,
-              const Immediate &mask);
-
-  void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
-  void cvtdq2ps(Type, XmmRegister dst, const Address &src);
-
-  void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
-  void cvttps2dq(Type, XmmRegister dst, const Address &src);
-
-  void cvtsi2ss(Type DestTy, XmmRegister dst, GPRRegister src);
-  void cvtsi2ss(Type DestTy, XmmRegister dst, const Address &src);
-
-  void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
-  void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
-
-  void cvttss2si(Type SrcTy, GPRRegister dst, XmmRegister src);
-  void cvttss2si(Type SrcTy, GPRRegister dst, const Address &src);
-
-  void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
-  void ucomiss(Type Ty, XmmRegister a, const Address &b);
-
-  void movmskpd(GPRRegister dst, XmmRegister src);
-  void movmskps(GPRRegister dst, XmmRegister src);
-
-  void sqrtss(Type Ty, XmmRegister dst, const Address &src);
-  void sqrtss(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void xorpd(XmmRegister dst, const Address &src);
-  void xorpd(XmmRegister dst, XmmRegister src);
-  void xorps(XmmRegister dst, const Address &src);
-  void xorps(XmmRegister dst, XmmRegister src);
-
-  void andpd(XmmRegister dst, const Address &src);
-  void andpd(XmmRegister dst, XmmRegister src);
-
-  void orpd(XmmRegister dst, XmmRegister src);
-
-  void insertps(Type Ty, XmmRegister dst, XmmRegister src,
-                const Immediate &imm);
-  void insertps(Type Ty, XmmRegister dst, const Address &src,
-                const Immediate &imm);
-
-  void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
-  void pinsr(Type Ty, XmmRegister dst, const Address &src,
-             const Immediate &imm);
-
-  void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
-  void pextr(Type Ty, GPRRegister dst, const Address &src,
-             const Immediate &imm);
-
-  void pmovsxdq(XmmRegister dst, XmmRegister src);
-
-  void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
-  void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
-  void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
-  void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
-
-  enum RoundingMode {
-    kRoundToNearest = 0x0,
-    kRoundDown = 0x1,
-    kRoundUp = 0x2,
-    kRoundToZero = 0x3
-  };
-  void roundsd(XmmRegister dst, XmmRegister src, RoundingMode mode);
-
-  void fld(Type Ty, const Address &src);
-  void fstp(Type Ty, const Address &dst);
-  void fstp(X87STRegister st);
-
-  void fnstcw(const Address &dst);
-  void fldcw(const Address &src);
-
-  void fistpl(const Address &dst);
-  void fistps(const Address &dst);
-  void fildl(const Address &src);
-  void filds(const Address &src);
-
-  void fincstp();
-
-  void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void cmp(Type Ty, GPRRegister reg, const Address &address);
-  void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
-  void cmp(Type Ty, const Address &address, GPRRegister reg);
-  void cmp(Type Ty, const Address &address, const Immediate &imm);
-
-  void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void test(Type Ty, GPRRegister reg, const Immediate &imm);
-  void test(Type Ty, const Address &address, GPRRegister reg);
-  void test(Type Ty, const Address &address, const Immediate &imm);
-
-  void And(Type Ty, GPRRegister dst, GPRRegister src);
-  void And(Type Ty, GPRRegister dst, const Address &address);
-  void And(Type Ty, GPRRegister dst, const Immediate &imm);
-  void And(Type Ty, const Address &address, GPRRegister reg);
-  void And(Type Ty, const Address &address, const Immediate &imm);
-
-  void Or(Type Ty, GPRRegister dst, GPRRegister src);
-  void Or(Type Ty, GPRRegister dst, const Address &address);
-  void Or(Type Ty, GPRRegister dst, const Immediate &imm);
-  void Or(Type Ty, const Address &address, GPRRegister reg);
-  void Or(Type Ty, const Address &address, const Immediate &imm);
-
-  void Xor(Type Ty, GPRRegister dst, GPRRegister src);
-  void Xor(Type Ty, GPRRegister dst, const Address &address);
-  void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
-  void Xor(Type Ty, const Address &address, GPRRegister reg);
-  void Xor(Type Ty, const Address &address, const Immediate &imm);
-
-  void add(Type Ty, GPRRegister dst, GPRRegister src);
-  void add(Type Ty, GPRRegister reg, const Address &address);
-  void add(Type Ty, GPRRegister reg, const Immediate &imm);
-  void add(Type Ty, const Address &address, GPRRegister reg);
-  void add(Type Ty, const Address &address, const Immediate &imm);
-
-  void adc(Type Ty, GPRRegister dst, GPRRegister src);
-  void adc(Type Ty, GPRRegister dst, const Address &address);
-  void adc(Type Ty, GPRRegister reg, const Immediate &imm);
-  void adc(Type Ty, const Address &address, GPRRegister reg);
-  void adc(Type Ty, const Address &address, const Immediate &imm);
-
-  void sub(Type Ty, GPRRegister dst, GPRRegister src);
-  void sub(Type Ty, GPRRegister reg, const Address &address);
-  void sub(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sub(Type Ty, const Address &address, GPRRegister reg);
-  void sub(Type Ty, const Address &address, const Immediate &imm);
-
-  void sbb(Type Ty, GPRRegister dst, GPRRegister src);
-  void sbb(Type Ty, GPRRegister reg, const Address &address);
-  void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sbb(Type Ty, const Address &address, GPRRegister reg);
-  void sbb(Type Ty, const Address &address, const Immediate &imm);
-
-  void cbw();
-  void cwd();
-  void cdq();
-
-  void div(Type Ty, GPRRegister reg);
-  void div(Type Ty, const Address &address);
-
-  void idiv(Type Ty, GPRRegister reg);
-  void idiv(Type Ty, const Address &address);
-
-  void imul(Type Ty, GPRRegister dst, GPRRegister src);
-  void imul(Type Ty, GPRRegister reg, const Immediate &imm);
-  void imul(Type Ty, GPRRegister reg, const Address &address);
-
-  void imul(Type Ty, GPRRegister reg);
-  void imul(Type Ty, const Address &address);
-
-  void mul(Type Ty, GPRRegister reg);
-  void mul(Type Ty, const Address &address);
-
-  void incl(GPRRegister reg);
-  void incl(const Address &address);
-
-  void decl(GPRRegister reg);
-  void decl(const Address &address);
-
-  void rol(Type Ty, GPRRegister reg, const Immediate &imm);
-  void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void rol(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void shl(Type Ty, GPRRegister reg, const Immediate &imm);
-  void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void shl(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void shr(Type Ty, GPRRegister reg, const Immediate &imm);
-  void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void shr(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void sar(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void sar(Type Ty, const Address &address, GPRRegister shifter);
-
-  void shld(Type Ty, GPRRegister dst, GPRRegister src);
-  void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void shld(Type Ty, const Address &operand, GPRRegister src);
-  void shrd(Type Ty, GPRRegister dst, GPRRegister src);
-  void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void shrd(Type Ty, const Address &dst, GPRRegister src);
-
-  void neg(Type Ty, GPRRegister reg);
-  void neg(Type Ty, const Address &addr);
-  void notl(GPRRegister reg);
-
-  void bsf(Type Ty, GPRRegister dst, GPRRegister src);
-  void bsf(Type Ty, GPRRegister dst, const Address &src);
-  void bsr(Type Ty, GPRRegister dst, GPRRegister src);
-  void bsr(Type Ty, GPRRegister dst, const Address &src);
-
-  void bswap(Type Ty, GPRRegister reg);
-
-  void bt(GPRRegister base, GPRRegister offset);
-
-  void ret();
-  void ret(const Immediate &imm);
-
-  // 'size' indicates size in bytes and must be in the range 1..8.
-  void nop(int size = 1);
-  void int3();
-  void hlt();
-  void ud2();
-
-  void j(CondX86::BrCond condition, Label *label, bool near = kFarJump);
-  void j(CondX86::BrCond condition, const ConstantRelocatable *label);
-
-  void jmp(GPRRegister reg);
-  void jmp(Label *label, bool near = kFarJump);
-  void jmp(const ConstantRelocatable *label);
-
-  void mfence();
-
-  void lock();
-  void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
-  void cmpxchg8b(const Address &address, bool Locked);
-  void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
-  void xchg(Type Ty, const Address &address, GPRRegister reg);
-
-  void emitSegmentOverride(uint8_t prefix);
-
-  intptr_t preferredLoopAlignment() { return 16; }
-  void align(intptr_t alignment, intptr_t offset);
-  void bind(Label *label);
-
-  intptr_t CodeSize() const { return Buffer.size(); }
-
-private:
-  inline void emitUint8(uint8_t value);
-  inline void emitInt16(int16_t value);
-  inline void emitInt32(int32_t value);
-  inline void emitRegisterOperand(int rm, int reg);
-  inline void emitXmmRegisterOperand(int rm, XmmRegister reg);
-  inline void emitFixup(AssemblerFixup *fixup);
-  inline void emitOperandSizeOverride();
-
-  void emitOperand(int rm, const Operand &operand);
-  void emitImmediate(Type ty, const Immediate &imm);
-  void emitComplexI8(int rm, const Operand &operand,
-                     const Immediate &immediate);
-  void emitComplex(Type Ty, int rm, const Operand &operand,
-                   const Immediate &immediate);
-  void emitLabel(Label *label, intptr_t instruction_size);
-  void emitLabelLink(Label *label);
-  void emitNearLabelLink(Label *label);
-
-  void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
-  void emitGenericShift(int rm, Type Ty, const Operand &operand,
-                        GPRRegister shifter);
-
-  typedef std::vector<Label *> LabelVector;
-  // A vector of pool-allocated x86 labels for CFG nodes.
-  LabelVector CfgNodeLabels;
-  // A vector of pool-allocated x86 labels for Local labels.
-  LabelVector LocalLabels;
-
-  Label *GetOrCreateLabel(SizeT Number, LabelVector &Labels);
-
-  // The arith_int() methods factor out the commonality between the encodings of
-  // add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp().  The Tag
-  // parameter is statically asserted to be less than 8.
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg, const Address &address);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, const Address &address, GPRRegister reg);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, const Address &address, const Immediate &imm);
 };
 
-inline void AssemblerX8632::emitUint8(uint8_t value) {
-  Buffer.emit<uint8_t>(value);
-}
-
-inline void AssemblerX8632::emitInt16(int16_t value) {
-  Buffer.emit<int16_t>(value);
-}
-
-inline void AssemblerX8632::emitInt32(int32_t value) {
-  Buffer.emit<int32_t>(value);
-}
-
-inline void AssemblerX8632::emitRegisterOperand(int rm, int reg) {
-  assert(rm >= 0 && rm < 8);
-  Buffer.emit<uint8_t>(0xC0 + (rm << 3) + reg);
-}
-
-inline void AssemblerX8632::emitXmmRegisterOperand(int rm, XmmRegister reg) {
-  emitRegisterOperand(rm, static_cast<GPRRegister>(reg));
-}
-
-inline void AssemblerX8632::emitFixup(AssemblerFixup *fixup) {
-  Buffer.emitFixup(fixup);
-}
-
-inline void AssemblerX8632::emitOperandSizeOverride() { emitUint8(0x66); }
-
 } // end of namespace X8632
 } // end of namespace Ice
 
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
new file mode 100644
index 0000000..e6516f6
--- /dev/null
+++ b/src/IceAssemblerX86Base.h
@@ -0,0 +1,959 @@
+//===- subzero/src/IceAssemblerX86Base.h - base x86 assembler -*- C++ -*---===//
+//
+// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+//
+// Modified by the Subzero authors.
+//
+//===----------------------------------------------------------------------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AssemblerX86Base template class for x86, the base of
+// all X86 assemblers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEASSEMBLERX86BASE_H
+#define SUBZERO_SRC_ICEASSEMBLERX86BASE_H
+
+#include "IceAssembler.h"
+#include "IceDefs.h"
+#include "IceOperand.h"
+#include "IceTypes.h"
+#include "IceUtils.h"
+
+namespace Ice {
+
+namespace X86Internal {
+
+template <class Machine> class AssemblerX86Base;
+template <class Machine> struct MachineTraits;
+
+constexpr int MAX_NOP_SIZE = 8;
+
+class Immediate {
+  Immediate(const Immediate &) = delete;
+  Immediate &operator=(const Immediate &) = delete;
+
+public:
+  explicit Immediate(int32_t value) : value_(value) {}
+
+  Immediate(RelocOffsetT offset, AssemblerFixup *fixup)
+      : value_(offset), fixup_(fixup) {
+    // Keep the offset in the "value" for now. If we decide to process fixups,
+    // we'll need to patch that offset with the true value.
+  }
+
+  int32_t value() const { return value_; }
+  AssemblerFixup *fixup() const { return fixup_; }
+
+  bool is_int8() const {
+    // Only 32-bit fixups are allowed for now, and they usually have value = 0,
+    // so an immediate with a fixup is never classified as int8/uint8/uint16.
+    return fixup_ == nullptr && Utils::IsInt(8, value_);
+  }
+  bool is_uint8() const {
+    return fixup_ == nullptr && Utils::IsUint(8, value_);
+  }
+  bool is_uint16() const {
+    return fixup_ == nullptr && Utils::IsUint(16, value_);
+  }
+
+private:
+  const int32_t value_;
+  AssemblerFixup *fixup_ = nullptr; // Stays null for plain integer immediates.
+};
+
+class Label {
+  Label(const Label &) = delete;
+  Label &operator=(const Label &) = delete;
+
+public:
+  Label() {
+    if (BuildDefs::asserts()) {
+      for (int i = 0; i < kMaxUnresolvedBranches; i++) {
+        unresolved_near_positions_[i] = -1;
+      }
+    }
+  }
+
+  ~Label() = default;
+
+  void FinalCheck() const {
+    // Assert that no unresolved (linked or near) branches are still pending.
+    assert(!IsLinked());
+    assert(!HasNear());
+  }
+
+  // TODO(jvoung): why are labels offset by this?
+  static const uint32_t kWordSize = sizeof(uint32_t);
+
+  // Returns the position of a bound label, undoing the negate-and-offset
+  // encoding applied by BindTo() (branches emitted after binding are backward
+  // branches). Asserts IsBound(); not valid for unused/linked labels.
+  intptr_t Position() const {
+    assert(IsBound());
+    return -position_ - kWordSize;
+  }
+
+  // Returns the position of an earlier branch instruction that was linked
+  // to this label (branches that use this are considered forward branches).
+  // The linked instructions form a linked list, of sorts, using the
+  // instruction's displacement field for the location of the next
+  // instruction that is also linked to this label.
+  intptr_t LinkPosition() const {
+    assert(IsLinked());
+    return position_ - kWordSize;
+  }
+
+  // Pops and returns the most recently recorded position of an earlier branch
+  // instruction that assumes this label is "near"; each call consumes one
+  // entry of unresolved_near_positions_ by decrementing num_unresolved_.
+  intptr_t NearPosition() {
+    assert(HasNear());
+    return unresolved_near_positions_[--num_unresolved_];
+  }
+
+  bool IsBound() const { return position_ < 0; }
+  bool IsLinked() const { return position_ > 0; }
+  bool IsUnused() const { return (position_ == 0) && (num_unresolved_ == 0); }
+  bool HasNear() const { return num_unresolved_ != 0; }
+
+private:
+  void BindTo(intptr_t position) {
+    assert(!IsBound());
+    assert(!HasNear());
+    position_ = -position - kWordSize;
+    assert(IsBound());
+  }
+
+  void LinkTo(intptr_t position) {
+    assert(!IsBound());
+    position_ = position + kWordSize;
+    assert(IsLinked());
+  }
+
+  void NearLinkTo(intptr_t position) {
+    assert(!IsBound());
+    assert(num_unresolved_ < kMaxUnresolvedBranches);
+    unresolved_near_positions_[num_unresolved_++] = position;
+  }
+
+  static constexpr int kMaxUnresolvedBranches = 20;
+
+  intptr_t position_ = 0; // < 0: bound, > 0: linked, 0: neither.
+  intptr_t num_unresolved_ = 0; // Number of recorded near positions.
+  // TODO(stichnot,jvoung): Can this instead be
+  // llvm::SmallVector<intptr_t, kMaxUnresolvedBranches> ?
+  intptr_t unresolved_near_positions_[kMaxUnresolvedBranches];
+
+  template <class> friend class AssemblerX86Base;
+};
+
+template <class Machine> class AssemblerX86Base : public Assembler {
+  AssemblerX86Base(const AssemblerX86Base &) = delete;
+  AssemblerX86Base &operator=(const AssemblerX86Base &) = delete;
+
+protected:
+  AssemblerX86Base(AssemblerKind Kind, bool use_far_branches)
+      : Assembler(Kind) {
+    // This mode is only needed and implemented for MIPS and ARM.
+    assert(!use_far_branches);
+    (void)use_far_branches;
+  }
+
+public:
+  using Traits = MachineTraits<Machine>;
+
+  ~AssemblerX86Base() override;
+
+  static const bool kNearJump = true;
+  static const bool kFarJump = false;
+
+  void alignFunction() override;
+
+  SizeT getBundleAlignLog2Bytes() const override { return 5; }
+
+  const char *getNonExecPadDirective() const override { return ".p2align"; }
+
+  llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
+    static const uint8_t Padding[] = {0xF4};
+    return llvm::ArrayRef<uint8_t>(Padding, 1);
+  }
+
+  void padWithNop(intptr_t Padding) override {
+    while (Padding > MAX_NOP_SIZE) {
+      nop(MAX_NOP_SIZE);
+      Padding -= MAX_NOP_SIZE;
+    }
+    if (Padding)
+      nop(Padding);
+  }
+
+  Label *GetOrCreateCfgNodeLabel(SizeT NodeNumber);
+  void bindCfgNodeLabel(SizeT NodeNumber) override;
+  Label *GetOrCreateLocalLabel(SizeT Number);
+  void BindLocalLabel(SizeT Number);
+
+  bool fixupIsPCRel(FixupKind Kind) const override {
+    // Currently assuming this is the only PC-rel relocation type used.
+    // TODO(jpp): Traits.PcRelTypes.count(Kind) != 0
+    return Kind == Traits::PcRelFixup;
+  }
+
+  // Operations to emit GPR instructions (and dispatch on operand type).
+  typedef void (AssemblerX86Base::*TypedEmitGPR)(Type,
+                                                 typename Traits::GPRRegister);
+  typedef void (AssemblerX86Base::*TypedEmitAddr)(
+      Type, const typename Traits::Address &);
+  struct GPREmitterOneOp {
+    TypedEmitGPR Reg;
+    TypedEmitAddr Addr;
+  };
+
+  typedef void (AssemblerX86Base::*TypedEmitGPRGPR)(
+      Type, typename Traits::GPRRegister, typename Traits::GPRRegister);
+  typedef void (AssemblerX86Base::*TypedEmitGPRAddr)(
+      Type, typename Traits::GPRRegister, const typename Traits::Address &);
+  typedef void (AssemblerX86Base::*TypedEmitGPRImm)(
+      Type, typename Traits::GPRRegister, const Immediate &);
+  struct GPREmitterRegOp {
+    TypedEmitGPRGPR GPRGPR;
+    TypedEmitGPRAddr GPRAddr;
+    TypedEmitGPRImm GPRImm;
+  };
+
+  struct GPREmitterShiftOp {
+    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are not.
+    // In practice, we always normalize the Dest to a Register first.
+    TypedEmitGPRGPR GPRGPR;
+    TypedEmitGPRImm GPRImm;
+  };
+
+  typedef void (AssemblerX86Base::*TypedEmitGPRGPRImm)(
+      Type, typename Traits::GPRRegister, typename Traits::GPRRegister,
+      const Immediate &);
+  struct GPREmitterShiftD {
+    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice
+    // we always normalize Dest to a Register first.
+    TypedEmitGPRGPR GPRGPR;
+    TypedEmitGPRGPRImm GPRGPRImm;
+  };
+
+  typedef void (AssemblerX86Base::*TypedEmitAddrGPR)(
+      Type, const typename Traits::Address &, typename Traits::GPRRegister);
+  typedef void (AssemblerX86Base::*TypedEmitAddrImm)(
+      Type, const typename Traits::Address &, const Immediate &);
+  struct GPREmitterAddrOp {
+    TypedEmitAddrGPR AddrGPR;
+    TypedEmitAddrImm AddrImm;
+  };
+
+  // Operations to emit XMM instructions (and dispatch on operand type).
+  typedef void (AssemblerX86Base::*TypedEmitXmmXmm)(
+      Type, typename Traits::XmmRegister, typename Traits::XmmRegister);
+  typedef void (AssemblerX86Base::*TypedEmitXmmAddr)(
+      Type, typename Traits::XmmRegister, const typename Traits::Address &);
+  struct XmmEmitterRegOp {
+    TypedEmitXmmXmm XmmXmm;
+    TypedEmitXmmAddr XmmAddr;
+  };
+
+  typedef void (AssemblerX86Base::*EmitXmmXmm)(typename Traits::XmmRegister,
+                                               typename Traits::XmmRegister);
+  typedef void (AssemblerX86Base::*EmitXmmAddr)(
+      typename Traits::XmmRegister, const typename Traits::Address &);
+  typedef void (AssemblerX86Base::*EmitAddrXmm)(
+      const typename Traits::Address &, typename Traits::XmmRegister);
+  struct XmmEmitterMovOps {
+    EmitXmmXmm XmmXmm;
+    EmitXmmAddr XmmAddr;
+    EmitAddrXmm AddrXmm;
+  };
+
+  typedef void (AssemblerX86Base::*TypedEmitXmmImm)(
+      Type, typename Traits::XmmRegister, const Immediate &);
+
+  struct XmmEmitterShiftOp {
+    TypedEmitXmmXmm XmmXmm;
+    TypedEmitXmmAddr XmmAddr;
+    TypedEmitXmmImm XmmImm;
+  };
+
+  // Cross Xmm/GPR cast instructions.
+  template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
+    typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, SReg_t);
+    typedef void (AssemblerX86Base::*TypedEmitAddr)(
+        Type, DReg_t, const typename Traits::Address &);
+
+    TypedEmitRegs RegReg;
+    TypedEmitAddr RegAddr;
+  };
+
+  // Three operand (potentially) cross Xmm/GPR instructions.
+  // The last operand must be an immediate.
+  template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
+    typedef void (AssemblerX86Base::*TypedEmitRegRegImm)(Type, DReg_t, SReg_t,
+                                                         const Immediate &);
+    typedef void (AssemblerX86Base::*TypedEmitRegAddrImm)(
+        Type, DReg_t, const typename Traits::Address &, const Immediate &);
+
+    TypedEmitRegRegImm RegRegImm;
+    TypedEmitRegAddrImm RegAddrImm;
+  };
+
+  /*
+   * Emit Machine Instructions.
+   */
+  void call(typename Traits::GPRRegister reg);
+  void call(const typename Traits::Address &address);
+  void call(const ConstantRelocatable *label);
+  void call(const Immediate &abs_address);
+
+  static const intptr_t kCallExternalLabelSize = 5;
+
+  void pushl(typename Traits::GPRRegister reg);
+
+  void popl(typename Traits::GPRRegister reg);
+  void popl(const typename Traits::Address &address);
+
+  void pushal();
+  void popal();
+
+  void setcc(typename Traits::Cond::BrCond condition,
+             typename Traits::ByteRegister dst);
+  void setcc(typename Traits::Cond::BrCond condition,
+             const typename Traits::Address &address);
+
+  void mov(Type Ty, typename Traits::GPRRegister dst, const Immediate &src);
+  void mov(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+
+  void mov(Type Ty, typename Traits::GPRRegister dst,
+           const typename Traits::Address &src);
+  void mov(Type Ty, const typename Traits::Address &dst,
+           typename Traits::GPRRegister src);
+  void mov(Type Ty, const typename Traits::Address &dst, const Immediate &imm);
+
+  void movzx(Type Ty, typename Traits::GPRRegister dst,
+             typename Traits::GPRRegister src);
+  void movzx(Type Ty, typename Traits::GPRRegister dst,
+             const typename Traits::Address &src);
+  void movsx(Type Ty, typename Traits::GPRRegister dst,
+             typename Traits::GPRRegister src);
+  void movsx(Type Ty, typename Traits::GPRRegister dst,
+             const typename Traits::Address &src);
+
+  void lea(Type Ty, typename Traits::GPRRegister dst,
+           const typename Traits::Address &src);
+
+  void cmov(Type Ty, typename Traits::Cond::BrCond cond,
+            typename Traits::GPRRegister dst, typename Traits::GPRRegister src);
+  void cmov(Type Ty, typename Traits::Cond::BrCond cond,
+            typename Traits::GPRRegister dst,
+            const typename Traits::Address &src);
+
+  void rep_movsb();
+
+  void movss(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void movss(Type Ty, const typename Traits::Address &dst,
+             typename Traits::XmmRegister src);
+  void movss(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+
+  void movd(typename Traits::XmmRegister dst, typename Traits::GPRRegister src);
+  void movd(typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+  void movd(typename Traits::GPRRegister dst, typename Traits::XmmRegister src);
+  void movd(const typename Traits::Address &dst,
+            typename Traits::XmmRegister src);
+
+  void movq(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
+  void movq(const typename Traits::Address &dst,
+            typename Traits::XmmRegister src);
+  void movq(typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+
+  void addss(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void addss(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void subss(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void subss(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void mulss(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void mulss(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void divss(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void divss(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+
+  void movaps(typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src);
+
+  void movups(typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src);
+  void movups(typename Traits::XmmRegister dst,
+              const typename Traits::Address &src);
+  void movups(const typename Traits::Address &dst,
+              typename Traits::XmmRegister src);
+
+  void padd(Type Ty, typename Traits::XmmRegister dst,
+            typename Traits::XmmRegister src);
+  void padd(Type Ty, typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+  void pand(Type Ty, typename Traits::XmmRegister dst,
+            typename Traits::XmmRegister src);
+  void pand(Type Ty, typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+  void pandn(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void pandn(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void pmull(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void pmull(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void pmuludq(Type Ty, typename Traits::XmmRegister dst,
+               typename Traits::XmmRegister src);
+  void pmuludq(Type Ty, typename Traits::XmmRegister dst,
+               const typename Traits::Address &src);
+  void por(Type Ty, typename Traits::XmmRegister dst,
+           typename Traits::XmmRegister src);
+  void por(Type Ty, typename Traits::XmmRegister dst,
+           const typename Traits::Address &src);
+  void psub(Type Ty, typename Traits::XmmRegister dst,
+            typename Traits::XmmRegister src);
+  void psub(Type Ty, typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+  void pxor(Type Ty, typename Traits::XmmRegister dst,
+            typename Traits::XmmRegister src);
+  void pxor(Type Ty, typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+
+  void psll(Type Ty, typename Traits::XmmRegister dst,
+            typename Traits::XmmRegister src);
+  void psll(Type Ty, typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+  void psll(Type Ty, typename Traits::XmmRegister dst, const Immediate &src);
+
+  void psra(Type Ty, typename Traits::XmmRegister dst,
+            typename Traits::XmmRegister src);
+  void psra(Type Ty, typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+  void psra(Type Ty, typename Traits::XmmRegister dst, const Immediate &src);
+  void psrl(Type Ty, typename Traits::XmmRegister dst,
+            typename Traits::XmmRegister src);
+  void psrl(Type Ty, typename Traits::XmmRegister dst,
+            const typename Traits::Address &src);
+  void psrl(Type Ty, typename Traits::XmmRegister dst, const Immediate &src);
+
+  void addps(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void addps(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void subps(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void subps(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void divps(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void divps(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void mulps(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void mulps(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void minps(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void maxps(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void andps(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void andps(typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void orps(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
+
+  void blendvps(Type Ty, typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+  void blendvps(Type Ty, typename Traits::XmmRegister dst,
+                const typename Traits::Address &src);
+  void pblendvb(Type Ty, typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+  void pblendvb(Type Ty, typename Traits::XmmRegister dst,
+                const typename Traits::Address &src);
+
+  void cmpps(typename Traits::XmmRegister dst, typename Traits::XmmRegister src,
+             typename Traits::Cond::CmppsCond CmpCondition);
+  void cmpps(typename Traits::XmmRegister dst,
+             const typename Traits::Address &src,
+             typename Traits::Cond::CmppsCond CmpCondition);
+
+  void sqrtps(typename Traits::XmmRegister dst);
+  void rsqrtps(typename Traits::XmmRegister dst);
+  void reciprocalps(typename Traits::XmmRegister dst);
+  void movhlps(typename Traits::XmmRegister dst,
+               typename Traits::XmmRegister src);
+  void movlhps(typename Traits::XmmRegister dst,
+               typename Traits::XmmRegister src);
+  void unpcklps(typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+  void unpckhps(typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+  void unpcklpd(typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+  void unpckhpd(typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+
+  void set1ps(typename Traits::XmmRegister dst,
+              typename Traits::GPRRegister tmp, const Immediate &imm);
+  void shufps(typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src, const Immediate &mask);
+
+  void minpd(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void maxpd(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void sqrtpd(typename Traits::XmmRegister dst);
+  void shufpd(typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src, const Immediate &mask);
+
+  void pshufd(Type Ty, typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src, const Immediate &mask);
+  void pshufd(Type Ty, typename Traits::XmmRegister dst,
+              const typename Traits::Address &src, const Immediate &mask);
+  void shufps(Type Ty, typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src, const Immediate &mask);
+  void shufps(Type Ty, typename Traits::XmmRegister dst,
+              const typename Traits::Address &src, const Immediate &mask);
+
+  void cvtdq2ps(Type, typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+  void cvtdq2ps(Type, typename Traits::XmmRegister dst,
+                const typename Traits::Address &src);
+
+  void cvttps2dq(Type, typename Traits::XmmRegister dst,
+                 typename Traits::XmmRegister src);
+  void cvttps2dq(Type, typename Traits::XmmRegister dst,
+                 const typename Traits::Address &src);
+
+  void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
+                typename Traits::GPRRegister src);
+  void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
+                const typename Traits::Address &src);
+
+  void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
+                      typename Traits::XmmRegister src);
+  void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
+                      const typename Traits::Address &src);
+
+  void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
+                 typename Traits::XmmRegister src);
+  void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
+                 const typename Traits::Address &src);
+
+  void ucomiss(Type Ty, typename Traits::XmmRegister a,
+               typename Traits::XmmRegister b);
+  void ucomiss(Type Ty, typename Traits::XmmRegister a,
+               const typename Traits::Address &b);
+
+  void movmskpd(typename Traits::GPRRegister dst,
+                typename Traits::XmmRegister src);
+  void movmskps(typename Traits::GPRRegister dst,
+                typename Traits::XmmRegister src);
+
+  void sqrtss(Type Ty, typename Traits::XmmRegister dst,
+              const typename Traits::Address &src);
+  void sqrtss(Type Ty, typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src);
+
+  void xorpd(typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void xorpd(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+  void xorps(typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void xorps(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+
+  void andpd(typename Traits::XmmRegister dst,
+             const typename Traits::Address &src);
+  void andpd(typename Traits::XmmRegister dst,
+             typename Traits::XmmRegister src);
+
+  void orpd(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
+
+  void insertps(Type Ty, typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src, const Immediate &imm);
+  void insertps(Type Ty, typename Traits::XmmRegister dst,
+                const typename Traits::Address &src, const Immediate &imm);
+
+  void pinsr(Type Ty, typename Traits::XmmRegister dst,
+             typename Traits::GPRRegister src, const Immediate &imm);
+  void pinsr(Type Ty, typename Traits::XmmRegister dst,
+             const typename Traits::Address &src, const Immediate &imm);
+
+  void pextr(Type Ty, typename Traits::GPRRegister dst,
+             typename Traits::XmmRegister src, const Immediate &imm);
+  void pextr(Type Ty, typename Traits::GPRRegister dst,
+             const typename Traits::Address &src, const Immediate &imm);
+
+  void pmovsxdq(typename Traits::XmmRegister dst,
+                typename Traits::XmmRegister src);
+
+  void pcmpeq(Type Ty, typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src);
+  void pcmpeq(Type Ty, typename Traits::XmmRegister dst,
+              const typename Traits::Address &src);
+  void pcmpgt(Type Ty, typename Traits::XmmRegister dst,
+              typename Traits::XmmRegister src);
+  void pcmpgt(Type Ty, typename Traits::XmmRegister dst,
+              const typename Traits::Address &src);
+
+  enum RoundingMode { // Values for the `mode` argument of roundsd().
+    kRoundToNearest = 0x0,
+    kRoundDown = 0x1,
+    kRoundUp = 0x2,
+    kRoundToZero = 0x3
+  };
+  void roundsd(typename Traits::XmmRegister dst,
+               typename Traits::XmmRegister src, RoundingMode mode);
+
+  void fld(Type Ty, const typename Traits::Address &src);
+  void fstp(Type Ty, const typename Traits::Address &dst);
+  void fstp(typename Traits::X87STRegister st);
+
+  void fnstcw(const typename Traits::Address &dst);
+  void fldcw(const typename Traits::Address &src);
+
+  void fistpl(const typename Traits::Address &dst);
+  void fistps(const typename Traits::Address &dst);
+  void fildl(const typename Traits::Address &src);
+  void filds(const typename Traits::Address &src);
+
+  void fincstp();
+
+  void cmp(Type Ty, typename Traits::GPRRegister reg0,
+           typename Traits::GPRRegister reg1);
+  void cmp(Type Ty, typename Traits::GPRRegister reg,
+           const typename Traits::Address &address);
+  void cmp(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void cmp(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister reg);
+  void cmp(Type Ty, const typename Traits::Address &address,
+           const Immediate &imm);
+
+  void test(Type Ty, typename Traits::GPRRegister reg0,
+            typename Traits::GPRRegister reg1);
+  void test(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void test(Type Ty, const typename Traits::Address &address,
+            typename Traits::GPRRegister reg);
+  void test(Type Ty, const typename Traits::Address &address,
+            const Immediate &imm);
+
+  void And(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void And(Type Ty, typename Traits::GPRRegister dst,
+           const typename Traits::Address &address);
+  void And(Type Ty, typename Traits::GPRRegister dst, const Immediate &imm);
+  void And(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister reg);
+  void And(Type Ty, const typename Traits::Address &address,
+           const Immediate &imm);
+
+  void Or(Type Ty, typename Traits::GPRRegister dst,
+          typename Traits::GPRRegister src);
+  void Or(Type Ty, typename Traits::GPRRegister dst,
+          const typename Traits::Address &address);
+  void Or(Type Ty, typename Traits::GPRRegister dst, const Immediate &imm);
+  void Or(Type Ty, const typename Traits::Address &address,
+          typename Traits::GPRRegister reg);
+  void Or(Type Ty, const typename Traits::Address &address,
+          const Immediate &imm);
+
+  void Xor(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void Xor(Type Ty, typename Traits::GPRRegister dst,
+           const typename Traits::Address &address);
+  void Xor(Type Ty, typename Traits::GPRRegister dst, const Immediate &imm);
+  void Xor(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister reg);
+  void Xor(Type Ty, const typename Traits::Address &address,
+           const Immediate &imm);
+
+  void add(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void add(Type Ty, typename Traits::GPRRegister reg,
+           const typename Traits::Address &address);
+  void add(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void add(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister reg);
+  void add(Type Ty, const typename Traits::Address &address,
+           const Immediate &imm);
+
+  void adc(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void adc(Type Ty, typename Traits::GPRRegister dst,
+           const typename Traits::Address &address);
+  void adc(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void adc(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister reg);
+  void adc(Type Ty, const typename Traits::Address &address,
+           const Immediate &imm);
+
+  void sub(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void sub(Type Ty, typename Traits::GPRRegister reg,
+           const typename Traits::Address &address);
+  void sub(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void sub(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister reg);
+  void sub(Type Ty, const typename Traits::Address &address,
+           const Immediate &imm);
+
+  void sbb(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void sbb(Type Ty, typename Traits::GPRRegister reg,
+           const typename Traits::Address &address);
+  void sbb(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void sbb(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister reg);
+  void sbb(Type Ty, const typename Traits::Address &address,
+           const Immediate &imm);
+
+  void cbw();
+  void cwd();
+  void cdq();
+
+  void div(Type Ty, typename Traits::GPRRegister reg);
+  void div(Type Ty, const typename Traits::Address &address);
+
+  void idiv(Type Ty, typename Traits::GPRRegister reg);
+  void idiv(Type Ty, const typename Traits::Address &address);
+
+  void imul(Type Ty, typename Traits::GPRRegister dst,
+            typename Traits::GPRRegister src);
+  void imul(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void imul(Type Ty, typename Traits::GPRRegister reg,
+            const typename Traits::Address &address);
+
+  void imul(Type Ty, typename Traits::GPRRegister reg);
+  void imul(Type Ty, const typename Traits::Address &address);
+
+  void mul(Type Ty, typename Traits::GPRRegister reg);
+  void mul(Type Ty, const typename Traits::Address &address);
+
+  void incl(typename Traits::GPRRegister reg);
+  void incl(const typename Traits::Address &address);
+
+  void decl(typename Traits::GPRRegister reg);
+  void decl(const typename Traits::Address &address);
+
+  void rol(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void rol(Type Ty, typename Traits::GPRRegister operand,
+           typename Traits::GPRRegister shifter);
+  void rol(Type Ty, const typename Traits::Address &operand,
+           typename Traits::GPRRegister shifter);
+
+  void shl(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void shl(Type Ty, typename Traits::GPRRegister operand,
+           typename Traits::GPRRegister shifter);
+  void shl(Type Ty, const typename Traits::Address &operand,
+           typename Traits::GPRRegister shifter);
+
+  void shr(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void shr(Type Ty, typename Traits::GPRRegister operand,
+           typename Traits::GPRRegister shifter);
+  void shr(Type Ty, const typename Traits::Address &operand,
+           typename Traits::GPRRegister shifter);
+
+  void sar(Type Ty, typename Traits::GPRRegister reg, const Immediate &imm);
+  void sar(Type Ty, typename Traits::GPRRegister operand,
+           typename Traits::GPRRegister shifter);
+  void sar(Type Ty, const typename Traits::Address &address,
+           typename Traits::GPRRegister shifter);
+
+  void shld(Type Ty, typename Traits::GPRRegister dst,
+            typename Traits::GPRRegister src);
+  void shld(Type Ty, typename Traits::GPRRegister dst,
+            typename Traits::GPRRegister src, const Immediate &imm);
+  void shld(Type Ty, const typename Traits::Address &operand,
+            typename Traits::GPRRegister src);
+  void shrd(Type Ty, typename Traits::GPRRegister dst,
+            typename Traits::GPRRegister src);
+  void shrd(Type Ty, typename Traits::GPRRegister dst,
+            typename Traits::GPRRegister src, const Immediate &imm);
+  void shrd(Type Ty, const typename Traits::Address &dst,
+            typename Traits::GPRRegister src);
+
+  void neg(Type Ty, typename Traits::GPRRegister reg);
+  void neg(Type Ty, const typename Traits::Address &addr);
+  void notl(typename Traits::GPRRegister reg);
+
+  void bsf(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void bsf(Type Ty, typename Traits::GPRRegister dst,
+           const typename Traits::Address &src);
+  void bsr(Type Ty, typename Traits::GPRRegister dst,
+           typename Traits::GPRRegister src);
+  void bsr(Type Ty, typename Traits::GPRRegister dst,
+           const typename Traits::Address &src);
+
+  void bswap(Type Ty, typename Traits::GPRRegister reg);
+
+  void bt(typename Traits::GPRRegister base,
+          typename Traits::GPRRegister offset);
+
+  void ret();
+  void ret(const Immediate &imm);
+
+  // 'size' indicates size in bytes and must be in the range 1..8.
+  void nop(int size = 1);
+  void int3();
+  void hlt();
+  void ud2();
+
+  void j(typename Traits::Cond::BrCond condition, Label *label,
+         bool near = kFarJump);
+  void j(typename Traits::Cond::BrCond condition,
+         const ConstantRelocatable *label);
+
+  void jmp(typename Traits::GPRRegister reg);
+  void jmp(Label *label, bool near = kFarJump);
+  void jmp(const ConstantRelocatable *label);
+
+  void mfence();
+
+  void lock();
+  void cmpxchg(Type Ty, const typename Traits::Address &address,
+               typename Traits::GPRRegister reg, bool Locked);
+  void cmpxchg8b(const typename Traits::Address &address, bool Locked);
+  void xadd(Type Ty, const typename Traits::Address &address,
+            typename Traits::GPRRegister reg, bool Locked);
+  void xchg(Type Ty, const typename Traits::Address &address,
+            typename Traits::GPRRegister reg);
+
+  void emitSegmentOverride(uint8_t prefix);
+
+  intptr_t preferredLoopAlignment() { return 16; } // constant, always 16
+  void align(intptr_t alignment, intptr_t offset);
+  void bind(Label *label);
+
+  intptr_t CodeSize() const { return Buffer.size(); } // Buffer.size() as-is
+
+private:
+  inline void emitUint8(uint8_t value);
+  inline void emitInt16(int16_t value);
+  inline void emitInt32(int32_t value);
+  inline void emitRegisterOperand(int rm, int reg);
+  inline void emitXmmRegisterOperand(int rm, typename Traits::XmmRegister reg);
+  inline void emitFixup(AssemblerFixup *fixup);
+  inline void emitOperandSizeOverride();
+
+  void emitOperand(int rm, const typename Traits::Operand &operand);
+  void emitImmediate(Type ty, const Immediate &imm);
+  void emitComplexI8(int rm, const typename Traits::Operand &operand,
+                     const Immediate &immediate);
+  void emitComplex(Type Ty, int rm, const typename Traits::Operand &operand,
+                   const Immediate &immediate);
+  void emitLabel(Label *label, intptr_t instruction_size);
+  void emitLabelLink(Label *label);
+  void emitNearLabelLink(Label *label);
+
+  void emitGenericShift(int rm, Type Ty, typename Traits::GPRRegister reg,
+                        const Immediate &imm);
+  void emitGenericShift(int rm, Type Ty,
+                        const typename Traits::Operand &operand,
+                        typename Traits::GPRRegister shifter);
+
+  typedef std::vector<Label *> LabelVector;
+  // A vector of pool-allocated x86 labels for CFG nodes.
+  LabelVector CfgNodeLabels;
+  // A vector of pool-allocated x86 labels for Local labels.
+  LabelVector LocalLabels;
+
+  Label *GetOrCreateLabel(SizeT Number, LabelVector &Labels);
+
+  // The arith_int() methods factor out the commonality between the encodings of
+  // add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp().  The Tag
+  // parameter is statically asserted to be less than 8.
+  template <uint32_t Tag>
+  void arith_int(Type Ty, typename Traits::GPRRegister reg,
+                 const Immediate &imm);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, typename Traits::GPRRegister reg0,
+                 typename Traits::GPRRegister reg1);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, typename Traits::GPRRegister reg,
+                 const typename Traits::Address &address);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, const typename Traits::Address &address,
+                 typename Traits::GPRRegister reg);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, const typename Traits::Address &address,
+                 const Immediate &imm);
+};
+
+template <class Machine>
+inline void AssemblerX86Base<Machine>::emitUint8(uint8_t value) {
+  Buffer.emit<uint8_t>(value); // forwards one uint8_t to Buffer
+}
+
+template <class Machine>
+inline void AssemblerX86Base<Machine>::emitInt16(int16_t value) {
+  Buffer.emit<int16_t>(value); // forwards one int16_t to Buffer
+}
+
+template <class Machine>
+inline void AssemblerX86Base<Machine>::emitInt32(int32_t value) {
+  Buffer.emit<int32_t>(value); // forwards one int32_t to Buffer
+}
+
+template <class Machine>
+inline void AssemblerX86Base<Machine>::emitRegisterOperand(int rm, int reg) {
+  assert(rm >= 0 && rm < 8); // only rm is range-checked here, not reg
+  Buffer.emit<uint8_t>(0xC0 + (rm << 3) + reg); // rm -> bits 5:3, reg -> low
+}
+
+template <class Machine>
+inline void AssemblerX86Base<Machine>::emitXmmRegisterOperand(
+    int rm, typename Traits::XmmRegister reg) { // reg is cast to a GPR id
+  emitRegisterOperand(rm, static_cast<typename Traits::GPRRegister>(reg));
+}
+
+template <class Machine>
+inline void AssemblerX86Base<Machine>::emitFixup(AssemblerFixup *fixup) {
+  Buffer.emitFixup(fixup); // plain delegation to Buffer
+}
+
+template <class Machine>
+inline void AssemblerX86Base<Machine>::emitOperandSizeOverride() {
+  emitUint8(0x66); // the override is the single byte 0x66
+}
+
+} // end of namespace X86Internal
+
+namespace X8632 { // X8632-visible aliases for the shared X86Internal types
+using Immediate = ::Ice::X86Internal::Immediate;
+using Label = ::Ice::X86Internal::Label;
+} // end of namespace X8632
+} // end of namespace Ice
+
+#include "IceAssemblerX86BaseImpl.h"
+
+#endif // SUBZERO_SRC_ICEASSEMBLERX86BASE_H
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
new file mode 100644
index 0000000..f8ba4d4
--- /dev/null
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -0,0 +1,3109 @@
+//===- subzero/src/IceAssemblerX86BaseImpl.h - base x86 assembler -*- C++ -*-=//
+// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+//
+// Modified by the Subzero authors.
+//
+//===----------------------------------------------------------------------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AssemblerX86Base template class, which is the base
+// Assembler class for X86 assemblers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IceAssemblerX86Base.h"
+
+#include "IceCfg.h"
+#include "IceOperand.h"
+
+namespace Ice {
+namespace X86Internal {
+
+template <class Machine>
+AssemblerX86Base<Machine>::~AssemblerX86Base() {
+  // In assert-enabled builds, run FinalCheck() on every label that exists.
+  // GetOrCreateLabel() may leave null gap entries after resize(); skip them.
+  if (!BuildDefs::asserts())
+    return;
+  for (const LabelVector *Labels : {&CfgNodeLabels, &LocalLabels})
+    for (const Label *L : *Labels)
+      if (L != nullptr)
+        L->FinalCheck();
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::alignFunction() {
+  // Emits one hlt() per byte that Utils::OffsetToAlignment() reports for the
+  // current buffer position and the bundle alignment computed below.
+  // The countdown assumes each hlt() occupies exactly one byte.
+  const SizeT Align = 1 << getBundleAlignLog2Bytes();
+  const SizeT Padding = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
+  for (SizeT Left = Padding; Left > 0; --Left)
+    hlt();
+}
+
+// Returns the label stored at index Number of Labels, creating it on demand.
+//
+// Labels is grown when Number lies at or past its end, so lower slots that
+// were never requested can remain null. New Label objects are constructed
+// with placement new in storage obtained from this->allocate<Label>().
+template <class Machine>
+Label *AssemblerX86Base<Machine>::GetOrCreateLabel(SizeT Number,
+                                                   LabelVector &Labels) {
+  // Make index Number valid; slots added by resize() start out as nullptr.
+  if (Number >= Labels.size())
+    Labels.resize(Number + 1);
+
+  // Populate the slot on first use. Slot aliases the vector element, so the
+  // assignment below stores the new label in Labels as well.
+  Label *&Slot = Labels[Number];
+  if (Slot == nullptr)
+    Slot = new (this->allocate<Label>()) Label();
+  return Slot;
+}
+
+template <class Machine>
+Label *AssemblerX86Base<Machine>::GetOrCreateCfgNodeLabel(SizeT NodeNumber) {
+  return GetOrCreateLabel(NodeNumber, CfgNodeLabels); // CFG-node label table
+}
+
+template <class Machine>
+Label *AssemblerX86Base<Machine>::GetOrCreateLocalLabel(SizeT Number) {
+  return GetOrCreateLabel(Number, LocalLabels); // local-label table
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::bindCfgNodeLabel(SizeT NodeNumber) {
+  // Asserts that getPreliminary() is false before binding the node's label.
+  assert(!getPreliminary());
+  this->bind(GetOrCreateCfgNodeLabel(NodeNumber));
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::BindLocalLabel(SizeT Number) {
+  Label *Local = GetOrCreateLocalLabel(Number); // fetched/created either way
+  if (!getPreliminary())
+    this->bind(Local); // bound only while getPreliminary() is false
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::call(typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0xFF);             // opcode byte
+  emitRegisterOperand(2, reg); // 2 fills bits 5:3, reg the low bits
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::call(const typename Traits::Address &address) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0xFF);         // same opcode byte as the register overload
+  emitOperand(2, address); // address operand, passed with the same value 2
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::call(const ConstantRelocatable *label) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  const intptr_t Start = Buffer.getPosition();
+  emitUint8(0xE8); // opcode byte
+  emitFixup(this->createFixup(Traits::PcRelFixup, label)); // fixup on label
+  emitInt32(-4);   // 32-bit placeholder emitted right after the fixup
+  assert((Buffer.getPosition() - Start) == kCallExternalLabelSize);
+  (void)Start; // Start is otherwise unused when assert() is compiled out
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::call(const Immediate &abs_address) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  const intptr_t Start = Buffer.getPosition();
+  emitUint8(0xE8); // same opcode byte as the relocatable-label overload
+  emitFixup(this->createFixup(Traits::PcRelFixup, AssemblerFixup::NullSymbol));
+  emitInt32(abs_address.value() - 4); // immediate value minus 4
+  assert((Buffer.getPosition() - Start) == kCallExternalLabelSize);
+  (void)Start; // Start is otherwise unused when assert() is compiled out
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pushl(typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0x50 + reg); // single byte: 0x50 plus the register number
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::popl(typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0x58 + reg); // single byte: 0x58 plus the register number
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::popl(const typename Traits::Address &address) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0x8F);         // opcode byte
+  emitOperand(0, address); // address operand, passed with value 0
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::pushal() {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0x60); // the whole instruction is this one byte
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::popal() {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0x61); // the whole instruction is this one byte
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::setcc(typename Traits::Cond::BrCond condition,
+                                      typename Traits::ByteRegister dst) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0x0F);             // first opcode byte
+  emitUint8(0x90 + condition); // second opcode byte carries the condition
+  emitUint8(0xC0 + dst);       // operand byte naming the byte register
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::setcc(typename Traits::Cond::BrCond condition,
+                                      const typename Traits::Address &address) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0x0F);             // first opcode byte
+  emitUint8(0x90 + condition); // second opcode byte carries the condition
+  emitOperand(0, address);     // address operand, passed with value 0
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::mov(Type Ty, typename Traits::GPRRegister dst,
+                                    const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  if (!isByteSizedType(Ty)) {
+    if (Ty == IceType_i16)
+      emitOperandSizeOverride();
+    emitUint8(0xB8 + dst); // non-byte form: opcode 0xB8 plus the register
+    emitImmediate(Ty, imm);
+    return;
+  }
+  emitUint8(0xB0 + dst);         // byte form: opcode 0xB0 plus the register
+  emitUint8(imm.value() & 0xFF); // only the low 8 bits of the immediate
+}
+
+// Register-to-register mov. Byte-sized types use opcode 0x88, every other
+// type uses 0x89, and i16 is additionally preceded by the operand-size
+// override. Note the operand order: src is handed to emitRegisterOperand()
+// first and dst second.
+template <class Machine>
+void AssemblerX86Base<Machine>::mov(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(isByteSizedType(Ty) ? 0x88 : 0x89);
+  emitRegisterOperand(src, dst);
+}
+
+// mov from the address operand src into register dst. Byte-sized types use
+// opcode 0x8A, every other type uses 0x8B, and i16 is additionally preceded
+// by the operand-size override. dst is handed to emitOperand() first and the
+// address second.
+template <class Machine>
+void AssemblerX86Base<Machine>::mov(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(isByteSizedType(Ty) ? 0x8A : 0x8B);
+  emitOperand(dst, src);
+}
+
+// mov from register src into the address operand dst. Byte-sized types use
+// opcode 0x88, every other type uses 0x89, and i16 is additionally preceded
+// by the operand-size override. src is handed to emitOperand() first and the
+// address second.
+template <class Machine>
+void AssemblerX86Base<Machine>::mov(Type Ty,
+                                    const typename Traits::Address &dst,
+                                    typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(isByteSizedType(Ty) ? 0x88 : 0x89);
+  emitOperand(src, dst);
+}
+
+// mov of an immediate into the address operand dst: opcode 0xC6 with an 8-bit
+// immediate for byte-sized types, otherwise 0xC7 with emitImmediate(Ty, imm).
+template <class Machine>
+void AssemblerX86Base<Machine>::mov(Type Ty,
+                                    const typename Traits::Address &dst,
+                                    const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  const bool IsByte = isByteSizedType(Ty);
+  emitUint8(IsByte ? 0xC6 : 0xC7);
+  emitOperand(0, dst);
+  if (IsByte)
+    emitUint8(imm.value() & 0xFF);
+  else
+    emitImmediate(Ty, imm);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movzx(Type SrcTy,
+                                      typename Traits::GPRRegister dst,
+                                      typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  const bool IsByte = isByteSizedType(SrcTy);
+  assert(IsByte || SrcTy == IceType_i16); // source is byte-sized or i16
+  emitUint8(0x0F);                        // first opcode byte
+  emitUint8(IsByte ? 0xB6 : 0xB7);        // second byte picks the source size
+  emitRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movzx(Type SrcTy,
+                                      typename Traits::GPRRegister dst,
+                                      const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  const bool IsByte = isByteSizedType(SrcTy);
+  assert(IsByte || SrcTy == IceType_i16); // source is byte-sized or i16
+  emitUint8(0x0F);                        // first opcode byte
+  emitUint8(IsByte ? 0xB6 : 0xB7);        // second byte picks the source size
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movsx(Type SrcTy,
+                                      typename Traits::GPRRegister dst,
+                                      typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  const bool IsByte = isByteSizedType(SrcTy);
+  assert(IsByte || SrcTy == IceType_i16); // source is byte-sized or i16
+  emitUint8(0x0F);                        // first opcode byte
+  emitUint8(IsByte ? 0xBE : 0xBF);        // second byte picks the source size
+  emitRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movsx(Type SrcTy,
+                                      typename Traits::GPRRegister dst,
+                                      const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  const bool IsByte = isByteSizedType(SrcTy);
+  assert(IsByte || SrcTy == IceType_i16); // source is byte-sized or i16
+  emitUint8(0x0F);                        // first opcode byte
+  emitUint8(IsByte ? 0xBE : 0xBF);        // second byte picks the source size
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::lea(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // no other type accepted
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x8D); // opcode byte
+  emitOperand(dst, src);
+}
+
+// Register-source cmov; Ty must be i16 or i32, and cond is added to 0x40.
+template <class Machine>
+void AssemblerX86Base<Machine>::cmov(Type Ty,
+                                     typename Traits::Cond::BrCond cond,
+                                     typename Traits::GPRRegister dst,
+                                     typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0x40 + cond);
+  emitRegisterOperand(dst, src);
+}
+
+// Address-source cmov; Ty must be i16 or i32, and cond is added to 0x40.
+template <class Machine>
+void AssemblerX86Base<Machine>::cmov(Type Ty,
+                                     typename Traits::Cond::BrCond cond,
+                                     typename Traits::GPRRegister dst,
+                                     const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0x40 + cond);
+  emitOperand(dst, src);
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::rep_movsb() {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(0xF3); // prefix byte
+  emitUint8(0xA4); // opcode byte
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movss(Type Ty, typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // prefix by type
+  emitUint8(0x0F);
+  emitUint8(0x10); // this overload uses opcode 0x10 with (dst, src)
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movss(Type Ty,
+                                      const typename Traits::Address &dst,
+                                      typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // prefix by type
+  emitUint8(0x0F);
+  emitUint8(0x11); // this overload uses opcode 0x11 with (src, dst)
+  emitOperand(src, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movss(Type Ty, typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity Guard(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // prefix by type
+  emitUint8(0x0F);
+  emitUint8(0x11); // register form also uses 0x11, hence (src, dst) below
+  emitXmmRegisterOperand(src, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movd(typename Traits::XmmRegister dst,
+                                     typename Traits::GPRRegister src) {
+  // GPR -> XMM move: bytes 66 0F 6E, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x6E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movd(typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Memory -> XMM move: bytes 66 0F 6E, then the (dst, src) memory operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x6E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movd(typename Traits::GPRRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // XMM -> GPR move: bytes 66 0F 7E; the register operand is (src, dst).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x7E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitRegisterOperand(src, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movd(const typename Traits::Address &dst,
+                                     typename Traits::XmmRegister src) {
+  // XMM -> memory move: bytes 66 0F 7E; the operand is passed as (src, dst).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x7E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(src, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movq(typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // XMM -> XMM quadword move: bytes F3 0F 7E, then the register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0xF3, 0x0F, 0x7E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movq(const typename Traits::Address &dst,
+                                     typename Traits::XmmRegister src) {
+  // XMM -> memory quadword store: bytes 66 0F D6; operand is (src, dst).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xD6};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(src, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movq(typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Memory -> XMM quadword load: bytes F3 0F 7E, then the memory operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0xF3, 0x0F, 0x7E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::addss(Type Ty, typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Scalar add, register source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 58.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x58);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::addss(Type Ty, typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Scalar add, memory source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 58.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x58);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::subss(Type Ty, typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Scalar subtract, register source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 5C.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5C);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::subss(Type Ty, typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Scalar subtract, memory source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 5C.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5C);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::mulss(Type Ty, typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Scalar multiply, register source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 59.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x59);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::mulss(Type Ty, typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Scalar multiply, memory source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 59.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x59);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::divss(Type Ty, typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Scalar divide, register source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 5E.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5E);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::divss(Type Ty, typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Scalar divide, memory source. Prefix is F3 when
+  // isFloat32Asserting32Or64(Ty) holds and F2 otherwise; opcode is 0F 5E.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xF3);
+  else
+    emitUint8(0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5E);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::fld(Type Ty,
+                                    const typename Traits::Address &src) {
+  // x87 load from memory: opcode D9 when isFloat32Asserting32Or64(Ty) holds,
+  // DD otherwise; the operand is emitted with 0 as its first argument.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xD9);
+  else
+    emitUint8(0xDD);
+  emitOperand(0, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::fstp(Type Ty,
+                                     const typename Traits::Address &dst) {
+  // x87 store-and-pop to memory: opcode D9 when
+  // isFloat32Asserting32Or64(Ty) holds, DD otherwise; the operand is emitted
+  // with 3 as its first argument.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  if (isFloat32Asserting32Or64(Ty))
+    emitUint8(0xD9);
+  else
+    emitUint8(0xDD);
+  emitOperand(3, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::fstp(typename Traits::X87STRegister st) {
+  // Register form: byte DD followed by D8 plus the stack register index.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0xDD);
+  const int RegisterByte = 0xD8 + st;
+  emitUint8(RegisterByte);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movaps(typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src) {
+  // Register-to-register move: opcode 0F 28, then the register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x28};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movups(typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src) {
+  // Register-to-register form: opcode 0F 10, then the register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x10};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movups(typename Traits::XmmRegister dst,
+                                       const typename Traits::Address &src) {
+  // Load form: opcode 0F 10, then the (dst, src) memory operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x10};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movups(const typename Traits::Address &dst,
+                                       typename Traits::XmmRegister src) {
+  // Store form: opcode 0F 11; the operand is passed as (src, dst).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x11};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(src, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::padd(Type Ty, typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Packed integer add, register source. After 66 0F the final opcode byte
+  // is FC for byte-sized element types, FD for i16 and FE for anything else.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  uint8_t Opcode = 0xFE;
+  if (isByteSizedArithType(Ty))
+    Opcode = 0xFC;
+  else if (Ty == IceType_i16)
+    Opcode = 0xFD;
+  emitUint8(Opcode);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::padd(Type Ty, typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Packed integer add, memory source. After 66 0F the final opcode byte
+  // is FC for byte-sized element types, FD for i16 and FE for anything else.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  uint8_t Opcode = 0xFE;
+  if (isByteSizedArithType(Ty))
+    Opcode = 0xFC;
+  else if (Ty == IceType_i16)
+    Opcode = 0xFD;
+  emitUint8(Opcode);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pand(Type /* Ty */,
+                                     typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Bitwise AND, register source: bytes 66 0F DB. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xDB};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pand(Type /* Ty */,
+                                     typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Bitwise AND, memory source: bytes 66 0F DB. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xDB};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pandn(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // AND-NOT, register source: bytes 66 0F DF. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xDF};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pandn(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // AND-NOT, memory source: bytes 66 0F DF. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xDF};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pmull(Type Ty, typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Packed low multiply, register source. After 66 0F: i16 uses the single
+  // byte D5, while i32 (asserted) uses the two bytes 38 40.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty != IceType_i16) {
+    assert(Ty == IceType_i32);
+    emitUint8(0x38);
+    emitUint8(0x40);
+  } else {
+    emitUint8(0xD5);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pmull(Type Ty, typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Packed low multiply, memory source. After 66 0F: i16 uses the single
+  // byte D5, while i32 (asserted) uses the two bytes 38 40.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty != IceType_i16) {
+    assert(Ty == IceType_i32);
+    emitUint8(0x38);
+    emitUint8(0x40);
+  } else {
+    emitUint8(0xD5);
+  }
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pmuludq(Type /* Ty */,
+                                        typename Traits::XmmRegister dst,
+                                        typename Traits::XmmRegister src) {
+  // Register source: bytes 66 0F F4. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xF4};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pmuludq(Type /* Ty */,
+                                        typename Traits::XmmRegister dst,
+                                        const typename Traits::Address &src) {
+  // Memory source: bytes 66 0F F4. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xF4};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::por(Type /* Ty */,
+                                    typename Traits::XmmRegister dst,
+                                    typename Traits::XmmRegister src) {
+  // Bitwise OR, register source: bytes 66 0F EB. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xEB};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::por(Type /* Ty */,
+                                    typename Traits::XmmRegister dst,
+                                    const typename Traits::Address &src) {
+  // Bitwise OR, memory source: bytes 66 0F EB. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xEB};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psub(Type Ty, typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Packed integer subtract, register source. After 66 0F the final opcode
+  // byte is F8 for byte-sized element types, F9 for i16 and FA otherwise.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  uint8_t Opcode = 0xFA;
+  if (isByteSizedArithType(Ty))
+    Opcode = 0xF8;
+  else if (Ty == IceType_i16)
+    Opcode = 0xF9;
+  emitUint8(Opcode);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psub(Type Ty, typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Packed integer subtract, memory source. After 66 0F the final opcode
+  // byte is F8 for byte-sized element types, F9 for i16 and FA otherwise.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  uint8_t Opcode = 0xFA;
+  if (isByteSizedArithType(Ty))
+    Opcode = 0xF8;
+  else if (Ty == IceType_i16)
+    Opcode = 0xF9;
+  emitUint8(Opcode);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pxor(Type /* Ty */,
+                                     typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Bitwise XOR, register source: bytes 66 0F EF. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xEF};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pxor(Type /* Ty */,
+                                     typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Bitwise XOR, memory source: bytes 66 0F EF. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xEF};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psll(Type Ty, typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Packed shift left by a register count. After 66 0F the opcode byte is
+  // F1 for i16 and F2 for i32 (asserted).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty != IceType_i16) {
+    assert(Ty == IceType_i32);
+    emitUint8(0xF2);
+  } else {
+    emitUint8(0xF1);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psll(Type Ty, typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Packed shift left by a count read from memory. After 66 0F the opcode
+  // byte is F1 for i16 and F2 for i32 (asserted).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty != IceType_i16) {
+    assert(Ty == IceType_i32);
+    emitUint8(0xF2);
+  } else {
+    emitUint8(0xF1);
+  }
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psll(Type Ty, typename Traits::XmmRegister dst,
+                                     const Immediate &imm) {
+  // Packed shift left by an immediate (asserted to fit in int8). After
+  // 66 0F the opcode byte is 71 for i16 and 72 for i32 (asserted); the
+  // register operand is emitted with 6 as its first argument, followed by
+  // the low 8 bits of the immediate.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  assert(imm.is_int8());
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty != IceType_i16) {
+    assert(Ty == IceType_i32);
+    emitUint8(0x72);
+  } else {
+    emitUint8(0x71);
+  }
+  emitRegisterOperand(6, dst);
+  const auto ShiftCount = imm.value() & 0xFF;
+  emitUint8(ShiftCount);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psra(Type Ty, typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Packed arithmetic shift right by a register count. After 66 0F the
+  // opcode byte is E1 for i16 and E2 for i32 (asserted).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_i16:
+    emitUint8(0xE1);
+    break;
+  default:
+    assert(Ty == IceType_i32);
+    emitUint8(0xE2);
+    break;
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psra(Type Ty, typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Packed arithmetic shift right by a count read from memory. After 66 0F
+  // the opcode byte is E1 for i16 and E2 for i32 (asserted).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_i16:
+    emitUint8(0xE1);
+    break;
+  default:
+    assert(Ty == IceType_i32);
+    emitUint8(0xE2);
+    break;
+  }
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psra(Type Ty, typename Traits::XmmRegister dst,
+                                     const Immediate &imm) {
+  // Packed arithmetic shift right by an immediate (asserted to fit in
+  // int8). After 66 0F the opcode byte is 71 for i16 and 72 for i32
+  // (asserted); the register operand is emitted with 4 as its first
+  // argument, followed by the low 8 bits of the immediate.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  assert(imm.is_int8());
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_i16:
+    emitUint8(0x71);
+    break;
+  default:
+    assert(Ty == IceType_i32);
+    emitUint8(0x72);
+    break;
+  }
+  emitRegisterOperand(4, dst);
+  const auto ShiftCount = imm.value() & 0xFF;
+  emitUint8(ShiftCount);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psrl(Type Ty, typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Packed logical shift right by a register count. After 66 0F the opcode
+  // byte is D1 for i16, D3 for f64, and D2 for i32/f32/v4f32 (asserted).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_i16:
+    emitUint8(0xD1);
+    break;
+  case IceType_f64:
+    emitUint8(0xD3);
+    break;
+  default:
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0xD2);
+    break;
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psrl(Type Ty, typename Traits::XmmRegister dst,
+                                     const typename Traits::Address &src) {
+  // Packed logical shift right by a count read from memory. After 66 0F
+  // the opcode byte is D1 for i16, D3 for f64, and D2 for i32/f32/v4f32
+  // (asserted).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_i16:
+    emitUint8(0xD1);
+    break;
+  case IceType_f64:
+    emitUint8(0xD3);
+    break;
+  default:
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0xD2);
+    break;
+  }
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::psrl(Type Ty, typename Traits::XmmRegister dst,
+                                     const Immediate &imm) {
+  // Packed logical shift right by an immediate (asserted to fit in int8).
+  // After 66 0F the opcode byte is 71 for i16, 73 for f64, and 72 for
+  // i32/f32/v4f32 (asserted); the register operand is emitted with 2 as its
+  // first argument, followed by the low 8 bits of the immediate.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  assert(imm.is_int8());
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_i16:
+    emitUint8(0x71);
+    break;
+  case IceType_f64:
+    emitUint8(0x73);
+    break;
+  default:
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0x72);
+    break;
+  }
+  emitRegisterOperand(2, dst);
+  const auto ShiftCount = imm.value() & 0xFF;
+  emitUint8(ShiftCount);
+}
+
+// {add,sub,mul,div}ps are given a Ty parameter for consistency with
+// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows
+// addpd, etc., we can use the Ty parameter to decide on adding
+// a 0x66 prefix.
+template <class Machine>
+void AssemblerX86Base<Machine>::addps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Packed add, register source: opcode 0F 58. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x58};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::addps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Packed add, memory source: opcode 0F 58. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x58};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::subps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Packed subtract, register source: opcode 0F 5C. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x5C};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::subps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Packed subtract, memory source: opcode 0F 5C. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x5C};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::divps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Packed divide, register source: opcode 0F 5E. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x5E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::divps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Packed divide, memory source: opcode 0F 5E. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x5E};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::mulps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Packed multiply, register source: opcode 0F 59. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x59};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::mulps(Type /* Ty */,
+                                      typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Packed multiply, memory source: opcode 0F 59. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x59};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::minps(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Packed minimum: opcode 0F 5D, then the register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x5D};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::maxps(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Packed maximum: opcode 0F 5F, then the register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x5F};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::andps(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Bitwise AND, register source: opcode 0F 54.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x54};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::andps(typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  // Bitwise AND, memory source: opcode 0F 54.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x54};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::orps(typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  // Bitwise OR, register source: opcode 0F 56.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x56};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::blendvps(Type /* Ty */,
+                                         typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  // Register source: bytes 66 0F 38 14. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x38, 0x14};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::blendvps(Type /* Ty */,
+                                         typename Traits::XmmRegister dst,
+                                         const typename Traits::Address &src) {
+  // Memory source: bytes 66 0F 38 14. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x38, 0x14};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pblendvb(Type /* Ty */,
+                                         typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  // Register source: bytes 66 0F 38 10. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x38, 0x10};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pblendvb(Type /* Ty */,
+                                         typename Traits::XmmRegister dst,
+                                         const typename Traits::Address &src) {
+  // Memory source: bytes 66 0F 38 10. The type is unused.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x38, 0x10};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::cmpps(
+    typename Traits::XmmRegister dst, typename Traits::XmmRegister src,
+    typename Traits::Cond::CmppsCond CmpCondition) {
+  // Packed compare, register source: opcode 0F C2, the register operand,
+  // then the comparison condition as a trailing byte.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0xC2};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  emitUint8(CmpCondition);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::cmpps(
+    typename Traits::XmmRegister dst, const typename Traits::Address &src,
+    typename Traits::Cond::CmppsCond CmpCondition) {
+  // Packed compare, memory source: opcode 0F C2, the memory operand, then
+  // the comparison condition as a trailing byte.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0xC2};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+  emitUint8(CmpCondition);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::sqrtps(typename Traits::XmmRegister dst) {
+  // In-place form: opcode 0F 51 with dst as both operands.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x51};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::rsqrtps(typename Traits::XmmRegister dst) {
+  // In-place form: opcode 0F 52 with dst as both operands.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x52};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::reciprocalps(typename Traits::XmmRegister dst) {
+  // In-place form: opcode 0F 53 with dst as both operands.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x53};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movhlps(typename Traits::XmmRegister dst,
+                                        typename Traits::XmmRegister src) {
+  // Opcode 0F 12, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x12};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::movlhps(typename Traits::XmmRegister dst,
+                                        typename Traits::XmmRegister src) {
+  // Opcode 0F 16, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x16};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::unpcklps(typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  // Opcode 0F 14, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x14};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::unpckhps(typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  // Opcode 0F 15, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0x15};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::unpcklpd(typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  // Bytes 66 0F 14, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x14};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::unpckhpd(typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  // Bytes 66 0F 15, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x15};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::set1ps(typename Traits::XmmRegister dst,
+                                       typename Traits::GPRRegister tmp1,
+                                       const Immediate &imm) {
+  // Three-step sequence that clobbers tmp1:
+  //   1. materialize the 32-bit immediate in tmp1,
+  //   2. copy tmp1 into dst,
+  //   3. shuffle dst with itself using mask 0 so that the low lane is
+  //      broadcast into the other three lanes.
+  mov(IceType_i32, tmp1, imm);
+  movd(dst, tmp1);
+  const Immediate BroadcastLowLane(0x0);
+  shufps(dst, dst, BroadcastLowLane);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shufps(typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src,
+                                       const Immediate &imm) {
+  // Opcode 0F C6, the register operand, then the shuffle mask byte (the
+  // immediate is asserted to fit in uint8).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0xC6};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pshufd(Type /* Ty */,
+                                       typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src,
+                                       const Immediate &imm) {
+  // Register source: bytes 66 0F 70, the register operand, then the shuffle
+  // mask byte (the immediate is asserted to fit in uint8).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x70};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::pshufd(Type /* Ty */,
+                                       typename Traits::XmmRegister dst,
+                                       const typename Traits::Address &src,
+                                       const Immediate &imm) {
+  // Memory source: bytes 66 0F 70, the memory operand, then the shuffle
+  // mask byte (the immediate is asserted to fit in uint8).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x70};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shufps(Type /* Ty */,
+                                       typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src,
+                                       const Immediate &imm) {
+  // Typed overload, register source; the type is unused. Emits opcode
+  // 0F C6, the register operand, then the mask byte (asserted uint8).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0xC6};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shufps(Type /* Ty */,
+                                       typename Traits::XmmRegister dst,
+                                       const typename Traits::Address &src,
+                                       const Immediate &imm) {
+  // Typed overload, memory source; the type is unused. Emits opcode
+  // 0F C6, the memory operand, then the mask byte (asserted uint8).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x0F, 0xC6};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::minpd(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Bytes 66 0F 5D, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x5D};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::maxpd(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  // Bytes 66 0F 5F, then the (dst, src) register operand.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x5F};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::sqrtpd(typename Traits::XmmRegister dst) {
+  // In-place form: bytes 66 0F 51 with dst as both operands.
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0x51};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shufpd(typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src,
+                                       const Immediate &imm) {
+  // Bytes 66 0F C6, the register operand, then the shuffle mask byte (the
+  // immediate is asserted to fit in uint8).
+  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);
+  static const uint8_t Opcode[] = {0x66, 0x0F, 0xC6};
+  for (uint8_t Byte : Opcode)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+template <class Machine> // cvtdq2ps xmm, xmm: emits 0x0F 0x5B + operands.
+void AssemblerX86Base<Machine>::cvtdq2ps(Type /* Ignore */,
+                                         typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5B);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // cvtdq2ps xmm, Address: same opcode bytes.
+void AssemblerX86Base<Machine>::cvtdq2ps(Type /* Ignore */,
+                                         typename Traits::XmmRegister dst,
+                                         const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x5B);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // cvttps2dq xmm, xmm: emits 0xF3 0x0F 0x5B.
+void AssemblerX86Base<Machine>::cvttps2dq(Type /* Ignore */,
+                                          typename Traits::XmmRegister dst,
+                                          typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3); // Only the 0xF3 prefix differs from cvtdq2ps's bytes.
+  emitUint8(0x0F);
+  emitUint8(0x5B);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // cvttps2dq xmm, Address: same opcode bytes.
+void AssemblerX86Base<Machine>::cvttps2dq(Type /* Ignore */,
+                                          typename Traits::XmmRegister dst,
+                                          const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3);
+  emitUint8(0x0F);
+  emitUint8(0x5B);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // Integer register -> scalar float: prefix, 0x0F 0x2A.
+void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
+                                         typename Traits::XmmRegister dst,
+                                         typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2); // Prefix chosen by the destination type.
+  emitUint8(0x0F);
+  emitUint8(0x2A);
+  emitRegisterOperand(dst, src);
+}
+
+template <class Machine> // Address-source overload; same prefix/opcode choice.
+void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
+                                         typename Traits::XmmRegister dst,
+                                         const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2A);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // Scalar float width conversion: prefix, 0x0F 0x5A.
+void AssemblerX86Base<Machine>::cvtfloat2float(
+    Type SrcTy, typename Traits::XmmRegister dst,
+    typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // ss2sd or sd2ss
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2); // Prefix chosen by the *source* type.
+  emitUint8(0x0F);
+  emitUint8(0x5A);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // Address-source overload; same prefix/opcode choice.
+void AssemblerX86Base<Machine>::cvtfloat2float(
+    Type SrcTy, typename Traits::XmmRegister dst,
+    const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x5A);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // Scalar float -> integer register: prefix, 0x0F 0x2C.
+void AssemblerX86Base<Machine>::cvttss2si(Type SrcTy,
+                                          typename Traits::GPRRegister dst,
+                                          typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2); // Prefix chosen by the source type.
+  emitUint8(0x0F);
+  emitUint8(0x2C);
+  emitXmmRegisterOperand(dst, src); // The GPR dst is passed to the xmm register-operand helper.
+}
+
+template <class Machine> // Address-source overload; same prefix/opcode choice.
+void AssemblerX86Base<Machine>::cvttss2si(Type SrcTy,
+                                          typename Traits::GPRRegister dst,
+                                          const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2C);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // Scalar float compare: [0x66] 0x0F 0x2E + operands.
+void AssemblerX86Base<Machine>::ucomiss(Type Ty, typename Traits::XmmRegister a,
+                                        typename Traits::XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_f64) // Only f64 gets the 0x66 prefix; every other Ty is emitted without it.
+    emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x2E);
+  emitXmmRegisterOperand(a, b);
+}
+
+template <class Machine> // Address overload for b; same prefix rule as above.
+void AssemblerX86Base<Machine>::ucomiss(Type Ty, typename Traits::XmmRegister a,
+                                        const typename Traits::Address &b) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_f64)
+    emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x2E);
+  emitOperand(a, b);
+}
+
+template <class Machine> // movmskpd gpr, xmm: emits 0x66 0x0F 0x50 + operands.
+void AssemblerX86Base<Machine>::movmskpd(typename Traits::GPRRegister dst,
+                                         typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x50);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // movmskps gpr, xmm: emits 0x0F 0x50 (no 0x66 prefix).
+void AssemblerX86Base<Machine>::movmskps(typename Traits::GPRRegister dst,
+                                         typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x50);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // Scalar sqrt, Address source: prefix, 0x0F 0x51.
+void AssemblerX86Base<Machine>::sqrtss(Type Ty,
+                                       typename Traits::XmmRegister dst,
+                                       const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // Prefix chosen by Ty.
+  emitUint8(0x0F);
+  emitUint8(0x51);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // Scalar sqrt, register source: same bytes.
+void AssemblerX86Base<Machine>::sqrtss(Type Ty,
+                                       typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x51);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // xorpd xmm, Address: emits 0x66 0x0F 0x57.
+void AssemblerX86Base<Machine>::xorpd(typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x57);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // xorpd xmm, xmm: same opcode bytes.
+void AssemblerX86Base<Machine>::xorpd(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x57);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // orpd xmm, xmm: emits 0x66 0x0F 0x56 + operands.
+void AssemblerX86Base<Machine>::orpd(typename Traits::XmmRegister dst,
+                                     typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x56);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // xorps xmm, Address: emits 0x0F 0x57 (no prefix).
+void AssemblerX86Base<Machine>::xorps(typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x57);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // xorps xmm, xmm: same opcode bytes.
+void AssemblerX86Base<Machine>::xorps(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x57);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // andpd xmm, Address: emits 0x66 0x0F 0x54.
+void AssemblerX86Base<Machine>::andpd(typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x54);
+  emitOperand(dst, src);
+}
+
+template <class Machine> // andpd xmm, xmm: same opcode bytes.
+void AssemblerX86Base<Machine>::andpd(typename Traits::XmmRegister dst,
+                                      typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x54);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // insertps xmm, xmm, imm: 0x66 0x0F 0x3A 0x21.
+void AssemblerX86Base<Machine>::insertps(Type Ty,
+                                         typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src,
+                                         const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8()); // imm is written as exactly one byte at the end.
+  assert(isVectorFloatingType(Ty));
+  (void)Ty; // Ty is referenced only by the assert above.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A);
+  emitUint8(0x21);
+  emitXmmRegisterOperand(dst, src);
+  emitUint8(imm.value());
+}
+
+template <class Machine> // insertps xmm, Address, imm: same opcode bytes.
+void AssemblerX86Base<Machine>::insertps(Type Ty,
+                                         typename Traits::XmmRegister dst,
+                                         const typename Traits::Address &src,
+                                         const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  assert(isVectorFloatingType(Ty));
+  (void)Ty; // Ty is referenced only by the assert above.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A);
+  emitUint8(0x21);
+  emitOperand(dst, src);
+  emitUint8(imm.value());
+}
+
+template <class Machine> // Insert a GPR value into an xmm lane chosen by imm.
+void AssemblerX86Base<Machine>::pinsr(Type Ty, typename Traits::XmmRegister dst,
+                                      typename Traits::GPRRegister src,
+                                      const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  if (Ty == IceType_i16) { // i16 uses the three-byte form 0x66 0x0F 0xC4.
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0xC4);
+    emitXmmRegisterOperand(dst, typename Traits::XmmRegister(src)); // GPR src is converted so the xmm helper can be reused.
+    emitUint8(imm.value());
+  } else { // Other types: 0x66 0x0F 0x3A, then 0x20 (byte-sized) or 0x22.
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0x3A);
+    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
+    emitXmmRegisterOperand(dst, typename Traits::XmmRegister(src));
+    emitUint8(imm.value());
+  }
+}
+
+template <class Machine> // Address-source overload; same opcode selection.
+void AssemblerX86Base<Machine>::pinsr(Type Ty, typename Traits::XmmRegister dst,
+                                      const typename Traits::Address &src,
+                                      const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  if (Ty == IceType_i16) { // i16 uses the three-byte form 0x66 0x0F 0xC4.
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0xC4);
+    emitOperand(dst, src);
+    emitUint8(imm.value());
+  } else { // Other types: 0x66 0x0F 0x3A, then 0x20 (byte-sized) or 0x22.
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0x3A);
+    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
+    emitOperand(dst, src);
+    emitUint8(imm.value());
+  }
+}
+
+template <class Machine> // Extract an xmm lane chosen by imm into a GPR.
+void AssemblerX86Base<Machine>::pextr(Type Ty, typename Traits::GPRRegister dst,
+                                      typename Traits::XmmRegister src,
+                                      const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  if (Ty == IceType_i16) { // i16 uses the three-byte form 0x66 0x0F 0xC5.
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0xC5);
+    emitXmmRegisterOperand(typename Traits::XmmRegister(dst), src); // Operand order here is (dst, src).
+    emitUint8(imm.value());
+  } else { // Other types: 0x66 0x0F 0x3A, then 0x14 (byte-sized) or 0x16.
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0x3A);
+    emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
+    // SSE 4.1 versions are "MRI" because dst can be mem, while
+    // pextrw (SSE2) is RMI because dst must be reg.
+    emitXmmRegisterOperand(src, typename Traits::XmmRegister(dst)); // Operand order is swapped to (src, dst) for this form.
+    emitUint8(imm.value());
+  }
+}
+
+template <class Machine> // pmovsxdq xmm, xmm: emits 0x66 0x0F 0x38 0x25.
+void AssemblerX86Base<Machine>::pmovsxdq(typename Traits::XmmRegister dst,
+                                         typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x38);
+  emitUint8(0x25);
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // Packed compare-equal: 0x66 0x0F + a Ty-chosen opcode.
+void AssemblerX86Base<Machine>::pcmpeq(Type Ty,
+                                       typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) { // Opcode: 0x74 byte-sized, 0x75 i16, 0x76 otherwise.
+    emitUint8(0x74);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x75);
+  } else {
+    emitUint8(0x76);
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // Address-source overload; same opcode selection.
+void AssemblerX86Base<Machine>::pcmpeq(Type Ty,
+                                       typename Traits::XmmRegister dst,
+                                       const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) { // Opcode: 0x74 byte-sized, 0x75 i16, 0x76 otherwise.
+    emitUint8(0x74);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x75);
+  } else {
+    emitUint8(0x76);
+  }
+  emitOperand(dst, src);
+}
+
+template <class Machine> // Packed compare-greater: 0x66 0x0F + a Ty-chosen opcode.
+void AssemblerX86Base<Machine>::pcmpgt(Type Ty,
+                                       typename Traits::XmmRegister dst,
+                                       typename Traits::XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) { // Opcode: 0x64 byte-sized, 0x65 i16, 0x66 otherwise.
+    emitUint8(0x64);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x65);
+  } else {
+    emitUint8(0x66); // Here 0x66 is the opcode byte after 0x0F, not a second prefix.
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+template <class Machine> // Address-source overload; same opcode selection.
+void AssemblerX86Base<Machine>::pcmpgt(Type Ty,
+                                       typename Traits::XmmRegister dst,
+                                       const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) { // Opcode: 0x64 byte-sized, 0x65 i16, 0x66 otherwise.
+    emitUint8(0x64);
+  } else if (Ty == IceType_i16) {
+    emitUint8(0x65);
+  } else {
+    emitUint8(0x66); // Here 0x66 is the opcode byte after 0x0F, not a second prefix.
+  }
+  emitOperand(dst, src);
+}
+
+template <class Machine> // roundsd xmm, xmm, mode: 0x66 0x0F 0x3A 0x0B.
+void AssemblerX86Base<Machine>::roundsd(typename Traits::XmmRegister dst,
+                                        typename Traits::XmmRegister src,
+                                        RoundingMode mode) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A);
+  emitUint8(0x0B);
+  emitXmmRegisterOperand(dst, src);
+  // Mask precision exception.
+  emitUint8(static_cast<uint8_t>(mode) | 0x8); // The immediate byte is the rounding mode with bit 3 set.
+}
+
+template <class Machine> // fnstcw Address: emits 0xD9, then operand with 7.
+void AssemblerX86Base<Machine>::fnstcw(const typename Traits::Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xD9);
+  emitOperand(7, dst); // 7 fills the slot other emitters use for a register (presumably the opcode extension; confirm in emitOperand).
+}
+
+template <class Machine> // fldcw Address: emits 0xD9, then operand with 5.
+void AssemblerX86Base<Machine>::fldcw(const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xD9);
+  emitOperand(5, src); // 5 fills the slot other emitters use for a register (presumably the opcode extension; confirm in emitOperand).
+}
+
+template <class Machine> // fistpl Address: emits 0xDF, then operand with 7.
+void AssemblerX86Base<Machine>::fistpl(const typename Traits::Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDF);
+  emitOperand(7, dst); // NOTE(review): the stored integer width is not visible here; confirm against the ISA manual.
+}
+
+template <class Machine> // fistps Address: emits 0xDB, then operand with 3.
+void AssemblerX86Base<Machine>::fistps(const typename Traits::Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDB);
+  emitOperand(3, dst); // NOTE(review): the stored integer width is not visible here; confirm against the ISA manual.
+}
+
+template <class Machine> // fildl Address: emits 0xDF, then operand with 5.
+void AssemblerX86Base<Machine>::fildl(const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDF);
+  emitOperand(5, src); // NOTE(review): the loaded integer width is not visible here; confirm against the ISA manual.
+}
+
+template <class Machine> // filds Address: emits 0xDB, then operand with 0.
+void AssemblerX86Base<Machine>::filds(const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDB);
+  emitOperand(0, src); // NOTE(review): the loaded integer width is not visible here; confirm against the ISA manual.
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::fincstp() { // Operand-less: emits 0xD9 0xF7.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xD9);
+  emitUint8(0xF7);
+}
+
+template <class Machine> // Tag-selected integer op: register with an immediate.
+template <uint32_t Tag>
+void AssemblerX86Base<Machine>::arith_int(Type Ty,
+                                          typename Traits::GPRRegister reg,
+                                          const Immediate &imm) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (isByteSizedType(Ty)) { // Byte-sized types go through emitComplexI8 and return early.
+    emitComplexI8(Tag, typename Traits::Operand(reg), imm);
+    return;
+  }
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  emitComplex(Ty, Tag, typename Traits::Operand(reg), imm); // reg is wrapped as a Traits::Operand.
+}
+
+template <class Machine> // Tag-selected integer op: register with register.
+template <uint32_t Tag>
+void AssemblerX86Base<Machine>::arith_int(Type Ty,
+                                          typename Traits::GPRRegister reg0,
+                                          typename Traits::GPRRegister reg1) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) // Opcode is Tag * 8 + 2 for byte-sized types, Tag * 8 + 3 otherwise.
+    emitUint8(Tag * 8 + 2);
+  else
+    emitUint8(Tag * 8 + 3);
+  emitRegisterOperand(reg0, reg1);
+}
+
+template <class Machine> // Tag-selected integer op: register with Address.
+template <uint32_t Tag>
+void AssemblerX86Base<Machine>::arith_int(
+    Type Ty, typename Traits::GPRRegister reg,
+    const typename Traits::Address &address) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) // Same opcodes as the register/register overload: +2 byte-sized, +3 otherwise.
+    emitUint8(Tag * 8 + 2);
+  else
+    emitUint8(Tag * 8 + 3);
+  emitOperand(reg, address);
+}
+
+template <class Machine> // Tag-selected integer op: Address with register.
+template <uint32_t Tag>
+void AssemblerX86Base<Machine>::arith_int(
+    Type Ty, const typename Traits::Address &address,
+    typename Traits::GPRRegister reg) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) // Address-first form uses opcodes +0 / +1 instead of +2 / +3.
+    emitUint8(Tag * 8 + 0);
+  else
+    emitUint8(Tag * 8 + 1);
+  emitOperand(reg, address); // Operand bytes are emitted exactly as in the register-first overload.
+}
+
+template <class Machine> // Tag-selected integer op: Address with an immediate.
+template <uint32_t Tag>
+void AssemblerX86Base<Machine>::arith_int(
+    Type Ty, const typename Traits::Address &address, const Immediate &imm) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (isByteSizedType(Ty)) { // Byte-sized types go through emitComplexI8 and return early.
+    emitComplexI8(Tag, address, imm);
+    return;
+  }
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  emitComplex(Ty, Tag, address, imm);
+}
+
+template <class Machine> // cmp reg, imm: forwards to arith_int with Tag 7.
+void AssemblerX86Base<Machine>::cmp(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  arith_int<7>(Ty, reg, imm);
+}
+
+template <class Machine> // cmp reg, reg: forwards to arith_int with Tag 7.
+void AssemblerX86Base<Machine>::cmp(Type Ty, typename Traits::GPRRegister reg0,
+                                    typename Traits::GPRRegister reg1) {
+  arith_int<7>(Ty, reg0, reg1);
+}
+
+template <class Machine> // cmp reg, Address: forwards to arith_int with Tag 7.
+void AssemblerX86Base<Machine>::cmp(Type Ty, typename Traits::GPRRegister reg,
+                                    const typename Traits::Address &address) {
+  arith_int<7>(Ty, reg, address);
+}
+
+template <class Machine> // cmp Address, reg: forwards to arith_int with Tag 7.
+void AssemblerX86Base<Machine>::cmp(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister reg) {
+  arith_int<7>(Ty, address, reg);
+}
+
+template <class Machine> // cmp Address, imm: forwards to arith_int with Tag 7.
+void AssemblerX86Base<Machine>::cmp(Type Ty,
+                                    const typename Traits::Address &address,
+                                    const Immediate &imm) {
+  arith_int<7>(Ty, address, imm);
+}
+
+template <class Machine> // test reg, reg: [override] 0x84 or 0x85 + operands.
+void AssemblerX86Base<Machine>::test(Type Ty, typename Traits::GPRRegister reg1,
+                                     typename Traits::GPRRegister reg2) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) // 0x84 for byte-sized types, 0x85 otherwise.
+    emitUint8(0x84);
+  else
+    emitUint8(0x85);
+  emitRegisterOperand(reg1, reg2);
+}
+
+template <class Machine> // test Address, reg: [override] 0x84 or 0x85 + operand.
+void AssemblerX86Base<Machine>::test(Type Ty,
+                                     const typename Traits::Address &addr,
+                                     typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) // 0x84 for byte-sized types, 0x85 otherwise.
+    emitUint8(0x84);
+  else
+    emitUint8(0x85);
+  emitOperand(reg, addr);
+}
+
+template <class Machine> // test reg, imm: picks the shortest of three encodings.
+void AssemblerX86Base<Machine>::test(Type Ty, typename Traits::GPRRegister reg,
+                                     const Immediate &immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
+  // we only test the byte register to keep the encoding short.
+  // This is legal even if the register had high bits set since
+  // this only sets flags registers based on the "AND" of the two operands,
+  // and the immediate had zeros at those high bits.
+  if (immediate.is_uint8() && reg < 4) { // This branch never consults Ty.
+    // Use zero-extended 8-bit immediate.
+    if (reg == Traits::Encoded_Reg_Accumulator) {
+      emitUint8(0xA8); // Accumulator form: opcode only, no register byte.
+    } else {
+      emitUint8(0xF6);
+      emitUint8(0xC0 + reg); // Second byte is 0xC0 plus the register number.
+    }
+    emitUint8(immediate.value() & 0xFF);
+  } else if (reg == Traits::Encoded_Reg_Accumulator) {
+    // Use short form if the destination is EAX.
+    if (Ty == IceType_i16)
+      emitOperandSizeOverride();
+    emitUint8(0xA9);
+    emitImmediate(Ty, immediate);
+  } else { // General form: 0xF7, register operand emitted with 0, then the immediate.
+    if (Ty == IceType_i16)
+      emitOperandSizeOverride();
+    emitUint8(0xF7);
+    emitRegisterOperand(0, reg);
+    emitImmediate(Ty, immediate);
+  }
+}
+
+template <class Machine> // test Address, imm: byte form when imm fits in 8 bits.
+void AssemblerX86Base<Machine>::test(Type Ty,
+                                     const typename Traits::Address &addr,
+                                     const Immediate &immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // If the immediate is short, we only test the byte addr to keep the
+  // encoding short.
+  if (immediate.is_uint8()) { // This branch never consults Ty.
+    // Use zero-extended 8-bit immediate.
+    emitUint8(0xF6);
+    emitOperand(0, addr);
+    emitUint8(immediate.value() & 0xFF);
+  } else { // Wider immediates: [override] 0xF7, operand, then the immediate.
+    if (Ty == IceType_i16)
+      emitOperandSizeOverride();
+    emitUint8(0xF7);
+    emitOperand(0, addr);
+    emitImmediate(Ty, immediate);
+  }
+}
+
+template <class Machine> // And reg, reg: forwards to arith_int with Tag 4.
+void AssemblerX86Base<Machine>::And(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  arith_int<4>(Ty, dst, src);
+}
+
+template <class Machine> // And reg, Address: forwards to arith_int with Tag 4.
+void AssemblerX86Base<Machine>::And(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &address) {
+  arith_int<4>(Ty, dst, address);
+}
+
+template <class Machine> // And reg, imm: forwards to arith_int with Tag 4.
+void AssemblerX86Base<Machine>::And(Type Ty, typename Traits::GPRRegister dst,
+                                    const Immediate &imm) {
+  arith_int<4>(Ty, dst, imm);
+}
+
+template <class Machine> // And Address, reg: forwards to arith_int with Tag 4.
+void AssemblerX86Base<Machine>::And(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister reg) {
+  arith_int<4>(Ty, address, reg);
+}
+
+template <class Machine> // And Address, imm: forwards to arith_int with Tag 4.
+void AssemblerX86Base<Machine>::And(Type Ty,
+                                    const typename Traits::Address &address,
+                                    const Immediate &imm) {
+  arith_int<4>(Ty, address, imm);
+}
+
+template <class Machine> // Or reg, reg: forwards to arith_int with Tag 1.
+void AssemblerX86Base<Machine>::Or(Type Ty, typename Traits::GPRRegister dst,
+                                   typename Traits::GPRRegister src) {
+  arith_int<1>(Ty, dst, src);
+}
+
+template <class Machine> // Or reg, Address: forwards to arith_int with Tag 1.
+void AssemblerX86Base<Machine>::Or(Type Ty, typename Traits::GPRRegister dst,
+                                   const typename Traits::Address &address) {
+  arith_int<1>(Ty, dst, address);
+}
+
+template <class Machine> // Or reg, imm: forwards to arith_int with Tag 1.
+void AssemblerX86Base<Machine>::Or(Type Ty, typename Traits::GPRRegister dst,
+                                   const Immediate &imm) {
+  arith_int<1>(Ty, dst, imm);
+}
+
+template <class Machine> // Or Address, reg: forwards to arith_int with Tag 1.
+void AssemblerX86Base<Machine>::Or(Type Ty,
+                                   const typename Traits::Address &address,
+                                   typename Traits::GPRRegister reg) {
+  arith_int<1>(Ty, address, reg);
+}
+
+template <class Machine> // Or Address, imm: forwards to arith_int with Tag 1.
+void AssemblerX86Base<Machine>::Or(Type Ty,
+                                   const typename Traits::Address &address,
+                                   const Immediate &imm) {
+  arith_int<1>(Ty, address, imm);
+}
+
+template <class Machine> // Xor reg, reg: forwards to arith_int with Tag 6.
+void AssemblerX86Base<Machine>::Xor(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  arith_int<6>(Ty, dst, src);
+}
+
+template <class Machine> // Xor reg, Address: forwards to arith_int with Tag 6.
+void AssemblerX86Base<Machine>::Xor(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &address) {
+  arith_int<6>(Ty, dst, address);
+}
+
+template <class Machine> // Xor reg, imm: forwards to arith_int with Tag 6.
+void AssemblerX86Base<Machine>::Xor(Type Ty, typename Traits::GPRRegister dst,
+                                    const Immediate &imm) {
+  arith_int<6>(Ty, dst, imm);
+}
+
+template <class Machine> // Xor Address, reg: forwards to arith_int with Tag 6.
+void AssemblerX86Base<Machine>::Xor(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister reg) {
+  arith_int<6>(Ty, address, reg);
+}
+
+template <class Machine> // Xor Address, imm: forwards to arith_int with Tag 6.
+void AssemblerX86Base<Machine>::Xor(Type Ty,
+                                    const typename Traits::Address &address,
+                                    const Immediate &imm) {
+  arith_int<6>(Ty, address, imm);
+}
+
+template <class Machine> // add reg, reg: forwards to arith_int with Tag 0.
+void AssemblerX86Base<Machine>::add(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  arith_int<0>(Ty, dst, src);
+}
+
+template <class Machine> // add reg, Address: forwards to arith_int with Tag 0.
+void AssemblerX86Base<Machine>::add(Type Ty, typename Traits::GPRRegister reg,
+                                    const typename Traits::Address &address) {
+  arith_int<0>(Ty, reg, address);
+}
+
+template <class Machine> // add reg, imm: forwards to arith_int with Tag 0.
+void AssemblerX86Base<Machine>::add(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  arith_int<0>(Ty, reg, imm);
+}
+
+template <class Machine> // add Address, reg: forwards to arith_int with Tag 0.
+void AssemblerX86Base<Machine>::add(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister reg) {
+  arith_int<0>(Ty, address, reg);
+}
+
+template <class Machine> // add Address, imm: forwards to arith_int with Tag 0.
+void AssemblerX86Base<Machine>::add(Type Ty,
+                                    const typename Traits::Address &address,
+                                    const Immediate &imm) {
+  arith_int<0>(Ty, address, imm);
+}
+
+template <class Machine> // adc reg, reg: forwards to arith_int with Tag 2.
+void AssemblerX86Base<Machine>::adc(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  arith_int<2>(Ty, dst, src);
+}
+
+template <class Machine> // adc reg, Address: forwards to arith_int with Tag 2.
+void AssemblerX86Base<Machine>::adc(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &address) {
+  arith_int<2>(Ty, dst, address);
+}
+
+template <class Machine> // adc reg, imm: forwards to arith_int with Tag 2.
+void AssemblerX86Base<Machine>::adc(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  arith_int<2>(Ty, reg, imm);
+}
+
+template <class Machine> // adc Address, reg: forwards to arith_int with Tag 2.
+void AssemblerX86Base<Machine>::adc(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister reg) {
+  arith_int<2>(Ty, address, reg);
+}
+
+template <class Machine> // adc Address, imm: forwards to arith_int with Tag 2.
+void AssemblerX86Base<Machine>::adc(Type Ty,
+                                    const typename Traits::Address &address,
+                                    const Immediate &imm) {
+  arith_int<2>(Ty, address, imm);
+}
+
+template <class Machine> // sub reg, reg: forwards to arith_int with Tag 5.
+void AssemblerX86Base<Machine>::sub(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  arith_int<5>(Ty, dst, src);
+}
+
+template <class Machine> // sub reg, Address: forwards to arith_int with Tag 5.
+void AssemblerX86Base<Machine>::sub(Type Ty, typename Traits::GPRRegister reg,
+                                    const typename Traits::Address &address) {
+  arith_int<5>(Ty, reg, address);
+}
+
+template <class Machine> // sub reg, imm: forwards to arith_int with Tag 5.
+void AssemblerX86Base<Machine>::sub(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  arith_int<5>(Ty, reg, imm);
+}
+
+template <class Machine> // sub Address, reg: forwards to arith_int with Tag 5.
+void AssemblerX86Base<Machine>::sub(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister reg) {
+  arith_int<5>(Ty, address, reg);
+}
+
+template <class Machine> // sub Address, imm: forwards to arith_int with Tag 5.
+void AssemblerX86Base<Machine>::sub(Type Ty,
+                                    const typename Traits::Address &address,
+                                    const Immediate &imm) {
+  arith_int<5>(Ty, address, imm);
+}
+
+template <class Machine> // sbb reg, reg: forwards to arith_int with Tag 3.
+void AssemblerX86Base<Machine>::sbb(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  arith_int<3>(Ty, dst, src);
+}
+
+template <class Machine> // sbb reg, Address: forwards to arith_int with Tag 3.
+void AssemblerX86Base<Machine>::sbb(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &address) {
+  arith_int<3>(Ty, dst, address);
+}
+
+template <class Machine> // sbb reg, imm: forwards to arith_int with Tag 3.
+void AssemblerX86Base<Machine>::sbb(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  arith_int<3>(Ty, reg, imm);
+}
+
+template <class Machine> // sbb Address, reg: forwards to arith_int with Tag 3.
+void AssemblerX86Base<Machine>::sbb(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister reg) {
+  arith_int<3>(Ty, address, reg);
+}
+
+template <class Machine> // sbb Address, imm: forwards to arith_int with Tag 3.
+void AssemblerX86Base<Machine>::sbb(Type Ty,
+                                    const typename Traits::Address &address,
+                                    const Immediate &imm) {
+  arith_int<3>(Ty, address, imm);
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::cbw() { // Operand-size override, then 0x98.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitOperandSizeOverride();
+  emitUint8(0x98);
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::cwd() { // Operand-size override, then 0x99.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitOperandSizeOverride();
+  emitUint8(0x99);
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::cdq() { // Same 0x99 byte as cwd, without the override.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x99);
+}
+
+template <class Machine> // div reg: [override] 0xF6 or 0xF7, operand with 6.
+void AssemblerX86Base<Machine>::div(Type Ty, typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) // Only i16 gets the operand-size override.
+    emitOperandSizeOverride();
+  if (isByteSizedArithType(Ty)) // 0xF6 for byte-sized arithmetic types, 0xF7 otherwise.
+    emitUint8(0xF6);
+  else
+    emitUint8(0xF7);
+  emitRegisterOperand(6, reg); // 6 is what distinguishes div from idiv (7) in this file.
+}
+
+template <class Machine> // div Address: same opcode selection as above.
+void AssemblerX86Base<Machine>::div(Type Ty,
+                                    const typename Traits::Address &addr) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedArithType(Ty))
+    emitUint8(0xF6);
+  else
+    emitUint8(0xF7);
+  emitOperand(6, addr);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::idiv(Type Ty,
+                                     typename Traits::GPRRegister reg) {
+  // Byte order: optional i16 operand-size prefix, opcode, operand tagged 7.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitRegisterOperand(7, reg);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::idiv(Type Ty,
+                                     const typename Traits::Address &addr) {
+  // Memory-operand twin of the register form: same prefix and opcode choice.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitOperand(7, addr);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
+                                     typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // No byte-sized form here.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F); // Two-byte opcode 0x0F 0xAF...
+  emitUint8(0xAF);
+  emitRegisterOperand(dst, src); // ...followed by the dst/src register pair.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister reg,
+                                     const typename Traits::Address &address) {
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // No byte-sized form here.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F); // Same two-byte opcode as the register/register overload.
+  emitUint8(0xAF);
+  emitOperand(reg, address); // Register paired with a memory operand.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister reg,
+                                     const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  // Opcode 0x6B is followed by a one-byte immediate, 0x69 by a full-width
+  // one. reg is handed to emitRegisterOperand as both of its arguments.
+  const bool FitsInByte = imm.is_int8();
+  emitUint8(FitsInByte ? 0x6B : 0x69);
+  emitRegisterOperand(reg, reg);
+  if (FitsInByte)
+    emitUint8(imm.value() & 0xFF);
+  else
+    emitImmediate(Ty, imm);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::imul(Type Ty,
+                                     typename Traits::GPRRegister reg) {
+  // Byte order: optional i16 operand-size prefix, opcode, operand tagged 5.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitRegisterOperand(5, reg);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::imul(Type Ty,
+                                     const typename Traits::Address &address) {
+  // Memory-operand twin of the register form: same prefix and opcode choice.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitOperand(5, address);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::mul(Type Ty, typename Traits::GPRRegister reg) {
+  // Byte order: optional i16 operand-size prefix, opcode, operand tagged 4.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitRegisterOperand(4, reg);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::mul(Type Ty,
+                                    const typename Traits::Address &address) {
+  // Memory-operand twin of the register form: same prefix and opcode choice.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitOperand(4, address);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::incl(typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x40 + reg); // One byte: register number added to 0x40. NOTE(review): 0x40-0x4F are REX prefixes in 64-bit mode -- confirm no x86-64 Machine reaches this.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::incl(const typename Traits::Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF); // Opcode byte shared with decl(Address); the tag below tells them apart.
+  emitOperand(0, address); // Tag 0 here, 1 in decl(Address).
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::decl(typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x48 + reg); // One byte: register number added to 0x48. NOTE(review): 0x40-0x4F are REX prefixes in 64-bit mode -- confirm no x86-64 Machine reaches this.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::decl(const typename Traits::Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF); // Opcode byte shared with incl(Address); the tag below tells them apart.
+  emitOperand(1, address); // Tag 1 here, 0 in incl(Address).
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::rol(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  emitGenericShift(0, Ty, reg, imm); // Immediate-count form; 0 is the tag rol passes to emitGenericShift.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::rol(Type Ty,
+                                    typename Traits::GPRRegister operand,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(0, Ty, typename Traits::Operand(operand), shifter); // Register wrapped as an Operand to reach the operand/shifter overload.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::rol(Type Ty,
+                                    const typename Traits::Address &operand,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(0, Ty, operand, shifter); // Memory operand, count taken from the shifter register.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shl(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  emitGenericShift(4, Ty, reg, imm); // Immediate-count form; 4 is the tag shl passes to emitGenericShift.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shl(Type Ty,
+                                    typename Traits::GPRRegister operand,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(4, Ty, typename Traits::Operand(operand), shifter); // Register wrapped as an Operand to reach the operand/shifter overload.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shl(Type Ty,
+                                    const typename Traits::Address &operand,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(4, Ty, operand, shifter); // Memory operand, count taken from the shifter register.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shr(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  emitGenericShift(5, Ty, reg, imm); // Immediate-count form; 5 is the tag shr passes to emitGenericShift.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shr(Type Ty,
+                                    typename Traits::GPRRegister operand,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(5, Ty, typename Traits::Operand(operand), shifter); // Register wrapped as an Operand to reach the operand/shifter overload.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shr(Type Ty,
+                                    const typename Traits::Address &operand,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(5, Ty, operand, shifter); // Memory operand, count taken from the shifter register.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::sar(Type Ty, typename Traits::GPRRegister reg,
+                                    const Immediate &imm) {
+  emitGenericShift(7, Ty, reg, imm); // Immediate-count form; 7 is the tag sar passes to emitGenericShift.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::sar(Type Ty,
+                                    typename Traits::GPRRegister operand,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(7, Ty, typename Traits::Operand(operand), shifter); // Register wrapped as an Operand to reach the operand/shifter overload.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::sar(Type Ty,
+                                    const typename Traits::Address &address,
+                                    typename Traits::GPRRegister shifter) {
+  emitGenericShift(7, Ty, address, shifter); // Memory operand, count taken from the shifter register.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shld(Type Ty, typename Traits::GPRRegister dst,
+                                     typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xA5); // Form with no count byte; the immediate overload uses 0xA4 instead.
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shld(Type Ty, typename Traits::GPRRegister dst,
+                                     typename Traits::GPRRegister src,
+                                     const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  assert(imm.is_int8()); // The count is emitted as a single byte below.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xA4); // Form that is followed by a count byte.
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+  emitUint8(imm.value() & 0xFF); // Count byte comes last.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shld(Type Ty,
+                                     const typename Traits::Address &operand,
+                                     typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xA5); // Same opcode as the register/register form without an immediate.
+  emitOperand(src, operand); // src register paired with the memory operand.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shrd(Type Ty, typename Traits::GPRRegister dst,
+                                     typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAD); // Form with no count byte; the immediate overload uses 0xAC instead.
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shrd(Type Ty, typename Traits::GPRRegister dst,
+                                     typename Traits::GPRRegister src,
+                                     const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  assert(imm.is_int8()); // The count is emitted as a single byte below.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAC); // Form that is followed by a count byte.
+  emitRegisterOperand(src, dst); // Note the order: src is passed first, dst second.
+  emitUint8(imm.value() & 0xFF); // Count byte comes last.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::shrd(Type Ty,
+                                     const typename Traits::Address &dst,
+                                     typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xAD); // Same opcode as the register/register form without an immediate.
+  emitOperand(src, dst); // src register paired with the memory destination.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::neg(Type Ty, typename Traits::GPRRegister reg) {
+  // Byte order: optional i16 operand-size prefix, opcode, operand tagged 3.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitRegisterOperand(3, reg);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::neg(Type Ty,
+                                    const typename Traits::Address &addr) {
+  // Memory-operand twin of the register form: same prefix and opcode choice.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xF6 : 0xF7;
+  emitUint8(Opcode);
+  emitOperand(3, addr);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::notl(typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF7); // Opcode byte.
+  emitUint8(0xD0 | reg); // Second byte built by hand: register number OR-ed into 0xD0.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::bswap(Type Ty,
+                                      typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i32); // Only the 32-bit type is accepted.
+  (void)Ty; // Ty is otherwise read only by the assert above.
+  emitUint8(0x0F);
+  emitUint8(0xC8 | reg); // Register number OR-ed into the second opcode byte.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::bsf(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBC); // bsr uses 0xBD in this position.
+  emitRegisterOperand(dst, src); // dst is passed first, src second.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::bsf(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBC); // Same opcode bytes as the register/register overload.
+  emitOperand(dst, src); // dst register paired with the memory source.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::bsr(Type Ty, typename Traits::GPRRegister dst,
+                                    typename Traits::GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBD); // bsf uses 0xBC in this position.
+  emitRegisterOperand(dst, src); // dst is passed first, src second.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::bsr(Type Ty, typename Traits::GPRRegister dst,
+                                    const typename Traits::Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32); // Byte-sized types are rejected.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(0x0F);
+  emitUint8(0xBD); // Same opcode bytes as the register/register overload.
+  emitOperand(dst, src); // dst register paired with the memory source.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::bt(typename Traits::GPRRegister base,
+                                   typename Traits::GPRRegister offset) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F); // Two-byte opcode 0x0F 0xA3; no Type parameter, so no size prefix is ever emitted.
+  emitUint8(0xA3);
+  emitRegisterOperand(offset, base); // Note the order: offset is passed first, base second.
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::ret() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xC3); // Single-byte form with no immediate.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::ret(const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xC2); // Form that is followed by a two-byte immediate.
+  assert(imm.is_uint16()); // Checked after the opcode byte has already been written.
+  emitUint8(imm.value() & 0xFF); // Low byte of the immediate first...
+  emitUint8((imm.value() >> 8) & 0xFF); // ...then the high byte.
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::nop(int size) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // There are nops up to size 15, but for now just provide up to size 8.
+  assert(0 < size && size <= MAX_NOP_SIZE); // Callers such as align() split longer padding into chunks.
+  switch (size) { // Each case emits exactly `size` bytes.
+  case 1:
+    emitUint8(0x90);
+    break;
+  case 2: // Case 1's byte behind a 0x66 prefix.
+    emitUint8(0x66);
+    emitUint8(0x90);
+    break;
+  case 3: // Sizes 3 and up are built on the 0x0F 0x1F opcode pair.
+    emitUint8(0x0F);
+    emitUint8(0x1F);
+    emitUint8(0x00);
+    break;
+  case 4:
+    emitUint8(0x0F);
+    emitUint8(0x1F);
+    emitUint8(0x40);
+    emitUint8(0x00);
+    break;
+  case 5:
+    emitUint8(0x0F);
+    emitUint8(0x1F);
+    emitUint8(0x44);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  case 6: // Case 5's bytes behind a 0x66 prefix.
+    emitUint8(0x66);
+    emitUint8(0x0F);
+    emitUint8(0x1F);
+    emitUint8(0x44);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  case 7:
+    emitUint8(0x0F);
+    emitUint8(0x1F);
+    emitUint8(0x80);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  case 8:
+    emitUint8(0x0F);
+    emitUint8(0x1F);
+    emitUint8(0x84);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    emitUint8(0x00);
+    break;
+  default: // Only reachable if the assert above is compiled out or MAX_NOP_SIZE exceeds 8.
+    llvm_unreachable("Unimplemented");
+  }
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::int3() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xCC); // The whole instruction is this one byte.
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::hlt() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF4); // The whole instruction is this one byte.
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::ud2() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F); // Two-byte instruction: 0x0F 0x0B.
+  emitUint8(0x0B);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::j(typename Traits::Cond::BrCond condition,
+                                  Label *label, bool near) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (label->IsBound()) { // Target already placed: the distance is known and `near` is not consulted.
+    static const int kShortSize = 2; // Bytes emitted by the short branch below.
+    static const int kLongSize = 6; // Bytes emitted by the long branch below.
+    intptr_t offset = label->Position() - Buffer.size(); // Measured from the start of this instruction.
+    assert(offset <= 0); // A bound label lies at or before the current position.
+    if (Utils::IsInt(8, offset - kShortSize)) {
+      // TODO(stichnot): Here and in jmp(), we may need to be more
+      // conservative about the backward branch distance if the branch
+      // instruction is within a bundle_lock sequence, because the
+      // distance may increase when padding is added.  This isn't an
+      // issue for branches outside a bundle_lock, because if padding
+      // is added, the retry may change it to a long backward branch
+      // without affecting any of the bookkeeping.
+      emitUint8(0x70 + condition); // Condition code is added into the opcode byte.
+      emitUint8((offset - kShortSize) & 0xFF); // Displacement adjusted by the instruction's own length.
+    } else {
+      emitUint8(0x0F);
+      emitUint8(0x80 + condition); // Condition code is added into the second opcode byte.
+      emitInt32(offset - kLongSize); // Displacement adjusted by the instruction's own length.
+    }
+  } else if (near) { // Unbound target, caller asked for the short form.
+    emitUint8(0x70 + condition);
+    emitNearLabelLink(label); // One-byte placeholder, patched by bind().
+  } else { // Unbound target, long form.
+    emitUint8(0x0F);
+    emitUint8(0x80 + condition);
+    emitLabelLink(label); // Four-byte placeholder, patched by bind().
+  }
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::j(typename Traits::Cond::BrCond condition,
+                                  const ConstantRelocatable *label) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F); // Always the long form for a relocatable target.
+  emitUint8(0x80 + condition);
+  emitFixup(this->createFixup(Traits::PcRelFixup, label)); // Fixup is recorded before the 4 displacement bytes.
+  emitInt32(-4); // Placeholder value stored in the field the fixup covers.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::jmp(typename Traits::GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF); // Opcode byte for the register-target form.
+  emitRegisterOperand(4, reg); // 4 is the tag passed alongside the register.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::jmp(Label *label, bool near) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (label->IsBound()) { // Target already placed: the distance is known and `near` is not consulted.
+    static const int kShortSize = 2; // Bytes emitted by the short jump below.
+    static const int kLongSize = 5; // Bytes emitted by the long jump below.
+    intptr_t offset = label->Position() - Buffer.size(); // Measured from the start of this instruction.
+    assert(offset <= 0); // A bound label lies at or before the current position.
+    if (Utils::IsInt(8, offset - kShortSize)) {
+      emitUint8(0xEB);
+      emitUint8((offset - kShortSize) & 0xFF); // Displacement adjusted by the instruction's own length.
+    } else {
+      emitUint8(0xE9);
+      emitInt32(offset - kLongSize); // Displacement adjusted by the instruction's own length.
+    }
+  } else if (near) { // Unbound target, caller asked for the short form.
+    emitUint8(0xEB);
+    emitNearLabelLink(label); // One-byte placeholder, patched by bind().
+  } else { // Unbound target, long form.
+    emitUint8(0xE9);
+    emitLabelLink(label); // Four-byte placeholder, patched by bind().
+  }
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::jmp(const ConstantRelocatable *label) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xE9); // Always the long form for a relocatable target.
+  emitFixup(this->createFixup(Traits::PcRelFixup, label)); // Fixup is recorded before the 4 displacement bytes.
+  emitInt32(-4); // Placeholder value stored in the field the fixup covers.
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::mfence() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F); // Fixed three-byte sequence: 0x0F 0xAE 0xF0.
+  emitUint8(0xAE);
+  emitUint8(0xF0);
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::lock() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF0); // Same byte cmpxchg/cmpxchg8b/xadd emit themselves when Locked is true.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::cmpxchg(Type Ty,
+                                        const typename Traits::Address &address,
+                                        typename Traits::GPRRegister reg,
+                                        bool Locked) {
+  // Prefixes first (operand-size for i16, then 0xF0 when Locked), followed by
+  // the two-byte opcode 0x0F 0xB0/0xB1 and the reg/address operand.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (Locked)
+    emitUint8(0xF0);
+  emitUint8(0x0F);
+  const uint8_t SecondByte = isByteSizedArithType(Ty) ? 0xB0 : 0xB1;
+  emitUint8(SecondByte);
+  emitOperand(reg, address);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::cmpxchg8b(
+    const typename Traits::Address &address, bool Locked) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Locked)
+    emitUint8(0xF0); // Same byte that lock() emits on its own.
+  emitUint8(0x0F); // Two-byte opcode 0x0F 0xC7; no Type parameter, so no size prefix.
+  emitUint8(0xC7);
+  emitOperand(1, address); // 1 is the tag passed alongside the memory operand.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::xadd(Type Ty,
+                                     const typename Traits::Address &addr,
+                                     typename Traits::GPRRegister reg,
+                                     bool Locked) {
+  // Prefixes first (operand-size for i16, then 0xF0 when Locked), followed by
+  // the two-byte opcode 0x0F 0xC0/0xC1 and the reg/addr operand.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (Locked)
+    emitUint8(0xF0);
+  emitUint8(0x0F);
+  const uint8_t SecondByte = isByteSizedArithType(Ty) ? 0xC0 : 0xC1;
+  emitUint8(SecondByte);
+  emitOperand(reg, addr);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::xchg(Type Ty,
+                                     const typename Traits::Address &addr,
+                                     typename Traits::GPRRegister reg) {
+  // Unlike cmpxchg()/xadd() there is no Locked flag here, so no 0xF0 byte is
+  // ever emitted: just the optional i16 prefix, the opcode and the operand.
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0x86 : 0x87;
+  emitUint8(Opcode);
+  emitOperand(reg, addr);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitSegmentOverride(uint8_t prefix) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(prefix); // The caller-supplied byte is written as-is; no validation happens here.
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::align(intptr_t alignment, intptr_t offset) {
+  assert(llvm::isPowerOf2_32(alignment));
+  // alignment is a power of two, so (alignment - 1) masks off the remainder.
+  const intptr_t Mask = alignment - 1;
+  const intptr_t Misalignment = (offset + Buffer.getPosition()) & Mask;
+  if (Misalignment == 0)
+    return;
+  // Fill the gap with maximum-size nops, finishing with one nop for whatever
+  // is left over (between 1 and MAX_NOP_SIZE bytes).
+  intptr_t Remaining = alignment - Misalignment;
+  for (; Remaining > MAX_NOP_SIZE; Remaining -= MAX_NOP_SIZE)
+    nop(MAX_NOP_SIZE);
+  if (Remaining)
+    nop(Remaining);
+  // Re-read the buffer position to confirm the padding landed on a boundary.
+  assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0);
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::bind(Label *label) {
+  intptr_t bound = Buffer.size(); // Bind target: the current end of the buffer.
+  assert(!label->IsBound()); // Labels can only be bound once.
+  while (label->IsLinked()) { // First patch every 4-byte link site, following the chain.
+    intptr_t position = label->LinkPosition();
+    intptr_t next = Buffer.load<int32_t>(position); // The slot holds the value emitLabelLink stored there.
+    Buffer.store<int32_t>(position, bound - (position + 4)); // Displacement counted from the end of the 4-byte slot.
+    label->position_ = next; // Advance to the next link in the chain.
+  }
+  while (label->HasNear()) { // Then patch every 1-byte link site.
+    intptr_t position = label->NearPosition(); // NOTE(review): presumably consumes the entry so the loop ends -- confirm in Label.
+    intptr_t offset = bound - (position + 1); // Displacement counted from the end of the 1-byte slot.
+    assert(Utils::IsInt(8, offset)); // A near link must be reachable with a signed byte.
+    Buffer.store<int8_t>(position, offset);
+  }
+  label->BindTo(bound);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitOperand(
+    int rm, const typename Traits::Operand &operand) {
+  assert(rm >= 0 && rm < 8); // rm must fit in three bits.
+  const intptr_t length = operand.length_;
+  assert(length > 0); // At least the first encoded byte must exist.
+  // Emit the ModRM byte updated with the given RM value.
+  assert((operand.encoding_[0] & 0x38) == 0); // Bits 3-5 must still be clear...
+  emitUint8(operand.encoding_[0] + (rm << 3)); // ...because rm is placed there.
+  if (operand.fixup()) {
+    emitFixup(operand.fixup()); // Recorded after the first byte and before the remaining bytes.
+  }
+  // Emit the rest of the encoded operand.
+  for (intptr_t i = 1; i < length; i++) {
+    emitUint8(operand.encoding_[i]);
+  }
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitImmediate(Type Ty, const Immediate &imm) {
+  if (Ty == IceType_i16) {
+    // The 16-bit path emits two bytes and never records a fixup.
+    assert(!imm.fixup());
+    emitInt16(imm.value());
+    return;
+  }
+  if (imm.fixup())
+    emitFixup(imm.fixup());
+  emitInt32(imm.value());
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitComplexI8(
+    int rm, const typename Traits::Operand &operand,
+    const Immediate &immediate) {
+  assert(rm >= 0 && rm < 8);
+  assert(immediate.is_int8());
+  if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
+    // Short form when the destination is al: rm is folded into the opcode.
+    emitUint8(0x04 + (rm << 3));
+  } else {
+    // General form: opcode 0x80 with rm carried by the operand encoding.
+    emitUint8(0x80);
+    emitOperand(rm, operand);
+  }
+  // Both forms end with the 8-bit immediate.
+  emitUint8(immediate.value() & 0xFF);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitComplex(
+    Type Ty, int rm, const typename Traits::Operand &operand,
+    const Immediate &immediate) {
+  assert(rm >= 0 && rm < 8);
+  if (immediate.is_int8()) {
+    // Small immediates: 0x83 form with a single sign-extended byte.
+    emitUint8(0x83);
+    emitOperand(rm, operand);
+    emitUint8(immediate.value() & 0xFF);
+    return;
+  }
+  if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
+    emitUint8(0x05 + (rm << 3)); // Short form when the destination is eax.
+  } else {
+    emitUint8(0x81);
+    emitOperand(rm, operand);
+  }
+  emitImmediate(Ty, immediate);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitLabel(Label *label,
+                                          intptr_t instruction_size) {
+  if (!label->IsBound()) {
+    emitLabelLink(label); // Target unknown yet: leave a link for bind().
+    return;
+  }
+  intptr_t offset = label->Position() - Buffer.size();
+  assert(offset <= 0);
+  emitInt32(offset - instruction_size);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitLabelLink(Label *Label) {
+  assert(!Label->IsBound()); // Links are only created for labels that are not bound yet.
+  intptr_t Position = Buffer.size(); // Where the 4-byte slot is about to be written.
+  emitInt32(Label->position_); // The slot stores the label's current position_ value; bind() reads it back.
+  if (!getPreliminary()) // The label itself is left untouched when getPreliminary() is true.
+    Label->LinkTo(Position);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitNearLabelLink(Label *label) {
+  assert(!label->IsBound()); // Links are only created for labels that are not bound yet.
+  intptr_t position = Buffer.size(); // Where the 1-byte slot is about to be written.
+  emitUint8(0); // Placeholder byte; bind() overwrites it with the real displacement.
+  if (!getPreliminary()) // The label itself is left untouched when getPreliminary() is true.
+    label->NearLinkTo(position);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitGenericShift(
+    int rm, Type Ty, typename Traits::GPRRegister reg, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  assert(imm.is_int8());
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  // A count of exactly 1 uses 0xD0/0xD1 and carries no count byte; any other
+  // count uses 0xC0/0xC1 followed by the count. The odd opcode is non-byte.
+  const bool ByOne = imm.value() == 1;
+  const uint8_t ByteOpcode = ByOne ? 0xD0 : 0xC0;
+  emitUint8(isByteSizedArithType(Ty) ? ByteOpcode : ByteOpcode + 1);
+  emitOperand(rm, typename Traits::Operand(reg));
+  if (!ByOne)
+    emitUint8(imm.value() & 0xFF);
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::emitGenericShift(
+    int rm, Type Ty, const typename Traits::Operand &operand,
+    typename Traits::GPRRegister shifter) {
+  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
+  assert(shifter == Traits::Encoded_Reg_Counter); // Only the counter register may hold the count.
+  (void)shifter; // shifter is otherwise read only by the assert above.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(isByteSizedArithType(Ty) ? 0xD2 : 0xD3); // The count register is implied, so it is never encoded.
+  emitOperand(rm, operand);
+}
+
+} // end of namespace X86Internal
+} // end of namespace Ice
diff --git a/src/IceConditionCodesX8664.h b/src/IceConditionCodesX8664.h
index 59a1493..ec30f88 100644
--- a/src/IceConditionCodesX8664.h
+++ b/src/IceConditionCodesX8664.h
@@ -19,28 +19,28 @@
 
 namespace Ice {
 
-namespace CondX8664 {
-// An enum of condition codes used for branches and cmov. The enum value
-// should match the value used to encode operands in binary instructions.
-enum BrCond {
+class CondX8664 {
+public: // Class members default to private; these enums are the public API.
+  // An enum of condition codes used for branches and cmov. The enum value
+  // should match the value used to encode operands in binary instructions.
+  enum BrCond {
 #define X(tag, encode, opp, dump, emit) tag encode,
-  ICEINSTX8664BR_TABLE
+    ICEINSTX8664BR_TABLE
 #undef X
-      Br_None
-};
+        Br_None
+  };
 
-// An enum of condition codes relevant to the CMPPS instruction. The enum
-// value should match the value used to encode operands in binary
-// instructions.
-enum CmppsCond {
+  // An enum of condition codes relevant to the CMPPS instruction. The enum
+  // value should match the value used to encode operands in binary
+  // instructions.
+  enum CmppsCond {
 #define X(tag, emit) tag,
-  ICEINSTX8664CMPPS_TABLE
+    ICEINSTX8664CMPPS_TABLE
 #undef X
-      Cmpps_Invalid
+        Cmpps_Invalid
+  };
 };
 
-} // end of namespace CondX8664
-
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICECONDITIONCODESX8664_H
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 96d801a..2866430 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -28,12 +28,12 @@
 namespace {
 
 const struct InstX8632BrAttributes_ {
-  CondX86::BrCond Opposite;
+  X8632::Traits::Cond::BrCond Opposite;
   const char *DisplayString;
   const char *EmitString;
 } InstX8632BrAttributes[] = {
 #define X(tag, encode, opp, dump, emit)                                        \
-  { CondX86::opp, dump, emit }                                                 \
+  { X8632::Traits::Cond::opp, dump, emit }                                     \
   ,
     ICEINSTX8632BR_TABLE
 #undef X
@@ -85,7 +85,8 @@
   return TypeX8632Attributes[Ty].FldString;
 }
 
-CondX86::BrCond InstX8632::getOppositeCondition(CondX86::BrCond Cond) {
+X8632::Traits::Cond::BrCond
+InstX8632::getOppositeCondition(X8632::Traits::Cond::BrCond Cond) {
   return InstX8632BrAttributes[Cond].Opposite;
 }
 
@@ -159,7 +160,8 @@
 
 InstX8632Br::InstX8632Br(Cfg *Func, const CfgNode *TargetTrue,
                          const CfgNode *TargetFalse,
-                         const InstX8632Label *Label, CondX86::BrCond Condition)
+                         const InstX8632Label *Label,
+                         X8632::Traits::Cond::BrCond Condition)
     : InstX8632(Func, InstX8632::Br, 0, nullptr), Condition(Condition),
       TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
 
@@ -178,7 +180,8 @@
     return false;
 
   // Unconditional branch to the next node can be removed.
-  if (Condition == CondX86::Br_None && getTargetFalse() == NextNode) {
+  if (Condition == X8632::Traits::Cond::Br_None &&
+      getTargetFalse() == NextNode) {
     assert(getTargetTrue() == nullptr);
     setDeleted();
     return true;
@@ -193,7 +196,7 @@
   // (which was already tested above), then invert the branch
   // condition, swap the targets, and set new fallthrough to nullptr.
   if (getTargetTrue() == NextNode) {
-    assert(Condition != CondX86::Br_None);
+    assert(Condition != X8632::Traits::Cond::Br_None);
     Condition = getOppositeCondition(Condition);
     TargetTrue = getTargetFalse();
     TargetFalse = nullptr;
@@ -225,7 +228,7 @@
 }
 
 InstX8632Cmov::InstX8632Cmov(Cfg *Func, Variable *Dest, Operand *Source,
-                             CondX86::BrCond Condition)
+                             X8632::Traits::Cond::BrCond Condition)
     : InstX8632(Func, InstX8632::Cmov, 2, Dest), Condition(Condition) {
   // The final result is either the original Dest, or Source, so mark
   // both as sources.
@@ -234,7 +237,7 @@
 }
 
 InstX8632Cmpps::InstX8632Cmpps(Cfg *Func, Variable *Dest, Operand *Source,
-                               CondX86::CmppsCond Condition)
+                               X8632::Traits::Cond::CmppsCond Condition)
     : InstX8632(Func, InstX8632::Cmpps, 2, Dest), Condition(Condition) {
   addSource(Dest);
   addSource(Source);
@@ -352,7 +355,8 @@
     addSource(Source);
 }
 
-InstX8632Setcc::InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond)
+InstX8632Setcc::InstX8632Setcc(Cfg *Func, Variable *Dest,
+                               X8632::Traits::Cond::BrCond Cond)
     : InstX8632(Func, InstX8632::Setcc, 0, Dest), Condition(Cond) {}
 
 InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
@@ -417,7 +421,7 @@
   Ostream &Str = Func->getContext()->getStrEmit();
   Str << "\t";
 
-  if (Condition == CondX86::Br_None) {
+  if (Condition == X8632::Traits::Cond::Br_None) {
     Str << "jmp";
   } else {
     Str << InstX8632BrAttributes[Condition].EmitString;
@@ -426,7 +430,7 @@
   if (Label) {
     Str << "\t" << Label->getName(Func);
   } else {
-    if (Condition == CondX86::Br_None) {
+    if (Condition == X8632::Traits::Cond::Br_None) {
       Str << "\t" << getTargetFalse()->getAsmName();
     } else {
       Str << "\t" << getTargetTrue()->getAsmName();
@@ -443,7 +447,7 @@
     X8632::Label *L = Asm->GetOrCreateLocalLabel(Label->getNumber());
     // In all these cases, local Labels should only be used for Near.
     const bool Near = true;
-    if (Condition == CondX86::Br_None) {
+    if (Condition == X8632::Traits::Cond::Br_None) {
       Asm->jmp(L, Near);
     } else {
       Asm->j(Condition, L, Near);
@@ -452,7 +456,7 @@
     // Pessimistically assume it's far. This only affects Labels that
     // are not Bound.
     const bool Near = false;
-    if (Condition == CondX86::Br_None) {
+    if (Condition == X8632::Traits::Cond::Br_None) {
       X8632::Label *L =
           Asm->GetOrCreateCfgNodeLabel(getTargetFalse()->getIndex());
       assert(!getTargetTrue());
@@ -476,7 +480,7 @@
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "br ";
 
-  if (Condition == CondX86::Br_None) {
+  if (Condition == X8632::Traits::Cond::Br_None) {
     Str << "label %"
         << (Label ? Label->getName(Func) : getTargetFalse()->getName());
     return;
@@ -636,8 +640,9 @@
           RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum());
       (Asm->*(Emitter.Reg))(Ty, VarReg);
     } else {
-      X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                   ->stackVarToAsmOperand(Var));
+      X8632::Traits::Address StackAddr(
+          static_cast<TargetX8632 *>(Func->getTarget())
+              ->stackVarToAsmOperand(Var));
       (Asm->*(Emitter.Addr))(Ty, StackAddr);
     }
   } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Op)) {
@@ -666,7 +671,7 @@
               : RegX8632::getEncodedGPR(SrcVar->getRegNum());
       (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
     } else {
-      X8632::Address SrcStackAddr =
+      X8632::Traits::Address SrcStackAddr =
           static_cast<TargetX8632 *>(Func->getTarget())
               ->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.GPRAddr))(Ty, VarReg, SrcStackAddr);
@@ -688,7 +693,8 @@
 }
 
 void emitIASAddrOpTyGPR(
-    const Cfg *Func, Type Ty, const X8632::Address &Addr, const Operand *Src,
+    const Cfg *Func, Type Ty, const X8632::Traits::Address &Addr,
+    const Operand *Src,
     const X8632::AssemblerX8632::GPREmitterAddrOp &Emitter) {
   X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
   // Src can only be Reg or Immediate.
@@ -713,8 +719,9 @@
     const X8632::AssemblerX8632::GPREmitterAddrOp &Emitter) {
   if (const auto Op0Var = llvm::dyn_cast<Variable>(Op0)) {
     assert(!Op0Var->hasReg());
-    X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                 ->stackVarToAsmOperand(Op0Var));
+    X8632::Traits::Address StackAddr(
+        static_cast<TargetX8632 *>(Func->getTarget())
+            ->stackVarToAsmOperand(Op0Var));
     emitIASAddrOpTyGPR(Func, Ty, StackAddr, Op1, Emitter);
   } else if (const auto Op0Mem = llvm::dyn_cast<OperandX8632Mem>(Op0)) {
     X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
@@ -786,7 +793,7 @@
           RegX8632::getEncodedXmm(SrcVar->getRegNum());
       (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
     } else {
-      X8632::Address SrcStackAddr =
+      X8632::Traits::Address SrcStackAddr =
           static_cast<TargetX8632 *>(Func->getTarget())
               ->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
@@ -813,7 +820,7 @@
           RegX8632::getEncodedXmm(SrcVar->getRegNum());
       (Asm->*(Emitter.XmmXmm))(Ty, VarReg, SrcReg);
     } else {
-      X8632::Address SrcStackAddr =
+      X8632::Traits::Address SrcStackAddr =
           static_cast<TargetX8632 *>(Func->getTarget())
               ->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
@@ -823,7 +830,7 @@
     (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
   } else if (const auto Imm = llvm::dyn_cast<Constant>(Src)) {
     (Asm->*(Emitter.XmmAddr))(Ty, VarReg,
-                              X8632::Address::ofConstPool(Asm, Imm));
+                              X8632::Traits::Address::ofConstPool(Asm, Imm));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
@@ -842,7 +849,7 @@
       SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
       (Asm->*(Emitter.RegReg))(DispatchTy, DestReg, SrcReg);
     } else {
-      X8632::Address SrcStackAddr =
+      X8632::Traits::Address SrcStackAddr =
           static_cast<TargetX8632 *>(Func->getTarget())
               ->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, SrcStackAddr);
@@ -871,7 +878,7 @@
       SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
       (Asm->*(Emitter.RegRegImm))(DispatchTy, DestReg, SrcReg, Imm);
     } else {
-      X8632::Address SrcStackAddr =
+      X8632::Traits::Address SrcStackAddr =
           static_cast<TargetX8632 *>(Func->getTarget())
               ->stackVarToAsmOperand(SrcVar);
       (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, SrcStackAddr, Imm);
@@ -896,8 +903,9 @@
         (Asm->*(Emitter.XmmXmm))(DestReg,
                                  RegX8632::getEncodedXmm(SrcVar->getRegNum()));
       } else {
-        X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                     ->stackVarToAsmOperand(SrcVar));
+        X8632::Traits::Address StackAddr(
+            static_cast<TargetX8632 *>(Func->getTarget())
+                ->stackVarToAsmOperand(SrcVar));
         (Asm->*(Emitter.XmmAddr))(DestReg, StackAddr);
       }
     } else if (const auto SrcMem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
@@ -907,8 +915,9 @@
       llvm_unreachable("Unexpected operand type");
     }
   } else {
-    X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                 ->stackVarToAsmOperand(Dest));
+    X8632::Traits::Address StackAddr(
+        static_cast<TargetX8632 *>(Func->getTarget())
+            ->stackVarToAsmOperand(Dest));
     // Src must be a register in this case.
     const auto SrcVar = llvm::cast<Variable>(Src);
     assert(SrcVar->hasReg());
@@ -1194,7 +1203,7 @@
   bool InstructionSetIsValid =
       getDest()->getType() == IceType_v8i16 ||
       static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-          TargetX8632::SSE4_1;
+          X8632::Traits::SSE4_1;
   (void)TypesAreValid;
   (void)InstructionSetIsValid;
   assert(TypesAreValid);
@@ -1210,7 +1219,7 @@
   bool InstructionSetIsValid =
       Ty == IceType_v8i16 ||
       static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-          TargetX8632::SSE4_1;
+          X8632::Traits::SSE4_1;
   (void)TypesAreValid;
   (void)InstructionSetIsValid;
   assert(TypesAreValid);
@@ -1336,13 +1345,13 @@
   if (!BuildDefs::dump())
     return;
   assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         TargetX8632::SSE4_1);
+         X8632::Traits::SSE4_1);
   emitVariableBlendInst(Opcode, this, Func);
 }
 
 template <> void InstX8632Blendvps::emitIAS(const Cfg *Func) const {
   assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         TargetX8632::SSE4_1);
+         X8632::Traits::SSE4_1);
   static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
       &X8632::AssemblerX8632::blendvps, &X8632::AssemblerX8632::blendvps};
   emitIASVariableBlendInst(this, Func, Emitter);
@@ -1352,13 +1361,13 @@
   if (!BuildDefs::dump())
     return;
   assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         TargetX8632::SSE4_1);
+         X8632::Traits::SSE4_1);
   emitVariableBlendInst(Opcode, this, Func);
 }
 
 template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const {
   assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         TargetX8632::SSE4_1);
+         X8632::Traits::SSE4_1);
   static const X8632::AssemblerX8632::XmmEmitterRegOp Emitter = {
       &X8632::AssemblerX8632::pblendvb, &X8632::AssemblerX8632::pblendvb};
   emitIASVariableBlendInst(this, Func, Emitter);
@@ -1416,7 +1425,7 @@
 template <> void InstX8632Insertps::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 3);
   assert(static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-         TargetX8632::SSE4_1);
+         X8632::Traits::SSE4_1);
   const Variable *Dest = getDest();
   assert(Dest == getSrc(0));
   Type Ty = Dest->getType();
@@ -1601,7 +1610,7 @@
   Ostream &Str = Func->getContext()->getStrEmit();
   Variable *Dest = getDest();
   Str << "\t";
-  assert(Condition != CondX86::Br_None);
+  assert(Condition != X8632::Traits::Cond::Br_None);
   assert(getDest()->hasReg());
   Str << "cmov" << InstX8632BrAttributes[Condition].DisplayString
       << getWidthString(Dest->getType()) << "\t";
@@ -1611,7 +1620,7 @@
 }
 
 void InstX8632Cmov::emitIAS(const Cfg *Func) const {
-  assert(Condition != CondX86::Br_None);
+  assert(Condition != X8632::Traits::Cond::Br_None);
   assert(getDest()->hasReg());
   assert(getSrcSize() == 2);
   Operand *Src = getSrc(1);
@@ -1654,7 +1663,7 @@
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 2);
-  assert(Condition < CondX86::Cmpps_Invalid);
+  assert(Condition < X8632::Traits::Cond::Cmpps_Invalid);
   Str << "\t";
   Str << "cmp" << InstX8632CmppsAttributes[Condition].EmitString << "ps"
       << "\t";
@@ -1666,7 +1675,7 @@
 void InstX8632Cmpps::emitIAS(const Cfg *Func) const {
   X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
   assert(getSrcSize() == 2);
-  assert(Condition < CondX86::Cmpps_Invalid);
+  assert(Condition < X8632::Traits::Cond::Cmpps_Invalid);
   // Assuming there isn't any load folding for cmpps, and vector constants
   // are not allowed in PNaCl.
   assert(llvm::isa<Variable>(getSrc(1)));
@@ -1675,8 +1684,9 @@
     Asm->cmpps(RegX8632::getEncodedXmm(getDest()->getRegNum()),
                RegX8632::getEncodedXmm(SrcVar->getRegNum()), Condition);
   } else {
-    X8632::Address SrcStackAddr = static_cast<TargetX8632 *>(Func->getTarget())
-                                      ->stackVarToAsmOperand(SrcVar);
+    X8632::Traits::Address SrcStackAddr =
+        static_cast<TargetX8632 *>(Func->getTarget())
+            ->stackVarToAsmOperand(SrcVar);
     Asm->cmpps(RegX8632::getEncodedXmm(getDest()->getRegNum()), SrcStackAddr,
                Condition);
   }
@@ -1686,7 +1696,7 @@
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrDump();
-  assert(Condition < CondX86::Cmpps_Invalid);
+  assert(Condition < X8632::Traits::Cond::Cmpps_Invalid);
   dumpDest(Func);
   Str << " = cmp" << InstX8632CmppsAttributes[Condition].EmitString << "ps"
       << "\t";
@@ -1713,7 +1723,7 @@
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Address Addr = Mem->toAsmAddress(Asm);
+  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
   const auto VarReg = llvm::cast<Variable>(getSrc(2));
   assert(VarReg->hasReg());
   const RegX8632::GPRRegister Reg =
@@ -1749,7 +1759,7 @@
   X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Address Addr = Mem->toAsmAddress(Asm);
+  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
   Asm->cmpxchg8b(Addr, Locked);
 }
 
@@ -2027,8 +2037,9 @@
     X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
     if (const auto DestVar = llvm::dyn_cast<Variable>(Dest)) {
       assert(!DestVar->hasReg());
-      X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                   ->stackVarToAsmOperand(DestVar));
+      X8632::Traits::Address StackAddr(
+          static_cast<TargetX8632 *>(Func->getTarget())
+              ->stackVarToAsmOperand(DestVar));
       Asm->movss(DestTy, StackAddr, SrcReg);
     } else {
       const auto DestMem = llvm::cast<OperandX8632Mem>(Dest);
@@ -2217,8 +2228,9 @@
   } else {
     // Dest must be Stack and Src *could* be a register. Use Src's type
     // to decide on the emitters.
-    X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                 ->stackVarToAsmOperand(Dest));
+    X8632::Traits::Address StackAddr(
+        static_cast<TargetX8632 *>(Func->getTarget())
+            ->stackVarToAsmOperand(Dest));
     if (isScalarFloatingType(SrcTy)) {
       // Src must be a register.
       const auto SrcVar = llvm::cast<Variable>(Src);
@@ -2251,8 +2263,9 @@
     if (SrcVar->hasReg()) {
       Asm->movd(DestReg, RegX8632::getEncodedGPR(SrcVar->getRegNum()));
     } else {
-      X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                   ->stackVarToAsmOperand(SrcVar));
+      X8632::Traits::Address StackAddr(
+          static_cast<TargetX8632 *>(Func->getTarget())
+              ->stackVarToAsmOperand(SrcVar));
       Asm->movd(DestReg, StackAddr);
     }
   } else {
@@ -2263,8 +2276,9 @@
     if (Dest->hasReg()) {
       Asm->movd(RegX8632::getEncodedGPR(Dest->getRegNum()), SrcReg);
     } else {
-      X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                   ->stackVarToAsmOperand(Dest));
+      X8632::Traits::Address StackAddr(
+          static_cast<TargetX8632 *>(Func->getTarget())
+              ->stackVarToAsmOperand(Dest));
       Asm->movd(StackAddr, SrcReg);
     }
   }
@@ -2414,20 +2428,22 @@
       // temporary stack slot.
       X8632::Immediate Width(typeWidthInBytes(Ty));
       Asm->sub(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
-      X8632::Address StackSlot = X8632::Address(RegX8632::Encoded_Reg_esp, 0);
+      X8632::Traits::Address StackSlot =
+          X8632::Traits::Address(RegX8632::Encoded_Reg_esp, 0);
       Asm->movss(Ty, StackSlot, RegX8632::getEncodedXmm(Var->getRegNum()));
       Asm->fld(Ty, StackSlot);
       Asm->add(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
     } else {
-      X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                   ->stackVarToAsmOperand(Var));
+      X8632::Traits::Address StackAddr(
+          static_cast<TargetX8632 *>(Func->getTarget())
+              ->stackVarToAsmOperand(Var));
       Asm->fld(Ty, StackAddr);
     }
   } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
     assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
     Asm->fld(Ty, Mem->toAsmAddress(Asm));
   } else if (const auto Imm = llvm::dyn_cast<Constant>(Src)) {
-    Asm->fld(Ty, X8632::Address::ofConstPool(Asm, Imm));
+    Asm->fld(Ty, X8632::Traits::Address::ofConstPool(Asm, Imm));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
@@ -2489,8 +2505,9 @@
   }
   Type Ty = Dest->getType();
   if (!Dest->hasReg()) {
-    X8632::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                 ->stackVarToAsmOperand(Dest));
+    X8632::Traits::Address StackAddr(
+        static_cast<TargetX8632 *>(Func->getTarget())
+            ->stackVarToAsmOperand(Dest));
     Asm->fstp(Ty, StackAddr);
   } else {
     // Dest is a physical (xmm) register, so st(0) needs to go through
@@ -2499,7 +2516,8 @@
     // the stack slot.
     X8632::Immediate Width(typeWidthInBytes(Ty));
     Asm->sub(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
-    X8632::Address StackSlot = X8632::Address(RegX8632::Encoded_Reg_esp, 0);
+    X8632::Traits::Address StackSlot =
+        X8632::Traits::Address(RegX8632::Encoded_Reg_esp, 0);
     Asm->fstp(Ty, StackSlot);
     Asm->movss(Ty, RegX8632::getEncodedXmm(Dest->getRegNum()), StackSlot);
     Asm->add(IceType_i32, RegX8632::Encoded_Reg_esp, Width);
@@ -2541,7 +2559,7 @@
   assert(getSrc(0)->getType() == IceType_v8i16 ||
          getSrc(0)->getType() == IceType_v8i1 ||
          static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             TargetX8632::SSE4_1);
+             X8632::Traits::SSE4_1);
   Str << "\t" << Opcode << TypeX8632Attributes[getSrc(0)->getType()].PackString
       << "\t";
   getSrc(1)->emit(Func);
@@ -2563,7 +2581,7 @@
   Type DispatchTy = Dest->getType();
   assert(DispatchTy == IceType_i16 ||
          static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             TargetX8632::SSE4_1);
+             X8632::Traits::SSE4_1);
   // pextrw must take a register dest. There is an SSE4.1 version that takes
   // a memory dest, but we aren't using it. For uniformity, just restrict
   // them all to have a register dest for now.
@@ -2587,7 +2605,7 @@
   assert(getDest()->getType() == IceType_v8i16 ||
          getDest()->getType() == IceType_v8i1 ||
          static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             TargetX8632::SSE4_1);
+             X8632::Traits::SSE4_1);
   Str << "\t" << Opcode << TypeX8632Attributes[getDest()->getType()].PackString
       << "\t";
   getSrc(2)->emit(Func);
@@ -2615,7 +2633,7 @@
   Type DispatchTy = Src0->getType();
   assert(DispatchTy == IceType_i16 ||
          static_cast<TargetX8632 *>(Func->getTarget())->getInstructionSet() >=
-             TargetX8632::SSE4_1);
+             X8632::Traits::SSE4_1);
   // If src1 is a register, it should always be r32 (this should fall out
   // from the encodings for ByteRegs overlapping the encodings for r32),
   // but we have to trust the regalloc to not choose "ah", where it
@@ -2796,7 +2814,7 @@
 }
 
 void InstX8632Setcc::emitIAS(const Cfg *Func) const {
-  assert(Condition != CondX86::Br_None);
+  assert(Condition != X8632::Traits::Cond::Br_None);
   assert(getDest()->getType() == IceType_i1);
   assert(getSrcSize() == 0);
   X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
@@ -2835,7 +2853,7 @@
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Address Addr = Mem->toAsmAddress(Asm);
+  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
   const auto VarReg = llvm::cast<Variable>(getSrc(1));
   assert(VarReg->hasReg());
   const RegX8632::GPRRegister Reg =
@@ -2871,7 +2889,7 @@
   Type Ty = getSrc(0)->getType();
   const auto Mem = llvm::cast<OperandX8632Mem>(getSrc(0));
   assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
-  const X8632::Address Addr = Mem->toAsmAddress(Asm);
+  const X8632::Traits::Address Addr = Mem->toAsmAddress(Asm);
   const auto VarReg = llvm::cast<Variable>(getSrc(1));
   assert(VarReg->hasReg());
   const RegX8632::GPRRegister Reg =
@@ -2982,7 +3000,7 @@
   }
 }
 
-X8632::Address OperandX8632Mem::toAsmAddress(Assembler *Asm) const {
+X8632::Traits::Address OperandX8632Mem::toAsmAddress(Assembler *Asm) const {
   int32_t Disp = 0;
   AssemblerFixup *Fixup = nullptr;
   // Determine the offset (is it relocatable?)
@@ -3000,29 +3018,31 @@
 
   // Now convert to the various possible forms.
   if (getBase() && getIndex()) {
-    return X8632::Address(RegX8632::getEncodedGPR(getBase()->getRegNum()),
-                          RegX8632::getEncodedGPR(getIndex()->getRegNum()),
-                          X8632::ScaleFactor(getShift()), Disp);
+    return X8632::Traits::Address(
+        RegX8632::getEncodedGPR(getBase()->getRegNum()),
+        RegX8632::getEncodedGPR(getIndex()->getRegNum()),
+        X8632::Traits::ScaleFactor(getShift()), Disp);
   } else if (getBase()) {
-    return X8632::Address(RegX8632::getEncodedGPR(getBase()->getRegNum()),
-                          Disp);
+    return X8632::Traits::Address(
+        RegX8632::getEncodedGPR(getBase()->getRegNum()), Disp);
   } else if (getIndex()) {
-    return X8632::Address(RegX8632::getEncodedGPR(getIndex()->getRegNum()),
-                          X8632::ScaleFactor(getShift()), Disp);
+    return X8632::Traits::Address(
+        RegX8632::getEncodedGPR(getIndex()->getRegNum()),
+        X8632::Traits::ScaleFactor(getShift()), Disp);
   } else if (Fixup) {
-    return X8632::Address::Absolute(Disp, Fixup);
+    return X8632::Traits::Address::Absolute(Disp, Fixup);
   } else {
-    return X8632::Address::Absolute(Disp);
+    return X8632::Traits::Address::Absolute(Disp);
   }
 }
 
-X8632::Address VariableSplit::toAsmAddress(const Cfg *Func) const {
+X8632::Traits::Address VariableSplit::toAsmAddress(const Cfg *Func) const {
   assert(!Var->hasReg());
   const TargetLowering *Target = Func->getTarget();
   int32_t Offset =
       Var->getStackOffset() + Target->getStackAdjustment() + getOffset();
-  return X8632::Address(RegX8632::getEncodedGPR(Target->getFrameOrStackReg()),
-                        Offset);
+  return X8632::Traits::Address(
+      RegX8632::getEncodedGPR(Target->getFrameOrStackReg()), Offset);
 }
 
 void VariableSplit::emit(const Cfg *Func) const {
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 51868b4..0145293 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -22,6 +22,7 @@
 #include "IceInst.h"
 #include "IceInstX8632.def"
 #include "IceOperand.h"
+#include "IceTargetLoweringX8632Traits.h"
 
 namespace Ice {
 
@@ -76,7 +77,7 @@
   uint16_t getShift() const { return Shift; }
   SegmentRegisters getSegmentRegister() const { return SegmentReg; }
   void emitSegmentOverride(X8632::AssemblerX8632 *Asm) const;
-  X8632::Address toAsmAddress(Assembler *Asm) const;
+  X8632::Traits::Address toAsmAddress(Assembler *Asm) const;
   void emit(const Cfg *Func) const override;
   using OperandX8632::dump;
   void dump(const Cfg *Func, Ostream &Str) const override;
@@ -122,7 +123,7 @@
   }
   int32_t getOffset() const { return Part == High ? 4 : 0; }
 
-  X8632::Address toAsmAddress(const Cfg *Func) const;
+  X8632::Traits::Address toAsmAddress(const Cfg *Func) const;
   void emit(const Cfg *Func) const override;
   using OperandX8632::dump;
   void dump(const Cfg *Func, Ostream &Str) const override;
@@ -279,7 +280,8 @@
 
   static const char *getWidthString(Type Ty);
   static const char *getFldString(Type Ty);
-  static CondX86::BrCond getOppositeCondition(CondX86::BrCond Cond);
+  static X8632::Traits::Cond::BrCond
+  getOppositeCondition(X8632::Traits::Cond::BrCond Cond);
   void dump(const Cfg *Func) const override;
 
   // Shared emit routines for common forms of instructions.
@@ -428,8 +430,9 @@
 public:
   // Create a conditional branch to a node.
   static InstX8632Br *create(Cfg *Func, CfgNode *TargetTrue,
-                             CfgNode *TargetFalse, CondX86::BrCond Condition) {
-    assert(Condition != CondX86::Br_None);
+                             CfgNode *TargetFalse,
+                             X8632::Traits::Cond::BrCond Condition) {
+    assert(Condition != X8632::Traits::Cond::Br_None);
     const InstX8632Label *NoLabel = nullptr;
     return new (Func->allocate<InstX8632Br>())
         InstX8632Br(Func, TargetTrue, TargetFalse, NoLabel, Condition);
@@ -438,15 +441,15 @@
   static InstX8632Br *create(Cfg *Func, CfgNode *Target) {
     const CfgNode *NoCondTarget = nullptr;
     const InstX8632Label *NoLabel = nullptr;
-    return new (Func->allocate<InstX8632Br>())
-        InstX8632Br(Func, NoCondTarget, Target, NoLabel, CondX86::Br_None);
+    return new (Func->allocate<InstX8632Br>()) InstX8632Br(
+        Func, NoCondTarget, Target, NoLabel, X8632::Traits::Cond::Br_None);
   }
   // Create a non-terminator conditional branch to a node, with a
   // fallthrough to the next instruction in the current node.  This is
   // used for switch lowering.
   static InstX8632Br *create(Cfg *Func, CfgNode *Target,
-                             CondX86::BrCond Condition) {
-    assert(Condition != CondX86::Br_None);
+                             X8632::Traits::Cond::BrCond Condition) {
+    assert(Condition != X8632::Traits::Cond::Br_None);
     const CfgNode *NoUncondTarget = nullptr;
     const InstX8632Label *NoLabel = nullptr;
     return new (Func->allocate<InstX8632Br>())
@@ -455,7 +458,7 @@
   // Create a conditional intra-block branch (or unconditional, if
   // Condition==Br_None) to a label in the current block.
   static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
-                             CondX86::BrCond Condition) {
+                             X8632::Traits::Cond::BrCond Condition) {
     const CfgNode *NoCondTarget = nullptr;
     const CfgNode *NoUncondTarget = nullptr;
     return new (Func->allocate<InstX8632Br>())
@@ -475,7 +478,7 @@
     return Sum;
   }
   bool isUnconditionalBranch() const override {
-    return !Label && Condition == CondX86::Br_None;
+    return !Label && Condition == X8632::Traits::Cond::Br_None;
   }
   bool repointEdge(CfgNode *OldNode, CfgNode *NewNode) override;
   void emit(const Cfg *Func) const override;
@@ -485,9 +488,10 @@
 
 private:
   InstX8632Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
-              const InstX8632Label *Label, CondX86::BrCond Condition);
+              const InstX8632Label *Label,
+              X8632::Traits::Cond::BrCond Condition);
 
-  CondX86::BrCond Condition;
+  X8632::Traits::Cond::BrCond Condition;
   const CfgNode *TargetTrue;
   const CfgNode *TargetFalse;
   const InstX8632Label *Label; // Intra-block branch target
@@ -1256,7 +1260,7 @@
 
 public:
   static InstX8632Cmov *create(Cfg *Func, Variable *Dest, Operand *Source,
-                               CondX86::BrCond Cond) {
+                               X8632::Traits::Cond::BrCond Cond) {
     return new (Func->allocate<InstX8632Cmov>())
         InstX8632Cmov(Func, Dest, Source, Cond);
   }
@@ -1267,9 +1271,9 @@
 
 private:
   InstX8632Cmov(Cfg *Func, Variable *Dest, Operand *Source,
-                CondX86::BrCond Cond);
+                X8632::Traits::Cond::BrCond Cond);
 
-  CondX86::BrCond Condition;
+  X8632::Traits::Cond::BrCond Condition;
 };
 
 // Cmpps instruction - compare packed singled-precision floating point
@@ -1281,7 +1285,7 @@
 
 public:
   static InstX8632Cmpps *create(Cfg *Func, Variable *Dest, Operand *Source,
-                                CondX86::CmppsCond Condition) {
+                                X8632::Traits::Cond::CmppsCond Condition) {
     return new (Func->allocate<InstX8632Cmpps>())
         InstX8632Cmpps(Func, Dest, Source, Condition);
   }
@@ -1292,9 +1296,9 @@
 
 private:
   InstX8632Cmpps(Cfg *Func, Variable *Dest, Operand *Source,
-                 CondX86::CmppsCond Cond);
+                 X8632::Traits::Cond::CmppsCond Cond);
 
-  CondX86::CmppsCond Condition;
+  X8632::Traits::Cond::CmppsCond Condition;
 };
 
 // Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
@@ -1670,7 +1674,7 @@
 
 public:
   static InstX8632Setcc *create(Cfg *Func, Variable *Dest,
-                                CondX86::BrCond Cond) {
+                                X8632::Traits::Cond::BrCond Cond) {
     return new (Func->allocate<InstX8632Setcc>())
         InstX8632Setcc(Func, Dest, Cond);
   }
@@ -1680,9 +1684,9 @@
   static bool classof(const Inst *Inst) { return isClassof(Inst, Setcc); }
 
 private:
-  InstX8632Setcc(Cfg *Func, Variable *Dest, CondX86::BrCond Cond);
+  InstX8632Setcc(Cfg *Func, Variable *Dest, X8632::Traits::Cond::BrCond Cond);
 
-  const CondX86::BrCond Condition;
+  const X8632::Traits::Cond::BrCond Condition;
 };
 
 // Exchanging Add instruction.  Exchanges the first operand (destination
diff --git a/src/IceRegistersX8632.h b/src/IceRegistersX8632.h
index 86ace82..d7bfd6f 100644
--- a/src/IceRegistersX8632.h
+++ b/src/IceRegistersX8632.h
@@ -20,93 +20,92 @@
 
 namespace Ice {
 
-namespace RegX8632 {
-
-// An enum of every register. The enum value may not match the encoding
-// used to binary encode register operands in instructions.
-enum AllRegisters {
+class RegX8632 {
+public:
+  // An enum of every register. The enum value may not match the encoding
+  // used to binary encode register operands in instructions.
+  enum AllRegisters {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
   val,
-  REGX8632_TABLE
+    REGX8632_TABLE
 #undef X
-      Reg_NUM,
+        Reg_NUM,
 #define X(val, init) val init,
-  REGX8632_TABLE_BOUNDS
+    REGX8632_TABLE_BOUNDS
 #undef X
-};
+  };
 
-// An enum of GPR Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
-enum GPRRegister {
+  // An enum of GPR Registers. The enum value does match the encoding used
+  // to binary encode register operands in instructions.
+  enum GPRRegister {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
   Encoded_##val encode,
-  REGX8632_GPR_TABLE
+    REGX8632_GPR_TABLE
 #undef X
-      Encoded_Not_GPR = -1
-};
+        Encoded_Not_GPR = -1
+  };
 
-// An enum of XMM Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
-enum XmmRegister {
+  // An enum of XMM Registers. The enum value does match the encoding used
+  // to binary encode register operands in instructions.
+  enum XmmRegister {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
   Encoded_##val encode,
-  REGX8632_XMM_TABLE
+    REGX8632_XMM_TABLE
 #undef X
-      Encoded_Not_Xmm = -1
-};
+        Encoded_Not_Xmm = -1
+  };
 
-// An enum of Byte Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
-enum ByteRegister {
+  // An enum of Byte Registers. The enum value does match the encoding used
+  // to binary encode register operands in instructions.
+  enum ByteRegister {
 #define X(val, encode) Encoded_##val encode,
-  REGX8632_BYTEREG_TABLE
+    REGX8632_BYTEREG_TABLE
 #undef X
-      Encoded_Not_ByteReg = -1
-};
+        Encoded_Not_ByteReg = -1
+  };
 
-// An enum of X87 Stack Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
-enum X87STRegister {
+  // An enum of X87 Stack Registers. The enum value does match the encoding used
+  // to binary encode register operands in instructions.
+  enum X87STRegister {
 #define X(val, encode, name) Encoded_##val encode,
-  X87ST_REGX8632_TABLE
+    X87ST_REGX8632_TABLE
 #undef X
-      Encoded_Not_X87STReg = -1
+        Encoded_Not_X87STReg = -1
+  };
+
+  static inline GPRRegister getEncodedGPR(int32_t RegNum) {
+    assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last);
+    return GPRRegister(RegNum - Reg_GPR_First);
+  }
+
+  static inline XmmRegister getEncodedXmm(int32_t RegNum) {
+    assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last);
+    return XmmRegister(RegNum - Reg_XMM_First);
+  }
+
+  static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
+    assert(RegNum == Reg_ah || (Reg_GPR_First <= RegNum && RegNum <= Reg_ebx));
+    if (RegNum == Reg_ah)
+      return Encoded_Reg_ah;
+    return ByteRegister(RegNum - Reg_GPR_First);
+  }
+
+  static inline GPRRegister getEncodedByteRegOrGPR(Type Ty, int32_t RegNum) {
+    if (isByteSizedType(Ty))
+      return GPRRegister(getEncodedByteReg(RegNum));
+    else
+      return getEncodedGPR(RegNum);
+  }
+
+  static inline X87STRegister getEncodedSTReg(int32_t RegNum) {
+    assert(Encoded_X87ST_First <= RegNum && RegNum <= Encoded_X87ST_Last);
+    return X87STRegister(RegNum);
+  }
 };
 
-static inline GPRRegister getEncodedGPR(int32_t RegNum) {
-  assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last);
-  return GPRRegister(RegNum - Reg_GPR_First);
-}
-
-static inline XmmRegister getEncodedXmm(int32_t RegNum) {
-  assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last);
-  return XmmRegister(RegNum - Reg_XMM_First);
-}
-
-static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
-  assert(RegNum == Reg_ah || (Reg_GPR_First <= RegNum && RegNum <= Reg_ebx));
-  if (RegNum == Reg_ah)
-    return Encoded_Reg_ah;
-  return ByteRegister(RegNum - Reg_GPR_First);
-}
-
-static inline GPRRegister getEncodedByteRegOrGPR(Type Ty, int32_t RegNum) {
-  if (isByteSizedType(Ty))
-    return GPRRegister(getEncodedByteReg(RegNum));
-  else
-    return getEncodedGPR(RegNum);
-}
-
-static inline X87STRegister getEncodedSTReg(int32_t RegNum) {
-  assert(Encoded_X87ST_First <= RegNum && RegNum <= Encoded_X87ST_Last);
-  return X87STRegister(RegNum);
-}
-
-} // end of namespace RegX8632
-
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICEREGISTERSX8632_H
diff --git a/src/IceRegistersX8664.h b/src/IceRegistersX8664.h
index 96567b2..ceafc86 100644
--- a/src/IceRegistersX8664.h
+++ b/src/IceRegistersX8664.h
@@ -20,80 +20,79 @@
 
 namespace Ice {
 
-namespace RegX8664 {
-
-// An enum of every register. The enum value may not match the encoding
-// used to binary encode register operands in instructions.
-enum AllRegisters {
+class RegX8664 {
+public:
+  // An enum of every register. The enum value may not match the encoding
+  // used to binary encode register operands in instructions.
+  enum AllRegisters {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
   val,
-  REGX8664_TABLE
+    REGX8664_TABLE
 #undef X
-      Reg_NUM,
+        Reg_NUM,
 #define X(val, init) val init,
-  REGX8664_TABLE_BOUNDS
+    REGX8664_TABLE_BOUNDS
 #undef X
-};
+  };
 
-// An enum of GPR Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
-enum GPRRegister {
+  // An enum of GPR Registers. The enum value does match the encoding used
+  // to binary encode register operands in instructions.
+  enum GPRRegister {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
   Encoded_##val encode,
-  REGX8664_GPR_TABLE
+    REGX8664_GPR_TABLE
 #undef X
-      Encoded_Not_GPR = -1
-};
+        Encoded_Not_GPR = -1
+  };
 
-// An enum of XMM Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
-enum XmmRegister {
+  // An enum of XMM Registers. The enum value does match the encoding used
+  // to binary encode register operands in instructions.
+  enum XmmRegister {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
   Encoded_##val encode,
-  REGX8664_XMM_TABLE
+    REGX8664_XMM_TABLE
 #undef X
-      Encoded_Not_Xmm = -1
-};
+        Encoded_Not_Xmm = -1
+  };
 
-// An enum of Byte Registers. The enum value does match the encoding used
-// to binary encode register operands in instructions.
-enum ByteRegister {
+  // An enum of Byte Registers. The enum value does match the encoding used
+  // to binary encode register operands in instructions.
+  enum ByteRegister {
 #define X(val, encode) Encoded_##val encode,
-  REGX8664_BYTEREG_TABLE
+    REGX8664_BYTEREG_TABLE
 #undef X
-      Encoded_Not_ByteReg = -1
+        Encoded_Not_ByteReg = -1
+  };
+
+  static inline GPRRegister getEncodedGPR(int32_t RegNum) {
+    assert(RegNum >= Reg_GPR_First && RegNum <= Reg_GPR_Last);
+    return static_cast<GPRRegister>(RegNum - Reg_GPR_First);
+  }
+
+  static inline XmmRegister getEncodedXmm(int32_t RegNum) {
+    assert(RegNum >= Reg_XMM_First && RegNum <= Reg_XMM_Last);
+    return static_cast<XmmRegister>(RegNum - Reg_XMM_First);
+  }
+
+  static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
+    // With a REX prefix, x86-64 cannot encode AH: that encoding selects spl
+    // instead. Reg_ah is therefore special-cased to Encoded_Reg_spl; every
+    // other register number is encoded as its offset from Reg_GPR_First.
+    const bool IsAh = (RegNum == Reg_ah);
+    return IsAh ? Encoded_Reg_spl : ByteRegister(RegNum - Reg_GPR_First);
+  }
+
+  static inline GPRRegister getEncodedByteRegOrGPR(Type Ty, int32_t RegNum) {
+    // Byte-sized types use the byte-register encoding, cast to a
+    // GPRRegister value; every other type uses the plain GPR encoding.
+    return isByteSizedType(Ty) ? GPRRegister(getEncodedByteReg(RegNum))
+                               : getEncodedGPR(RegNum);
+  }
 };
 
-static inline GPRRegister getEncodedGPR(int32_t RegNum) {
-  assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last);
-  return GPRRegister(RegNum - Reg_GPR_First);
-}
-
-static inline XmmRegister getEncodedXmm(int32_t RegNum) {
-  assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last);
-  return XmmRegister(RegNum - Reg_XMM_First);
-}
-
-static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
-  // In x86-64, AH is not encodable when the REX prefix is used; the same
-  // encoding is used for spl. Therefore, ah needs special handling.
-  if (RegNum == Reg_ah)
-    return Encoded_Reg_spl;
-  return ByteRegister(RegNum - Reg_GPR_First);
-}
-
-static inline GPRRegister getEncodedByteRegOrGPR(Type Ty, int32_t RegNum) {
-  if (isByteSizedType(Ty))
-    return GPRRegister(getEncodedByteReg(RegNum));
-  else
-    return getEncodedGPR(RegNum);
-}
-
-} // end of namespace RegX8664
-
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICEREGISTERSX8664_H
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 7024d14..e33eeef 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2,6 +2,9 @@
 //
 //                        The Subzero Code Generator
 //
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
 //===----------------------------------------------------------------------===//
 //
 // This file implements the TargetLoweringX8632 class, which
@@ -12,100 +15,19 @@
 
 #include "IceTargetLoweringX8632.h"
 
+#include "IceTargetLoweringX8632Traits.h"
 #include "IceTargetLoweringX86Base.h"
 
 namespace Ice {
+
 namespace X86Internal {
-template <> struct MachineTraits<TargetX8632> {
-  using InstructionSet = TargetX8632::X86InstructionSet;
-
-  // The following table summarizes the logic for lowering the fcmp
-  // instruction.  There is one table entry for each of the 16 conditions.
-  //
-  // The first four columns describe the case when the operands are
-  // floating point scalar values.  A comment in lowerFcmp() describes the
-  // lowering template.  In the most general case, there is a compare
-  // followed by two conditional branches, because some fcmp conditions
-  // don't map to a single x86 conditional branch.  However, in many cases
-  // it is possible to swap the operands in the comparison and have a
-  // single conditional branch.  Since it's quite tedious to validate the
-  // table by hand, good execution tests are helpful.
-  //
-  // The last two columns describe the case when the operands are vectors
-  // of floating point values.  For most fcmp conditions, there is a clear
-  // mapping to a single x86 cmpps instruction variant.  Some fcmp
-  // conditions require special code to handle and these are marked in the
-  // table with a Cmpps_Invalid predicate.
-  static const struct TableFcmpType {
-    uint32_t Default;
-    bool SwapScalarOperands;
-    CondX86::BrCond C1, C2;
-    bool SwapVectorOperands;
-    CondX86::CmppsCond Predicate;
-  } TableFcmp[];
-  static const size_t TableFcmpSize;
-
-  // The following table summarizes the logic for lowering the icmp instruction
-  // for i32 and narrower types.  Each icmp condition has a clear mapping to an
-  // x86 conditional branch instruction.
-
-  static const struct TableIcmp32Type {
-    CondX86::BrCond Mapping;
-  } TableIcmp32[];
-  static const size_t TableIcmp32Size;
-
-  // The following table summarizes the logic for lowering the icmp instruction
-  // for the i64 type.  For Eq and Ne, two separate 32-bit comparisons and
-  // conditional branches are needed.  For the other conditions, three separate
-  // conditional branches are needed.
-  static const struct TableIcmp64Type {
-    CondX86::BrCond C1, C2, C3;
-  } TableIcmp64[];
-  static const size_t TableIcmp64Size;
-
-  static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
-    size_t Index = static_cast<size_t>(Cond);
-    assert(Index < TableIcmp32Size);
-    return TableIcmp32[Index].Mapping;
-  }
-
-  static const struct TableTypeX8632AttributesType {
-    Type InVectorElementType;
-  } TableTypeX8632Attributes[];
-  static const size_t TableTypeX8632AttributesSize;
-
-  // Return the type which the elements of the vector have in the X86
-  // representation of the vector.
-  static Type getInVectorElementType(Type Ty) {
-    assert(isVectorType(Ty));
-    size_t Index = static_cast<size_t>(Ty);
-    (void)Index;
-    assert(Index < TableTypeX8632AttributesSize);
-    return TableTypeX8632Attributes[Ty].InVectorElementType;
-  }
-
-  // The maximum number of arguments to pass in XMM registers
-  static const uint32_t X86_MAX_XMM_ARGS = 4;
-  // The number of bits in a byte
-  static const uint32_t X86_CHAR_BIT = 8;
-  // Stack alignment
-  static const uint32_t X86_STACK_ALIGNMENT_BYTES;
-  // Size of the return address on the stack
-  static const uint32_t X86_RET_IP_SIZE_BYTES = 4;
-  // The number of different NOP instructions
-  static const uint32_t X86_NUM_NOP_VARIANTS = 5;
-
-  // Value is in bytes. Return Value adjusted to the next highest multiple
-  // of the stack alignment.
-  static uint32_t applyStackAlignment(uint32_t Value) {
-    return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
-  }
-};
-
 const MachineTraits<TargetX8632>::TableFcmpType
     MachineTraits<TargetX8632>::TableFcmp[] = {
 #define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
-  { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred }              \
+  {                                                                            \
+    dflt, swapS, X8632::Traits::Cond::C1, X8632::Traits::Cond::C2, swapV,      \
+        X8632::Traits::Cond::pred                                              \
+  }                                                                            \
   ,
         FCMPX8632_TABLE
 #undef X
@@ -117,7 +39,7 @@
 const MachineTraits<TargetX8632>::TableIcmp32Type
     MachineTraits<TargetX8632>::TableIcmp32[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
-  { CondX86::C_32 }                                                            \
+  { X8632::Traits::Cond::C_32 }                                                \
   ,
         ICMPX8632_TABLE
 #undef X
@@ -129,7 +51,10 @@
 const MachineTraits<TargetX8632>::TableIcmp64Type
     MachineTraits<TargetX8632>::TableIcmp64[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
-  { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 }                           \
+  {                                                                            \
+    X8632::Traits::Cond::C1_64, X8632::Traits::Cond::C2_64,                    \
+        X8632::Traits::Cond::C3_64                                             \
+  }                                                                            \
   ,
         ICMPX8632_TABLE
 #undef X
@@ -151,6 +76,7 @@
     llvm::array_lengthof(TableTypeX8632Attributes);
 
 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
+
 } // end of namespace X86Internal
 
 TargetX8632 *TargetX8632::create(Cfg *Func) {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 30f6e6e..f24275d 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -21,6 +21,7 @@
 #include "IceInstX8632.h"
 #include "IceRegistersX8632.h"
 #include "IceTargetLowering.h"
+#include "IceTargetLoweringX8632Traits.h"
 
 namespace Ice {
 
@@ -30,16 +31,11 @@
   TargetX8632 &operator=(const TargetX8632 &) = delete;
 
 public:
-  enum X86InstructionSet {
-    Begin,
-    // SSE2 is the PNaCl baseline instruction set.
-    SSE2 = Begin,
-    SSE4_1,
-    End
-  };
+  using X86InstructionSet = X8632::Traits::InstructionSet;
 
   static TargetX8632 *create(Cfg *Func);
-  virtual X8632::Address stackVarToAsmOperand(const Variable *Var) const = 0;
+  virtual X8632::Traits::Address
+  stackVarToAsmOperand(const Variable *Var) const = 0;
   virtual X86InstructionSet getInstructionSet() const = 0;
 
 protected:
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
new file mode 100644
index 0000000..4cd22fa
--- /dev/null
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -0,0 +1,354 @@
+//===- subzero/src/IceTargetLoweringX8632Traits.h - x86-32 traits -*- C++ -*-=//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X8632 Target Lowering Traits.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H
+#define SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H
+
+#include "IceAssembler.h"
+#include "IceConditionCodesX8632.h"
+#include "IceDefs.h"
+#include "IceInst.h"
+#include "IceInstX8632.def"
+#include "IceRegistersX8632.h"
+#include "IceTargetLoweringX8632.def"
+
+namespace Ice {
+
+class TargetX8632;
+
+namespace X86Internal {
+
+template <class Machine> struct MachineTraits;
+
+template <> struct MachineTraits<TargetX8632> {
+  //----------------------------------------------------------------------------
+  //     ______  ______  __    __
+  //    /\  __ \/\  ___\/\ "-./  \
+  //    \ \  __ \ \___  \ \ \-./\ \
+  //     \ \_\ \_\/\_____\ \_\ \ \_\
+  //      \/_/\/_/\/_____/\/_/  \/_/
+  //
+  //----------------------------------------------------------------------------
+  enum ScaleFactor { TIMES_1 = 0, TIMES_2 = 1, TIMES_4 = 2, TIMES_8 = 3 };
+
+  using GPRRegister = ::Ice::RegX8632::GPRRegister;
+  using XmmRegister = ::Ice::RegX8632::XmmRegister;
+  using ByteRegister = ::Ice::RegX8632::ByteRegister;
+  using X87STRegister = ::Ice::RegX8632::X87STRegister;
+
+  using Cond = ::Ice::CondX86;
+
+  using RegisterSet = ::Ice::RegX8632;
+  static const GPRRegister Encoded_Reg_Accumulator = RegX8632::Encoded_Reg_eax;
+  static const GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx;
+  static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32;
+
+  class Operand {
+  public:
+    Operand(const Operand &other)
+        : length_(other.length_), fixup_(other.fixup_) {
+      memmove(&encoding_[0], &other.encoding_[0], other.length_);
+    }
+
+    Operand &operator=(const Operand &other) {
+      length_ = other.length_;
+      fixup_ = other.fixup_;
+      memmove(&encoding_[0], &other.encoding_[0], other.length_);
+      return *this;
+    }
+
+    uint8_t mod() const { return (encoding_at(0) >> 6) & 3; }
+
+    GPRRegister rm() const {
+      return static_cast<GPRRegister>(encoding_at(0) & 7);
+    }
+
+    ScaleFactor scale() const {
+      return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
+    }
+
+    GPRRegister index() const {
+      return static_cast<GPRRegister>((encoding_at(1) >> 3) & 7);
+    }
+
+    GPRRegister base() const {
+      return static_cast<GPRRegister>(encoding_at(1) & 7);
+    }
+
+    int8_t disp8() const {
+      assert(length_ >= 2);
+      return static_cast<int8_t>(encoding_[length_ - 1]);
+    }
+
+    int32_t disp32() const {
+      assert(length_ >= 5);
+      return bit_copy<int32_t>(encoding_[length_ - 4]);
+    }
+
+    AssemblerFixup *fixup() const { return fixup_; }
+
+  protected:
+    Operand() : length_(0), fixup_(nullptr) {} // Needed by subclass Address.
+
+    void SetModRM(int mod, GPRRegister rm) {
+      assert((mod & ~3) == 0);
+      encoding_[0] = (mod << 6) | rm;
+      length_ = 1;
+    }
+
+    void SetSIB(ScaleFactor scale, GPRRegister index, GPRRegister base) {
+      assert(length_ == 1);
+      assert((scale & ~3) == 0);
+      encoding_[1] = (scale << 6) | (index << 3) | base;
+      length_ = 2;
+    }
+
+    void SetDisp8(int8_t disp) {
+      assert(length_ == 1 || length_ == 2);
+      encoding_[length_++] = static_cast<uint8_t>(disp);
+    }
+
+    void SetDisp32(int32_t disp) {
+      // Appended right after the ModRM byte (and SIB byte, when present).
+      assert(length_ == 1 || length_ == 2);
+      memmove(&encoding_[length_], &disp, sizeof(disp));
+      length_ += sizeof(disp);
+    }
+
+    void SetFixup(AssemblerFixup *fixup) { fixup_ = fixup; }
+
+  private:
+    uint8_t length_;
+    uint8_t encoding_[6];
+    AssemblerFixup *fixup_;
+
+    explicit Operand(GPRRegister reg) : fixup_(nullptr) { SetModRM(3, reg); }
+
+    // Get the operand encoding byte at the given index.
+    uint8_t encoding_at(intptr_t index) const {
+      assert(index >= 0 && index < length_);
+      return encoding_[index];
+    }
+
+    // Reports whether this operand is just the register `reg` in disguise,
+    // which lets the assembler pick a better encoding for it.
+    bool IsRegister(GPRRegister reg) const {
+      // Addressing mode is register only.
+      const bool RegisterOnly = (encoding_[0] & 0xF8) == 0xC0;
+      // Register codes match.
+      return RegisterOnly && (encoding_[0] & 0x07) == reg;
+    }
+
+    template <class> friend class AssemblerX86Base;
+  };
+
+  class Address : public Operand {
+    Address() = delete;
+
+  public:
+    Address(const Address &other) : Operand(other) {}
+
+    Address &operator=(const Address &other) {
+      Operand::operator=(other);
+      return *this;
+    }
+
+    Address(GPRRegister base, int32_t disp) {
+      // mod selects the displacement: 0 = none, 1 = 8-bit, 2 = 32-bit. An ebp
+      // base never takes the "none" form, even for a zero displacement.
+      int Mod = 2;
+      if (disp == 0 && base != RegX8632::Encoded_Reg_ebp)
+        Mod = 0;
+      else if (Utils::IsInt(8, disp))
+        Mod = 1;
+      SetModRM(Mod, base);
+      // An esp base additionally gets a SIB byte, whatever the displacement.
+      if (base == RegX8632::Encoded_Reg_esp)
+        SetSIB(TIMES_1, RegX8632::Encoded_Reg_esp, base);
+      if (Mod == 1)
+        SetDisp8(disp);
+      else if (Mod == 2)
+        SetDisp32(disp);
+    }
+
+    Address(GPRRegister index, ScaleFactor scale, int32_t disp) {
+      assert(index != RegX8632::Encoded_Reg_esp); // Illegal addressing mode.
+      SetModRM(0, RegX8632::Encoded_Reg_esp);
+      SetSIB(scale, index, RegX8632::Encoded_Reg_ebp);
+      SetDisp32(disp);
+    }
+
+    Address(GPRRegister base, GPRRegister index, ScaleFactor scale,
+            int32_t disp) {
+      assert(index != RegX8632::Encoded_Reg_esp); // Illegal addressing mode.
+      // mod selects the displacement: 0 = none, 1 = 8-bit, 2 = 32-bit. An ebp
+      // base never takes the "none" form, even for a zero displacement.
+      const bool NoDisp = disp == 0 && base != RegX8632::Encoded_Reg_ebp;
+      const int Mod = NoDisp ? 0 : (Utils::IsInt(8, disp) ? 1 : 2);
+      // Every form here puts esp in rm and follows ModRM with a SIB byte.
+      SetModRM(Mod, RegX8632::Encoded_Reg_esp);
+      SetSIB(scale, index, base);
+      // Append the displacement bytes, if any.
+      if (Mod == 1)
+        SetDisp8(disp);
+      else if (Mod == 2)
+        SetDisp32(disp);
+    }
+
+    // AbsoluteTag is a special tag used by clients to create an absolute
+    // Address.
+    enum AbsoluteTag { ABSOLUTE };
+
+    Address(AbsoluteTag, const uintptr_t Addr) {
+      SetModRM(0, RegX8632::Encoded_Reg_ebp);
+      SetDisp32(static_cast<int32_t>(Addr)); // Only the low 32 bits are kept.
+    }
+
+    // TODO(jpp): remove this.
+    static Address Absolute(const uintptr_t Addr) {
+      return Address(ABSOLUTE, Addr);
+    }
+
+    Address(AbsoluteTag, RelocOffsetT Offset, AssemblerFixup *Fixup) {
+      SetModRM(0, RegX8632::Encoded_Reg_ebp);
+      // Use the Offset in the displacement for now. If we decide to process
+      // fixups later, we'll need to patch up the emitted displacement.
+      SetDisp32(Offset);
+      SetFixup(Fixup);
+    }
+
+    // TODO(jpp): remove this.
+    static Address Absolute(RelocOffsetT Offset, AssemblerFixup *Fixup) {
+      return Address(ABSOLUTE, Offset, Fixup);
+    }
+
+    static Address ofConstPool(Assembler *Asm, const Constant *Imm) {
+      // Absolute address, zero displacement, R_386_32 fixup against Imm.
+      auto *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Imm);
+      return Address(ABSOLUTE, RelocOffsetT(0), Fixup);
+    }
+  };
+
+  //----------------------------------------------------------------------------
+  //     __      ______  __     __  ______  ______  __  __   __  ______
+  //    /\ \    /\  __ \/\ \  _ \ \/\  ___\/\  == \/\ \/\ "-.\ \/\  ___\
+  //    \ \ \___\ \ \/\ \ \ \/ ".\ \ \  __\\ \  __<\ \ \ \ \-.  \ \ \__ \
+  //     \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
+  //      \/_____/\/_____/\/_/   \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
+  //
+  //----------------------------------------------------------------------------
+  enum InstructionSet {
+    Begin,
+    // SSE2 is the PNaCl baseline instruction set.
+    SSE2 = Begin,
+    SSE4_1,
+    End
+  };
+
+  // The maximum number of arguments to pass in XMM registers
+  static const uint32_t X86_MAX_XMM_ARGS = 4;
+  // The number of bits in a byte
+  static const uint32_t X86_CHAR_BIT = 8;
+  // Stack alignment. This is defined in IceTargetLoweringX8632.cpp because it
+  // is used as an argument to std::max(), and the default std::less<T> has an
+  // operator(T const&, T const&) which requires this member to have an address.
+  static const uint32_t X86_STACK_ALIGNMENT_BYTES;
+  // Size of the return address on the stack
+  static const uint32_t X86_RET_IP_SIZE_BYTES = 4;
+  // The number of different NOP instructions
+  static const uint32_t X86_NUM_NOP_VARIANTS = 5;
+
+  // Value is in bytes. Return Value adjusted to the next highest multiple
+  // of the stack alignment.
+  static uint32_t applyStackAlignment(uint32_t Value) {
+    return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
+  }
+
+  // Maps a vector type to the element type it has in the X86 representation
+  // of that vector, as recorded in TableTypeX8632Attributes.
+  static Type getInVectorElementType(Type Ty) {
+    // Only vector types are valid here; Ty's value is the table row.
+    assert(isVectorType(Ty));
+    const size_t Row = static_cast<size_t>(Ty);
+    assert(Row < TableTypeX8632AttributesSize);
+    return TableTypeX8632Attributes[Row].InVectorElementType;
+  }
+
+  // Note: The following data structures are defined in
+  // IceTargetLoweringX8632.cpp.
+
+  // The following table summarizes the logic for lowering the fcmp
+  // instruction.  There is one table entry for each of the 16 conditions.
+  //
+  // The first four columns describe the case when the operands are
+  // floating point scalar values.  A comment in lowerFcmp() describes the
+  // lowering template.  In the most general case, there is a compare
+  // followed by two conditional branches, because some fcmp conditions
+  // don't map to a single x86 conditional branch.  However, in many cases
+  // it is possible to swap the operands in the comparison and have a
+  // single conditional branch.  Since it's quite tedious to validate the
+  // table by hand, good execution tests are helpful.
+  //
+  // The last two columns describe the case when the operands are vectors
+  // of floating point values.  For most fcmp conditions, there is a clear
+  // mapping to a single x86 cmpps instruction variant.  Some fcmp
+  // conditions require special code to handle and these are marked in the
+  // table with a Cmpps_Invalid predicate.
+  static const struct TableFcmpType {
+    uint32_t Default;
+    bool SwapScalarOperands;
+    CondX86::BrCond C1, C2;
+    bool SwapVectorOperands;
+    CondX86::CmppsCond Predicate;
+  } TableFcmp[];
+  static const size_t TableFcmpSize;
+
+  // The following table summarizes the logic for lowering the icmp instruction
+  // for i32 and narrower types.  Each icmp condition has a clear mapping to an
+  // x86 conditional branch instruction.
+
+  static const struct TableIcmp32Type {
+    CondX86::BrCond Mapping;
+  } TableIcmp32[];
+  static const size_t TableIcmp32Size;
+
+  // The following table summarizes the logic for lowering the icmp instruction
+  // for the i64 type.  For Eq and Ne, two separate 32-bit comparisons and
+  // conditional branches are needed.  For the other conditions, three separate
+  // conditional branches are needed.
+  static const struct TableIcmp64Type {
+    CondX86::BrCond C1, C2, C3;
+  } TableIcmp64[];
+  static const size_t TableIcmp64Size;
+
+  static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
+    // The icmp condition's enum value doubles as the TableIcmp32 row index.
+    assert(static_cast<size_t>(Cond) < TableIcmp32Size);
+    return TableIcmp32[static_cast<size_t>(Cond)].Mapping;
+  }
+
+  static const struct TableTypeX8632AttributesType {
+    Type InVectorElementType;
+  } TableTypeX8632Attributes[];
+  static const size_t TableTypeX8632AttributesSize;
+};
+
+} // end of namespace X86Internal
+
+namespace X8632 {
+using Traits = ::Ice::X86Internal::MachineTraits<TargetX8632>;
+} // end of namespace X8632
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 6c32f26..46c53f8 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -107,7 +107,9 @@
   void doLoadOpt();
   bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
 
-  SizeT getNumRegisters() const override { return RegX8632::Reg_NUM; }
+  SizeT getNumRegisters() const override {
+    return Traits::RegisterSet::Reg_NUM;
+  }
   Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override;
   IceString getRegName(SizeT RegNum, Type Ty) const override;
   llvm::SmallBitVector getRegisterSet(RegSetMask Include,
@@ -117,7 +119,8 @@
   }
   bool hasFramePointer() const override { return IsEbpBasedFrame; }
   SizeT getFrameOrStackReg() const override {
-    return IsEbpBasedFrame ? RegX8632::Reg_ebp : RegX8632::Reg_esp;
+    return IsEbpBasedFrame ? Traits::RegisterSet::Reg_ebp
+                           : Traits::RegisterSet::Reg_esp;
   }
   size_t typeWidthInBytesOnStack(Type Ty) const override {
     // Round up to the next multiple of 4 bytes.  In particular, i1,
@@ -148,7 +151,8 @@
   Operand *hiOperand(Operand *Operand);
   void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                               size_t BasicFrameOffset, size_t &InArgsSizeBytes);
-  X8632::Address stackVarToAsmOperand(const Variable *Var) const final;
+  typename Traits::Address
+  stackVarToAsmOperand(const Variable *Var) const final;
 
   typename Traits::InstructionSet getInstructionSet() const final {
     return InstructionSet;
@@ -255,6 +259,7 @@
       llvm::SmallVectorImpl<int32_t> &Permutation,
       const llvm::SmallBitVector &ExcludeRegisters) const override;
 
+  // TODO(jpp): move the helper methods below to the MachineTraits.
   // The following are helpers that insert lowered x86 instructions
   // with minimal syntactic overhead, so that the lowering code can
   // look as close to assembly as practical.
@@ -272,7 +277,7 @@
   }
   void _adjust_stack(int32_t Amount) {
     Context.insert(InstX8632AdjustStack::create(
-        Func, Amount, getPhysicalRegister(RegX8632::Reg_esp)));
+        Func, Amount, getPhysicalRegister(Traits::RegisterSet::Reg_esp)));
   }
   void _addps(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Addps::create(Func, Dest, Src0));
@@ -289,7 +294,7 @@
   void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
     Context.insert(InstX8632Blendvps::create(Func, Dest, Src0, Src1));
   }
-  void _br(CondX86::BrCond Condition, CfgNode *TargetTrue,
+  void _br(typename Traits::Cond::BrCond Condition, CfgNode *TargetTrue,
            CfgNode *TargetFalse) {
     Context.insert(
         InstX8632Br::create(Func, TargetTrue, TargetFalse, Condition));
@@ -297,10 +302,10 @@
   void _br(CfgNode *Target) {
     Context.insert(InstX8632Br::create(Func, Target));
   }
-  void _br(CondX86::BrCond Condition, CfgNode *Target) {
+  void _br(typename Traits::Cond::BrCond Condition, CfgNode *Target) {
     Context.insert(InstX8632Br::create(Func, Target, Condition));
   }
-  void _br(CondX86::BrCond Condition, InstX8632Label *Label) {
+  void _br(typename Traits::Cond::BrCond Condition, InstX8632Label *Label) {
     Context.insert(InstX8632Br::create(Func, Label, Condition));
   }
   void _bsf(Variable *Dest, Operand *Src0) {
@@ -315,13 +320,15 @@
   void _cbwdq(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Cbwdq::create(Func, Dest, Src0));
   }
-  void _cmov(Variable *Dest, Operand *Src0, CondX86::BrCond Condition) {
+  void _cmov(Variable *Dest, Operand *Src0,
+             typename Traits::Cond::BrCond Condition) {
     Context.insert(InstX8632Cmov::create(Func, Dest, Src0, Condition));
   }
   void _cmp(Operand *Src0, Operand *Src1) {
     Context.insert(InstX8632Icmp::create(Func, Src0, Src1));
   }
-  void _cmpps(Variable *Dest, Operand *Src0, CondX86::CmppsCond Condition) {
+  void _cmpps(Variable *Dest, Operand *Src0,
+              typename Traits::Cond::CmppsCond Condition) {
     Context.insert(InstX8632Cmpps::create(Func, Dest, Src0, Condition));
   }
   void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
@@ -503,7 +510,7 @@
   void _sbb_rmw(OperandX8632Mem *DestSrc0, Operand *Src1) {
     Context.insert(InstX8632SbbRMW::create(Func, DestSrc0, Src1));
   }
-  void _setcc(Variable *Dest, CondX86::BrCond Condition) {
+  void _setcc(Variable *Dest, typename Traits::Cond::BrCond Condition) {
     Context.insert(InstX8632Setcc::create(Func, Dest, Condition));
   }
   void _shl(Variable *Dest, Operand *Src0) {
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index a2bf75f..9ebeb62 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -177,7 +177,7 @@
     return true;
   case PK_Fcmp:
     return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
-               .C2 != CondX86::Br_None;
+               .C2 != MachineTraits::Cond::Br_None;
   }
 }
 
@@ -285,19 +285,19 @@
   // TODO: Don't initialize IntegerRegisters and friends every time.
   // Instead, initialize in some sort of static initializer for the
   // class.
-  llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
-  llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
-  llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
-  llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
-  llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
-  ScratchRegs.resize(RegX8632::Reg_NUM);
+  llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
+  llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
+  llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
+  llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
+  llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
+  ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
-  IntegerRegisters[RegX8632::val] = isInt;                                     \
-  IntegerRegistersI8[RegX8632::val] = isI8;                                    \
-  FloatRegisters[RegX8632::val] = isFP;                                        \
-  VectorRegisters[RegX8632::val] = isFP;                                       \
-  ScratchRegs[RegX8632::val] = scratch;
+  IntegerRegisters[Traits::RegisterSet::val] = isInt;                          \
+  IntegerRegistersI8[Traits::RegisterSet::val] = isI8;                         \
+  FloatRegisters[Traits::RegisterSet::val] = isFP;                             \
+  VectorRegisters[Traits::RegisterSet::val] = isFP;                            \
+  ScratchRegs[Traits::RegisterSet::val] = scratch;
   REGX8632_TABLE;
 #undef X
   TypeToRegisterSet[IceType_void] = InvalidRegisters;
@@ -740,7 +740,7 @@
   if (Ty == IceType_void)
     Ty = IceType_i32;
   if (PhysicalRegisters[Ty].empty())
-    PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
+    PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
   assert(RegNum < PhysicalRegisters[Ty].size());
   Variable *Reg = PhysicalRegisters[Ty][RegNum];
   if (Reg == nullptr) {
@@ -749,7 +749,7 @@
     PhysicalRegisters[Ty][RegNum] = Reg;
     // Specially mark esp as an "argument" so that it is considered
     // live upon function entry.
-    if (RegNum == RegX8632::Reg_esp) {
+    if (RegNum == Traits::RegisterSet::Reg_esp) {
       Func->addImplicitArg(Reg);
       Reg->setIgnoreLiveness();
     }
@@ -759,7 +759,7 @@
 
 template <class Machine>
 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
-  assert(RegNum < RegX8632::Reg_NUM);
+  assert(RegNum < Traits::RegisterSet::Reg_NUM);
   static IceString RegNames8[] = {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
@@ -805,7 +805,7 @@
 }
 
 template <class Machine>
-X8632::Address
+typename TargetX86Base<Machine>::Traits::Address
 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
   if (Var->hasReg())
     llvm_unreachable("Stack Variable has a register assigned");
@@ -815,7 +815,8 @@
   int32_t Offset = Var->getStackOffset();
   if (!hasFramePointer())
     Offset += getStackAdjustment();
-  return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
+  return typename Traits::Address(
+      Traits::RegisterSet::getEncodedGPR(getFrameOrStackReg()), Offset);
 }
 
 template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
@@ -837,7 +838,7 @@
     // Replace Arg in the argument list with the home register.  Then
     // generate an instruction in the prolog to copy the home register
     // to the assigned location of Arg.
-    int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
+    int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
     ++NumXmmArgs;
     Variable *RegisterArg = Func->template makeVariable(Ty);
     if (BuildDefs::dump())
@@ -997,8 +998,8 @@
     assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
                .count() == 0);
     PreservedRegsSizeBytes += 4;
-    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
-    Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
+    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
+    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
     _push(ebp);
     _mov(ebp, esp);
     // Keep ebp live for late-stage liveness analysis
@@ -1033,7 +1034,7 @@
 
   // Generate "sub esp, SpillAreaSizeBytes"
   if (SpillAreaSizeBytes)
-    _sub(getPhysicalRegister(RegX8632::Reg_esp),
+    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
          Ctx->getConstantInt32(SpillAreaSizeBytes));
   Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
 
@@ -1117,9 +1118,9 @@
   Context.init(Node);
   Context.setInsertPoint(InsertPoint);
 
-  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
+  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   if (IsEbpBasedFrame) {
-    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
+    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
     // For late-stage liveness analysis (e.g. asm-verbose mode),
     // adding a fake use of esp before the assignment of esp=ebp keeps
     // previous esp adjustments from being dead-code eliminated.
@@ -1137,7 +1138,7 @@
       getRegisterSet(RegSet_CalleeSave, RegSet_None);
   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
     SizeT j = CalleeSaves.size() - i - 1;
-    if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
+    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
       continue;
     if (CalleeSaves[j] && RegsUsed[j]) {
       _pop(getPhysicalRegister(j));
@@ -1155,7 +1156,7 @@
   // FakeUse <original_ret_operand>
   const SizeT BundleSize =
       1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
-  Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
+  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
   _pop(T_ecx);
   _bundle_lock();
   _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
@@ -1273,26 +1274,26 @@
 llvm::SmallBitVector
 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
                                        RegSetMask Exclude) const {
-  llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
+  llvm::SmallBitVector Registers(Traits::RegisterSet::Reg_NUM);
 
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
   if (scratch && (Include & RegSet_CallerSave))                                \
-    Registers[RegX8632::val] = true;                                           \
+    Registers[Traits::RegisterSet::val] = true;                                \
   if (preserved && (Include & RegSet_CalleeSave))                              \
-    Registers[RegX8632::val] = true;                                           \
+    Registers[Traits::RegisterSet::val] = true;                                \
   if (stackptr && (Include & RegSet_StackPointer))                             \
-    Registers[RegX8632::val] = true;                                           \
+    Registers[Traits::RegisterSet::val] = true;                                \
   if (frameptr && (Include & RegSet_FramePointer))                             \
-    Registers[RegX8632::val] = true;                                           \
+    Registers[Traits::RegisterSet::val] = true;                                \
   if (scratch && (Exclude & RegSet_CallerSave))                                \
-    Registers[RegX8632::val] = false;                                          \
+    Registers[Traits::RegisterSet::val] = false;                               \
   if (preserved && (Exclude & RegSet_CalleeSave))                              \
-    Registers[RegX8632::val] = false;                                          \
+    Registers[Traits::RegisterSet::val] = false;                               \
   if (stackptr && (Exclude & RegSet_StackPointer))                             \
-    Registers[RegX8632::val] = false;                                          \
+    Registers[Traits::RegisterSet::val] = false;                               \
   if (frameptr && (Exclude & RegSet_FramePointer))                             \
-    Registers[RegX8632::val] = false;
+    Registers[Traits::RegisterSet::val] = false;
 
   REGX8632_TABLE
 
@@ -1312,7 +1313,7 @@
   NeedsStackAlignment = true;
 
   // TODO(stichnot): minimize the number of adjustments of esp, etc.
-  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
+  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   Operand *TotalSize = legalize(Inst->getSizeInBytes());
   Variable *Dest = Inst->getDest();
   uint32_t AlignmentParam = Inst->getAlignInBytes();
@@ -1551,8 +1552,8 @@
       break;
     case InstArithmetic::Mul: {
       Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
-      Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
-      Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
+      Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
       // gcc does the following:
       // a=b*c ==>
       //   t1 = b.hi; t1 *=(imul) c.lo
@@ -1569,7 +1570,7 @@
       _imul(T_1, Src1Lo);
       _mov(T_2, Src1Hi);
       _imul(T_2, Src0Lo);
-      _mov(T_3, Src0Lo, RegX8632::Reg_eax);
+      _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
       _mul(T_4Lo, T_3, Src1Lo);
       // The mul instruction produces two dest variables, edx:eax.  We
       // create a fake definition of edx to account for this.
@@ -1600,13 +1601,13 @@
       Constant *BitTest = Ctx->getConstantInt32(0x20);
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
       InstX8632Label *Label = InstX8632Label::create(Func, this);
-      _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
+      _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
       _mov(T_2, Src0Lo);
       _mov(T_3, Src0Hi);
       _shld(T_3, T_2, T_1);
       _shl(T_2, T_1);
       _test(T_1, BitTest);
-      _br(CondX86::Br_e, Label);
+      _br(Traits::Cond::Br_e, Label);
       // T_2 and T_3 are being assigned again because of the
       // intra-block control flow, so we need the _mov_nonkillable
       // variant to avoid liveness problems.
@@ -1635,13 +1636,13 @@
       Constant *BitTest = Ctx->getConstantInt32(0x20);
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
       InstX8632Label *Label = InstX8632Label::create(Func, this);
-      _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
+      _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
       _mov(T_2, Src0Lo);
       _mov(T_3, Src0Hi);
       _shrd(T_2, T_3, T_1);
       _shr(T_3, T_1);
       _test(T_1, BitTest);
-      _br(CondX86::Br_e, Label);
+      _br(Traits::Cond::Br_e, Label);
       // T_2 and T_3 are being assigned again because of the
       // intra-block control flow, so we need the _mov_nonkillable
       // variant to avoid liveness problems.
@@ -1670,13 +1671,13 @@
       Constant *BitTest = Ctx->getConstantInt32(0x20);
       Constant *SignExtend = Ctx->getConstantInt32(0x1f);
       InstX8632Label *Label = InstX8632Label::create(Func, this);
-      _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
+      _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
       _mov(T_2, Src0Lo);
       _mov(T_3, Src0Hi);
       _shrd(T_2, T_3, T_1);
       _sar(T_3, T_1);
       _test(T_1, BitTest);
-      _br(CondX86::Br_e, Label);
+      _br(Traits::Cond::Br_e, Label);
       // T_2 and T_3 are being assigned again because of the
       // intra-block control flow, so T_2 needs the _mov_nonkillable
       // variant to avoid liveness problems.  T_3 doesn't need special
@@ -1747,7 +1748,7 @@
       bool TypesAreValidForPmull =
           Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
       bool InstructionSetIsValidForPmull =
-          Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1;
+          Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
       if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
         Variable *T = makeReg(Dest->getType());
         _movp(T, Src0);
@@ -1874,7 +1875,7 @@
     // The 8-bit version of imul only allows the form "imul r/m8"
     // where T must be in eax.
     if (isByteSizedArithType(Dest->getType())) {
-      _mov(T, Src0, RegX8632::Reg_eax);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
     } else {
       _mov(T, Src0);
@@ -1885,21 +1886,21 @@
   case InstArithmetic::Shl:
     _mov(T, Src0);
     if (!llvm::isa<Constant>(Src1))
-      Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
+      Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx);
     _shl(T, Src1);
     _mov(Dest, T);
     break;
   case InstArithmetic::Lshr:
     _mov(T, Src0);
     if (!llvm::isa<Constant>(Src1))
-      Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
+      Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx);
     _shr(T, Src1);
     _mov(Dest, T);
     break;
   case InstArithmetic::Ashr:
     _mov(T, Src0);
     if (!llvm::isa<Constant>(Src1))
-      Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
+      Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx);
     _sar(T, Src1);
     _mov(Dest, T);
     break;
@@ -1910,14 +1911,14 @@
     if (isByteSizedArithType(Dest->getType())) {
       Variable *T_ah = nullptr;
       Constant *Zero = Ctx->getConstantZero(IceType_i8);
-      _mov(T, Src0, RegX8632::Reg_eax);
-      _mov(T_ah, Zero, RegX8632::Reg_ah);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
+      _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
       _div(T, Src1, T_ah);
       _mov(Dest, T);
     } else {
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
-      _mov(T, Src0, RegX8632::Reg_eax);
-      _mov(T_edx, Zero, RegX8632::Reg_edx);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
+      _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
       _div(T, Src1, T_edx);
       _mov(Dest, T);
     }
@@ -1960,13 +1961,13 @@
     }
     Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
     if (isByteSizedArithType(Dest->getType())) {
-      _mov(T, Src0, RegX8632::Reg_eax);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       _cbwdq(T, T);
       _idiv(T, Src1, T);
       _mov(Dest, T);
     } else {
-      T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
-      _mov(T, Src0, RegX8632::Reg_eax);
+      T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       _cbwdq(T_edx, T);
       _idiv(T, Src1, T_edx);
       _mov(Dest, T);
@@ -1977,14 +1978,14 @@
     if (isByteSizedArithType(Dest->getType())) {
       Variable *T_ah = nullptr;
       Constant *Zero = Ctx->getConstantZero(IceType_i8);
-      _mov(T, Src0, RegX8632::Reg_eax);
-      _mov(T_ah, Zero, RegX8632::Reg_ah);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
+      _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
       _div(T_ah, Src1, T);
       _mov(Dest, T_ah);
     } else {
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
-      _mov(T_edx, Zero, RegX8632::Reg_edx);
-      _mov(T, Src0, RegX8632::Reg_eax);
+      _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       _div(T_edx, Src1, T);
       _mov(Dest, T_edx);
     }
@@ -2032,15 +2033,15 @@
     }
     Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
     if (isByteSizedArithType(Dest->getType())) {
-      Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
-      _mov(T, Src0, RegX8632::Reg_eax);
+      Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       _cbwdq(T, T);
       Context.insert(InstFakeDef::create(Func, T_ah));
       _idiv(T_ah, Src1, T);
       _mov(Dest, T_ah);
     } else {
-      T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
-      _mov(T, Src0, RegX8632::Reg_eax);
+      T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       _cbwdq(T_edx, T);
       _idiv(T_edx, Src1, T);
       _mov(Dest, T_edx);
@@ -2156,7 +2157,7 @@
   Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
   Constant *Zero = Ctx->getConstantZero(IceType_i32);
   _cmp(Src0, Zero);
-  _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
+  _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
 }
 
 template <class Machine>
@@ -2203,7 +2204,8 @@
         ParameterAreaSizeBytes =
             Traits::applyStackAlignment(ParameterAreaSizeBytes);
       }
-      Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
+      Variable *esp =
+          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
       Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
       StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
@@ -2241,7 +2243,8 @@
   // code, as the memory operand displacements may end up being smaller
   // before any stack adjustment is done.
   for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
-    Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
+    Variable *Reg =
+        legalizeToVar(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
     // Generate a FakeUse of register arguments so that they do not get
     // dead code eliminated as a result of the FakeKill of scratch
     // registers after the call.
@@ -2264,11 +2267,11 @@
     case IceType_i8:
     case IceType_i16:
     case IceType_i32:
-      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
       break;
     case IceType_i64:
-      ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
-      ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
+      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
       break;
     case IceType_f32:
     case IceType_f64:
@@ -2282,7 +2285,7 @@
     case IceType_v8i16:
     case IceType_v4i32:
     case IceType_v4f32:
-      ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
       break;
     }
   }
@@ -2311,7 +2314,8 @@
   // Add the appropriate offset to esp.  The call instruction takes care
   // of resetting the stack offset during emission.
   if (ParameterAreaSizeBytes) {
-    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
+    Variable *esp =
+        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
     _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
   }
 
@@ -2850,7 +2854,7 @@
 
   // TODO(wala): Determine the best lowering sequences for each type.
   bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
-                     InstructionSet >= Machine::SSE4_1;
+                     InstructionSet >= Traits::SSE4_1;
   if (CanUsePextr && Ty != IceType_v4f32) {
     // Use pextrb, pextrw, or pextrd.
     Constant *Mask = Ctx->getConstantInt32(Index);
@@ -2943,8 +2947,9 @@
 
       switch (Condition) {
       default: {
-        CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate;
-        assert(Predicate != CondX86::Cmpps_Invalid);
+        typename Traits::Cond::CmppsCond Predicate =
+            Traits::TableFcmp[Index].Predicate;
+        assert(Predicate != Traits::Cond::Cmpps_Invalid);
         T = makeReg(Src0RM->getType());
         _movp(T, Src0RM);
         _cmpps(T, Src1RM, Predicate);
@@ -2954,9 +2959,9 @@
         T = makeReg(Src0RM->getType());
         Variable *T2 = makeReg(Src0RM->getType());
         _movp(T, Src0RM);
-        _cmpps(T, Src1RM, CondX86::Cmpps_neq);
+        _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
         _movp(T2, Src0RM);
-        _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
+        _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
         _pand(T, T2);
       } break;
       case InstFcmp::Ueq: {
@@ -2964,9 +2969,9 @@
         T = makeReg(Src0RM->getType());
         Variable *T2 = makeReg(Src0RM->getType());
         _movp(T, Src0RM);
-        _cmpps(T, Src1RM, CondX86::Cmpps_eq);
+        _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
         _movp(T2, Src0RM);
-        _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
+        _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
         _por(T, T2);
       } break;
       }
@@ -2995,8 +3000,8 @@
   assert(Index < Traits::TableFcmpSize);
   if (Traits::TableFcmp[Index].SwapScalarOperands)
     std::swap(Src0, Src1);
-  bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None);
-  bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None);
+  bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
+  bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
   if (HasC1) {
     Src0 = legalize(Src0);
     Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
@@ -3154,9 +3159,9 @@
     InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
     _mov(Dest, One);
     _cmp(Src0HiRM, Src1HiRI);
-    if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None)
+    if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
       _br(Traits::TableIcmp64[Index].C1, LabelTrue);
-    if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None)
+    if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
       _br(Traits::TableIcmp64[Index].C2, LabelFalse);
     _cmp(Src0LoRM, Src1LoRI);
     _br(Traits::TableIcmp64[Index].C3, LabelTrue);
@@ -3198,7 +3203,7 @@
   }
 
   if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
-      InstructionSet >= Machine::SSE4_1) {
+      InstructionSet >= Traits::SSE4_1) {
     // Use insertps, pinsrb, pinsrw, or pinsrd.
     Operand *ElementRM =
         legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
@@ -3612,13 +3617,15 @@
     return;
   }
   case Intrinsics::Stacksave: {
-    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
+    Variable *esp =
+        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
     Variable *Dest = Instr->getDest();
     _mov(Dest, esp);
     return;
   }
   case Intrinsics::Stackrestore: {
-    Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
+    Variable *esp =
+        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
     _mov_nonkillable(esp, Instr->getArg(0));
     return;
   }
@@ -3639,10 +3646,10 @@
   if (Expected->getType() == IceType_i64) {
     // Reserve the pre-colored registers first, before adding any more
     // infinite-weight variables from formMemoryOperand's legalization.
-    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
-    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
-    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
-    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
+    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
+    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
     _mov(T_eax, loOperand(Expected));
     _mov(T_edx, hiOperand(Expected));
     _mov(T_ebx, loOperand(Desired));
@@ -3656,7 +3663,7 @@
     _mov(DestHi, T_edx);
     return;
   }
-  Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
+  Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
   _mov(T_eax, Expected);
   OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
   Variable *DesiredReg = legalizeToVar(Desired);
@@ -3727,7 +3734,8 @@
           lowerAssign(PhiAssign);
           Context.advanceNext();
         }
-        _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
+        _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
+            NextBr->getTargetFalse());
         // Skip over the old compare and branch, by deleting them.
         NextCmp->setDeleted();
         NextBr->setDeleted();
@@ -3858,13 +3866,13 @@
   Val = legalize(Val);
   Type Ty = Val->getType();
   if (Ty == IceType_i64) {
-    Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
-    Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
+    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
     OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
     _mov(T_eax, loOperand(Addr));
     _mov(T_edx, hiOperand(Addr));
-    Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
-    Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
+    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
+    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
     InstX8632Label *Label = InstX8632Label::create(Func, this);
     const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
     if (!IsXchg8b) {
@@ -3883,7 +3891,7 @@
     }
     const bool Locked = true;
     _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
-    _br(CondX86::Br_ne, Label);
+    _br(Traits::Cond::Br_ne, Label);
     if (!IsXchg8b) {
       // If Val is a variable, model the extended live range of Val through
       // the end of the loop, since it will be re-used by the loop.
@@ -3908,7 +3916,7 @@
     return;
   }
   OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
-  Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
+  Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
   _mov(T_eax, Addr);
   InstX8632Label *Label = InstX8632Label::create(Func, this);
   Context.insert(Label);
@@ -3919,7 +3927,7 @@
   (this->*Op_Lo)(T, Val);
   const bool Locked = true;
   _cmpxchg(Addr, T_eax, T, Locked);
-  _br(CondX86::Br_ne, Label);
+  _br(Traits::Cond::Br_ne, Label);
   // If Val is a variable, model the extended live range of Val through
   // the end of the loop, since it will be re-used by the loop.
   if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
@@ -3983,7 +3991,7 @@
     Constant *SixtyThree = Ctx->getConstantInt32(63);
     _mov(T_Dest, SixtyThree);
   }
-  _cmov(T_Dest, T, CondX86::Br_ne);
+  _cmov(T_Dest, T, Traits::Cond::Br_ne);
   if (!Cttz) {
     _xor(T_Dest, ThirtyOne);
   }
@@ -4004,7 +4012,7 @@
     _xor(T_Dest2, ThirtyOne);
   }
   _test(SecondVar, SecondVar);
-  _cmov(T_Dest2, T_Dest, CondX86::Br_e);
+  _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
   _mov(DestLo, T_Dest2);
   _mov(DestHi, Ctx->getConstantZero(IceType_i32));
 }
@@ -4306,16 +4314,18 @@
   if (Inst->hasRetValue()) {
     Operand *Src0 = legalize(Inst->getRetValue());
     if (Src0->getType() == IceType_i64) {
-      Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
-      Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
+      Variable *eax =
+          legalizeToVar(loOperand(Src0), Traits::RegisterSet::Reg_eax);
+      Variable *edx =
+          legalizeToVar(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
       Reg = eax;
       Context.insert(InstFakeUse::create(Func, edx));
     } else if (isScalarFloatingType(Src0->getType())) {
       _fld(Src0);
     } else if (isVectorType(Src0->getType())) {
-      Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
+      Reg = legalizeToVar(Src0, Traits::RegisterSet::Reg_xmm0);
     } else {
-      _mov(Reg, Src0, RegX8632::Reg_eax);
+      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
     }
   }
   // Add a ret instruction even if sandboxing is enabled, because
@@ -4327,7 +4337,8 @@
   // eliminated.  TODO: Are there more places where the fake use
   // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
   // have a ret instruction.
-  Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
+  Variable *esp =
+      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   Context.insert(InstFakeUse::create(Func, esp));
 }
 
@@ -4344,7 +4355,7 @@
     Variable *T = makeReg(SrcTy);
     Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
     Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
-    if (InstructionSet >= Machine::SSE4_1) {
+    if (InstructionSet >= Traits::SSE4_1) {
       // TODO(wala): If the condition operand is a constant, use blendps
       // or pblendw.
       //
@@ -4352,7 +4363,7 @@
       if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
           SrcTy == IceType_v4f32) {
         Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
-        Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
+        Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
         _movp(xmm0, ConditionRM);
         _psll(xmm0, Ctx->getConstantInt8(31));
         _movp(T, SrcFRM);
@@ -4362,7 +4373,7 @@
         assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
         Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
                                                               : IceType_v16i8;
-        Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
+        Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
         lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
         _movp(T, SrcFRM);
         _pblendvb(T, SrcTRM, xmm0);
@@ -4370,7 +4381,7 @@
       }
       return;
     }
-    // Lower select without Machine::SSE4.1:
+    // Lower select without SSE4.1:
     // a=d?b:c ==>
     //   if elementtype(d) != i1:
     //      d=sext(d);
@@ -4397,7 +4408,7 @@
     return;
   }
 
-  CondX86::BrCond Cond = CondX86::Br_ne;
+  typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
   Operand *CmpOpnd0 = nullptr;
   Operand *CmpOpnd1 = nullptr;
   // Handle folding opportunities.
@@ -4542,9 +4553,9 @@
       Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
       InstX8632Label *Label = InstX8632Label::create(Func, this);
       _cmp(Src0Lo, ValueLo);
-      _br(CondX86::Br_ne, Label);
+      _br(Traits::Cond::Br_ne, Label);
       _cmp(Src0Hi, ValueHi);
-      _br(CondX86::Br_e, Inst->getLabel(I));
+      _br(Traits::Cond::Br_e, Inst->getLabel(I));
       Context.insert(Label);
     }
     _br(Inst->getLabelDefault());
@@ -4559,7 +4570,7 @@
   for (SizeT I = 0; I < NumCases; ++I) {
     Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
     _cmp(Src0, Value);
-    _br(CondX86::Br_e, Inst->getLabel(I));
+    _br(Traits::Cond::Br_e, Inst->getLabel(I));
   }
 
   _br(Inst->getLabelDefault());
@@ -5208,7 +5219,7 @@
     const llvm::SmallBitVector &ExcludeRegisters) const {
   // TODO(stichnot): Declaring Permutation this way loses type/size
   // information.  Fix this in conjunction with the caller-side TODO.
-  assert(Permutation.size() >= RegX8632::Reg_NUM);
+  assert(Permutation.size() >= Traits::RegisterSet::Reg_NUM);
   // Expected upper bound on the number of registers in a single
   // equivalence class.  For x86-32, this would comprise the 8 XMM
   // registers.  This is for performance, not correctness.
@@ -5223,15 +5234,15 @@
 // explicitly excluded from shuffling.
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
-  if (ExcludeRegisters[RegX8632::val]) {                                       \
+  if (ExcludeRegisters[Traits::RegisterSet::val]) {                            \
     /* val stays the same in the resulting permutation. */                     \
-    Permutation[RegX8632::val] = RegX8632::val;                                \
+    Permutation[Traits::RegisterSet::val] = Traits::RegisterSet::val;          \
     ++NumPreserved;                                                            \
   } else {                                                                     \
     const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) |   \
                            (isInt << 3) | (isFP << 4);                         \
     /* val is assigned to an equivalence class based on its properties. */     \
-    EquivalenceClasses[Index].push_back(RegX8632::val);                        \
+    EquivalenceClasses[Index].push_back(Traits::RegisterSet::val);             \
   }
   REGX8632_TABLE
 #undef X
@@ -5249,7 +5260,7 @@
     }
   }
 
-  assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);
+  assert(NumShuffled + NumPreserved == Traits::RegisterSet::Reg_NUM);
 
   if (Func->isVerbose(IceV_Random)) {
     OstreamLocker L(Func->getContext());