De-templatize the IceAssemblerX86Base class

IceAssemblerX86Base<TargetX8664Traits> becomes IceAssemblerX8664.
IceAssemblerX86Base<TargetX8632Traits> becomes IceAssemblerX8632.

Instructions that are invalid for a target were removed from that target's
assembler. Also note that x86-32 does not emit any REX bytes.

Bug: b/192890685
Change-Id: I966ba9aa774f99d61ad874dd761d22f585c72c94
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/55568
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/BUILD.gn b/src/Reactor/BUILD.gn
index bba64dd..737e00f 100644
--- a/src/Reactor/BUILD.gn
+++ b/src/Reactor/BUILD.gn
@@ -254,11 +254,13 @@
 
     if (current_cpu == "x64") {
       sources += [
+        "$subzero_dir/src/IceAssemblerX8664.cpp",
         "$subzero_dir/src/IceInstX8664.cpp",
         "$subzero_dir/src/IceTargetLoweringX8664.cpp",
       ]
     } else if (current_cpu == "x86") {
       sources += [
+        "$subzero_dir/src/IceAssemblerX8632.cpp",
         "$subzero_dir/src/IceInstX8632.cpp",
         "$subzero_dir/src/IceTargetLoweringX8632.cpp",
       ]
diff --git a/third_party/subzero/CMakeLists.txt b/third_party/subzero/CMakeLists.txt
index 995ac6d..aec878b 100644
--- a/third_party/subzero/CMakeLists.txt
+++ b/third_party/subzero/CMakeLists.txt
@@ -48,12 +48,14 @@
 
 if(ARCH STREQUAL "x86_64")
     list(APPEND SUBZERO_SRC_FILES
+        src/IceAssemblerX8664.cpp
         src/IceInstX8664.cpp
         src/IceTargetLoweringX8664.cpp
     )
     set(SUBZERO_TARGET_CPU X8664)
 elseif(ARCH STREQUAL "x86")
     list(APPEND SUBZERO_SRC_FILES
+        src/IceAssemblerX8632.cpp
         src/IceInstX8632.cpp
         src/IceTargetLoweringX8632.cpp
     )
diff --git a/third_party/subzero/src/IceAssemblerX8632.cpp b/third_party/subzero/src/IceAssemblerX8632.cpp
new file mode 100644
index 0000000..edd155d
--- /dev/null
+++ b/third_party/subzero/src/IceAssemblerX8632.cpp
@@ -0,0 +1,3162 @@
+//===- subzero/src/IceAssemblerX8632.cpp - x86-32 assembler ---------------===//
+// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+//
+// Modified by the Subzero authors.
+//
+//===----------------------------------------------------------------------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implements the AssemblerX8632 class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IceAssemblerX8632.h"
+
+#include "IceCfg.h"
+#include "IceCfgNode.h"
+#include "IceOperand.h"
+
+namespace Ice {
+namespace X8632 {
+
+AssemblerX8632::~AssemblerX8632() { // Runs finalCheck() on every label.
+  if (BuildDefs::asserts()) { // Skipped entirely when asserts are disabled.
+    for (const Label *Label : CfgNodeLabels) {
+      Label->finalCheck(); // NOTE(review): assumes no null slots -- confirm.
+    }
+    for (const Label *Label : LocalLabels) {
+      Label->finalCheck(); // NOTE(review): assumes no null slots -- confirm.
+    }
+  }
+}
+
+void AssemblerX8632::alignFunction() { // Pads with hlt to bundle alignment.
+  const SizeT Align = 1 << getBundleAlignLog2Bytes();
+  SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
+  constexpr SizeT HltSize = 1; // Each hlt() call is counted as one byte.
+  while (BytesNeeded > 0) {
+    hlt();
+    BytesNeeded -= HltSize;
+  }
+}
+
+typename AssemblerX8632::Label *
+AssemblerX8632::getOrCreateLabel(SizeT Number, LabelVector &Labels) {
+  Label *L = nullptr;
+  if (Number == Labels.size()) { // Next sequential index: just append.
+    L = new (this->allocate<Label>()) Label();
+    Labels.push_back(L);
+    return L;
+  }
+  if (Number > Labels.size()) { // Past the end: grow so Labels[Number] exists.
+    Utils::reserveAndResize(Labels, Number + 1);
+  }
+  L = Labels[Number];
+  if (!L) { // Slot exists but no label was created for it yet.
+    L = new (this->allocate<Label>()) Label();
+    Labels[Number] = L;
+  }
+  return L;
+}
+
+Ice::Label *AssemblerX8632::getCfgNodeLabel(SizeT NodeNumber) {
+  assert(NodeNumber < CfgNodeLabels.size()); // Lookup only; never creates.
+  return CfgNodeLabels[NodeNumber];
+}
+
+typename AssemblerX8632::Label *
+AssemblerX8632::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
+  return getOrCreateLabel(NodeNumber, CfgNodeLabels); // CFG-node label table.
+}
+
+typename AssemblerX8632::Label *
+AssemblerX8632::getOrCreateLocalLabel(SizeT Number) {
+  return getOrCreateLabel(Number, LocalLabels); // Local label table.
+}
+
+void AssemblerX8632::bindCfgNodeLabel(const CfgNode *Node) {
+  assert(!getPreliminary()); // Must not be called in preliminary mode.
+  Label *L = getOrCreateCfgNodeLabel(Node->getIndex()); // Keyed by node index.
+  this->bind(L);
+}
+
+void AssemblerX8632::bindLocalLabel(SizeT Number) {
+  Label *L = getOrCreateLocalLabel(Number); // Always created, even if unbound.
+  if (!getPreliminary()) // Binding is skipped in preliminary mode.
+    this->bind(L);
+}
+
+void AssemblerX8632::call(GPRRegister reg) { // Indirect call via register.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF);                          // Opcode FF ...
+  emitRegisterOperand(2, gprEncoding(reg)); // ... with /2: CALL r/m32.
+}
+
+void AssemblerX8632::call(const Address &address) { // Indirect call via memory.
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xFF);         // Opcode FF ...
+  emitOperand(2, address); // ... with /2: CALL r/m32.
+}
+
+void AssemblerX8632::call(const ConstantRelocatable *label) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  intptr_t call_start = Buffer.getPosition(); // Used only by the assert below.
+  emitUint8(0xE8);                            // E8: CALL rel32.
+  auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
+  Fixup->set_addend(-4); // Presumably compensates for the 4-byte field; confirm.
+  emitFixup(Fixup);
+  emitInt32(0); // Placeholder for the 32-bit field covered by the fixup.
+  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
+  (void)call_start; // Keeps call_start "used" if assert() compiles to nothing.
+}
+
+void AssemblerX8632::call(const Immediate &abs_address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  intptr_t call_start = Buffer.getPosition(); // Used only by the assert below.
+  emitUint8(0xE8);                            // E8: CALL rel32.
+  auto *Fixup = this->createFixup(Traits::FK_PcRel, AssemblerFixup::NullSymbol);
+  Fixup->set_addend(abs_address.value() - 4); // Target carried in the addend.
+  emitFixup(Fixup);
+  emitInt32(0); // Placeholder for the 32-bit field covered by the fixup.
+  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
+  (void)call_start; // Keeps call_start "used" if assert() compiles to nothing.
+}
+
+void AssemblerX8632::pushl(GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x50 + gprEncoding(reg)); // 50+rd: PUSH r32 (single byte).
+}
+
+void AssemblerX8632::pushl(const Immediate &Imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x68);        // 68: PUSH imm32.
+  emitInt32(Imm.value()); // Always the 32-bit immediate form.
+}
+
+void AssemblerX8632::pushl(const ConstantRelocatable *Label) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x68); // 68: PUSH imm32; the immediate is filled in by the fixup.
+  emitFixup(this->createFixup(Traits::FK_Abs, Label));
+  // In x86-32, the emitted value is an addend to the relocation. Therefore, we
+  // must emit a 0 (because we're pushing an absolute relocation.)
+  // (In x86-64 the emitted value would not matter -- the addend lives in the
+  // relocation record as an extra field -- but this file targets x86-32 only.)
+  emitInt32(0);
+}
+
+void AssemblerX8632::popl(GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // Short form 58+rd: the register number is added to the opcode byte.
+  // x86-32 never emits a REX prefix, so no prefix handling is needed here.
+  emitUint8(0x58 + gprEncoding(reg));
+}
+
+void AssemblerX8632::popl(const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x8F);         // Opcode 8F ...
+  emitOperand(0, address); // ... with /0: POP r/m32.
+}
+
+void AssemblerX8632::pushal() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x60); // 60: PUSHAD (push all general-purpose registers).
+}
+
+void AssemblerX8632::popal() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x61); // 61: POPAD (pop all general-purpose registers).
+}
+
+void AssemblerX8632::setcc(BrCond condition, ByteRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x90 + condition);        // 0F 90+cc: SETcc r/m8.
+  emitUint8(0xC0 + gprEncoding(dst)); // ModRM written by hand: mod=11, rm=dst.
+}
+
+void AssemblerX8632::setcc(BrCond condition, const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x90 + condition); // 0F 90+cc: SETcc r/m8.
+  emitOperand(0, address);     // Memory destination; reg field passed as 0.
+}
+
+void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
+  assert(Ty != IceType_i64 && "i64 not supported yet.");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operand form of the opcode below.
+  if (isByteSizedType(Ty)) {
+    emitUint8(0xB0 + gprEncoding(dst)); // B0+rb: MOV r8, imm8.
+    emitUint8(imm.value() & 0xFF);
+  } else {
+    // TODO(jpp): When removing the assertion above ensure that in x86-64 we
+    // emit a 64-bit immediate.
+    emitUint8(0xB8 + gprEncoding(dst)); // B8+rd: MOV r16/r32, imm16/imm32.
+    emitImmediate(Ty, imm);
+  }
+}
+
+void AssemblerX8632::mov(Type Ty, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operand form of the opcode below.
+  if (isByteSizedType(Ty)) {
+    emitUint8(0x88); // 88: MOV r/m8, r8.
+  } else {
+    emitUint8(0x89); // 89: MOV r/m16/32, r16/32.
+  }
+  emitRegisterOperand(gprEncoding(src), gprEncoding(dst)); // src first here.
+}
+
+void AssemblerX8632::mov(Type Ty, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operand form of the opcode below.
+  if (isByteSizedType(Ty)) {
+    emitUint8(0x8A); // 8A: MOV r8, r/m8 (load).
+  } else {
+    emitUint8(0x8B); // 8B: MOV r16/32, r/m16/32 (load).
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::mov(Type Ty, const Address &dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operand form of the opcode below.
+  if (isByteSizedType(Ty)) {
+    emitUint8(0x88); // 88: MOV r/m8, r8 (store).
+  } else {
+    emitUint8(0x89); // 89: MOV r/m16/32, r16/32 (store).
+  }
+  emitOperand(gprEncoding(src), dst);
+}
+
+void AssemblerX8632::mov(Type Ty, const Address &dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operand form of the opcode below.
+  if (isByteSizedType(Ty)) {
+    emitUint8(0xC6); // C6 /0: MOV r/m8, imm8.
+    static constexpr RelocOffsetT OffsetFromNextInstruction = 1; // imm8 follows.
+    emitOperand(0, dst, OffsetFromNextInstruction);
+    emitUint8(imm.value() & 0xFF);
+  } else {
+    emitUint8(0xC7); // C7 /0: MOV r/m16/32, imm16/32.
+    const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
+    emitOperand(0, dst, OffsetFromNextInstruction); // Immediate bytes follow.
+    emitImmediate(Ty, imm);
+  }
+}
+
+void AssemblerX8632::movzx(Type SrcTy, GPRRegister dst, GPRRegister src) {
+  if (Traits::Is64Bit && SrcTy == IceType_i32) { // NOTE(review): see below.
+    // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
+    // operand to 64-bit. NOTE(review): presumably dead on x86-32 -- confirm.
+    mov(IceType_i32, dst, src);
+    return;
+  }
+
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
+  emitUint8(0x0F);
+  emitUint8(ByteSized ? 0xB6 : 0xB7); // 0F B6: from r/m8; 0F B7: from r/m16.
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+}
+
+void AssemblerX8632::movzx(Type SrcTy, GPRRegister dst, const Address &src) {
+  if (Traits::Is64Bit && SrcTy == IceType_i32) { // NOTE(review): see below.
+    // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
+    // operand to 64-bit. NOTE(review): presumably dead on x86-32 -- confirm.
+    mov(IceType_i32, dst, src);
+    return;
+  }
+
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
+  emitUint8(0x0F);
+  emitUint8(ByteSized ? 0xB6 : 0xB7); // 0F B6: from r/m8; 0F B7: from r/m16.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::movsx(Type SrcTy, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  if (ByteSized || SrcTy == IceType_i16) {
+    emitUint8(0x0F);
+    emitUint8(ByteSized ? 0xBE : 0xBF); // 0F BE: from r/m8; 0F BF: from r/m16.
+  } else { // NOTE(review): 64-bit-only path; guarded solely by the assert.
+    assert(Traits::Is64Bit && SrcTy == IceType_i32);
+    emitUint8(0x63); // 63 is MOVSXD only in 64-bit mode.
+  }
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+}
+
+void AssemblerX8632::movsx(Type SrcTy, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  bool ByteSized = isByteSizedType(SrcTy);
+  if (ByteSized || SrcTy == IceType_i16) {
+    emitUint8(0x0F);
+    emitUint8(ByteSized ? 0xBE : 0xBF); // 0F BE: from r/m8; 0F BF: from r/m16.
+  } else { // NOTE(review): 64-bit-only path; guarded solely by the assert.
+    assert(Traits::Is64Bit && SrcTy == IceType_i32);
+    emitUint8(0x63); // 63 is MOVSXD only in 64-bit mode.
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::lea(Type Ty, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64)); // i64 only if Is64Bit.
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit destination form.
+  emitUint8(0x8D);             // 8D: LEA r, m.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::cmov(Type Ty, BrCond cond, GPRRegister dst,
+                          GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operand form.
+  else
+    assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
+  emitUint8(0x0F);
+  emitUint8(0x40 + cond); // 0F 40+cc: CMOVcc r, r/m.
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+}
+
+void AssemblerX8632::cmov(Type Ty, BrCond cond, GPRRegister dst,
+                          const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride(); // 16-bit operand form.
+  else
+    assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
+  emitUint8(0x0F);
+  emitUint8(0x40 + cond); // 0F 40+cc: CMOVcc r, r/m.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::rep_movsb() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3); // F3: REP prefix.
+  emitUint8(0xA4); // A4: MOVSB.
+}
+
+void AssemblerX8632::movss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x10); // 0F 10: load form (xmm <- mem).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::movss(Type Ty, const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x11); // 0F 11: store form (mem <- xmm).
+  emitOperand(gprEncoding(src), dst);
+}
+
+void AssemblerX8632::movss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x11); // Store-form opcode is used for the reg-reg move ...
+  emitXmmRegisterOperand(src, dst); // ... so operands go in as (src, dst).
+}
+
+void AssemblerX8632::movd(Type SrcTy, XmmRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer); // SrcTy is not consulted.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x6E); // 66 0F 6E: MOVD xmm, r/m32.
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+}
+
+void AssemblerX8632::movd(Type SrcTy, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer); // SrcTy is not consulted.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x6E); // 66 0F 6E: MOVD xmm, r/m32.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::movd(Type DestTy, GPRRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer); // DestTy is not consulted.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x7E); // 66 0F 7E: MOVD r/m32, xmm.
+  emitRegisterOperand(gprEncoding(src), gprEncoding(dst)); // src passed first.
+}
+
+void AssemblerX8632::movd(Type DestTy, const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer); // DestTy is not consulted.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x7E); // 66 0F 7E: MOVD r/m32, xmm.
+  emitOperand(gprEncoding(src), dst);
+}
+
+void AssemblerX8632::movq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3);
+  emitUint8(0x0F);
+  emitUint8(0x7E); // F3 0F 7E: MOVQ xmm, xmm/m64.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movq(const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xD6); // 66 0F D6: MOVQ xmm/m64, xmm (store form).
+  emitOperand(gprEncoding(src), dst);
+}
+
+void AssemblerX8632::movq(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xF3);
+  emitUint8(0x0F);
+  emitUint8(0x7E); // F3 0F 7E: MOVQ xmm, xmm/m64 (load form).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::addss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x58); // 0F 58: scalar add (ADDSS/ADDSD by prefix).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::addss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x58); // 0F 58: scalar add (ADDSS/ADDSD by prefix).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::subss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x5C); // 0F 5C: scalar subtract (SUBSS/SUBSD by prefix).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::subss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x5C); // 0F 5C: scalar subtract (SUBSS/SUBSD by prefix).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::mulss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x59); // 0F 59: scalar multiply (MULSS/MULSD by prefix).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::mulss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x59); // 0F 59: scalar multiply (MULSS/MULSD by prefix).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::divss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x5E); // 0F 5E: scalar divide (DIVSS/DIVSD by prefix).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::divss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2); // F3: f32, F2: f64.
+  emitUint8(0x0F);
+  emitUint8(0x5E); // 0F 5E: scalar divide (DIVSS/DIVSD by prefix).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::fld(Type Ty, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD); // D9: m32, DD: m64.
+  emitOperand(0, src); // /0: FLD.
+}
+
+void AssemblerX8632::fstp(Type Ty, const Address &dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD); // D9: m32, DD: m64.
+  emitOperand(3, dst); // /3: FSTP (store and pop).
+}
+
+void AssemblerX8632::fstp(RegX8632::X87STRegister st) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xDD);
+  emitUint8(0xD8 + st); // DD D8+i: FSTP ST(i).
+}
+
+void AssemblerX8632::movaps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x28); // 0F 28: MOVAPS xmm, xmm/m128.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movups(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x10); // 0F 10: MOVUPS xmm, xmm/m128.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::movups(XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x10); // 0F 10: MOVUPS xmm, xmm/m128 (load form).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::movups(const Address &dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x11); // 0F 11: MOVUPS xmm/m128, xmm (store form).
+  emitOperand(gprEncoding(src), dst);
+}
+
+void AssemblerX8632::padd(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xFC); // 66 0F FC: PADDB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xFD); // 66 0F FD: PADDW.
+  } else {
+    emitUint8(0xFE); // 66 0F FE: PADDD; every other Ty lands here unchecked.
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::padd(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xFC); // 66 0F FC: PADDB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xFD); // 66 0F FD: PADDW.
+  } else {
+    emitUint8(0xFE); // 66 0F FE: PADDD; every other Ty lands here unchecked.
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::padds(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xEC); // 66 0F EC: PADDSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xED); // 66 0F ED: PADDSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected padds operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::padds(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xEC); // 66 0F EC: PADDSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xED); // 66 0F ED: PADDSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected padds operand type");
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::paddus(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xDC); // 66 0F DC: PADDUSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xDD); // 66 0F DD: PADDUSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected paddus operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::paddus(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xDC); // 66 0F DC: PADDUSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xDD); // 66 0F DD: PADDUSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected paddus operand type");
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pand(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDB); // 66 0F DB: PAND (type-independent bitwise AND).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pand(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDB); // 66 0F DB: PAND (type-independent bitwise AND).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pandn(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDF); // 66 0F DF: PANDN.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pandn(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xDF); // 66 0F DF: PANDN.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pmull(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD5); // 66 0F D5: PMULLW.
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0x38); // Three-byte opcode escape 0F 38 ...
+    emitUint8(0x40); // ... 66 0F 38 40: PMULLD (an SSE4.1 instruction).
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pmull(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD5); // 66 0F D5: PMULLW.
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0x38); // Three-byte opcode escape 0F 38 ...
+    emitUint8(0x40); // ... 66 0F 38 40: PMULLD (an SSE4.1 instruction).
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pmulhw(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the 8 x i16 vector type is accepted.
+  (void)Ty;                    // Ty is otherwise unused.
+  emitUint8(0xE5);             // 66 0F E5: PMULHW.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pmulhw(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the 8 x i16 vector type is accepted.
+  (void)Ty;                    // Ty is otherwise unused.
+  emitUint8(0xE5);             // 66 0F E5: PMULHW.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pmulhuw(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the 8 x i16 vector type is accepted.
+  (void)Ty;                    // Ty is otherwise unused.
+  emitUint8(0xE4);             // 66 0F E4: PMULHUW.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pmulhuw(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the 8 x i16 vector type is accepted.
+  (void)Ty;                    // Ty is otherwise unused.
+  emitUint8(0xE4);             // 66 0F E4: PMULHUW.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pmaddwd(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the 8 x i16 vector type is accepted.
+  (void)Ty;                    // Ty is otherwise unused.
+  emitUint8(0xF5);             // 66 0F F5: PMADDWD.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pmaddwd(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  assert(Ty == IceType_v8i16); // Only the 8 x i16 vector type is accepted.
+  (void)Ty;                    // Ty is otherwise unused.
+  emitUint8(0xF5);             // 66 0F F5: PMADDWD.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pmuludq(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xF4); // 66 0F F4: PMULUDQ.
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pmuludq(Type /* Ty */, XmmRegister dst,
+                             const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xF4); // 66 0F F4: PMULUDQ.
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::por(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEB); // 66 0F EB: POR (type-independent bitwise OR).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::por(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEB); // 66 0F EB: POR (type-independent bitwise OR).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::psub(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xF8); // 66 0F F8: PSUBB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xF9); // 66 0F F9: PSUBW.
+  } else {
+    emitUint8(0xFA); // 66 0F FA: PSUBD; every other Ty lands here unchecked.
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psub(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xF8); // 66 0F F8: PSUBB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xF9); // 66 0F F9: PSUBW.
+  } else {
+    emitUint8(0xFA); // 66 0F FA: PSUBD; every other Ty lands here unchecked.
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::psubs(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xE8); // 66 0F E8: PSUBSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xE9); // 66 0F E9: PSUBSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected psubs operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psubs(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xE8); // 66 0F E8: PSUBSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xE9); // 66 0F E9: PSUBSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected psubs operand type");
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::psubus(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xD8); // 66 0F D8: PSUBUSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xD9); // 66 0F D9: PSUBUSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected psubus operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psubus(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (isByteSizedArithType(Ty)) {
+    emitUint8(0xD8); // 66 0F D8: PSUBUSB.
+  } else if (Ty == IceType_i16) {
+    emitUint8(0xD9); // 66 0F D9: PSUBUSW.
+  } else { // NOTE(review): no opcode byte is emitted here if asserts are off.
+    assert(false && "Unexpected psubus operand type");
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::pxor(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEF); // 66 0F EF: PXOR (type-independent bitwise XOR).
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::pxor(Type /* Ty */, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0xEF); // 66 0F EF: PXOR (type-independent bitwise XOR).
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::psll(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xF1); // 66 0F F1: PSLLW (shift count taken from src).
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xF2); // 66 0F F2: PSLLD (shift count taken from src).
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psll(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xF1); // 66 0F F1: PSLLW (shift count taken from src).
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xF2); // 66 0F F2: PSLLD (shift count taken from src).
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::psll(Type Ty, XmmRegister dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_int8()); // The shift count is encoded as a single byte.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0x71); // 66 0F 71: word-sized immediate shift group.
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0x72); // 66 0F 72: dword-sized immediate shift group.
+  }
+  emitRegisterOperand(6, gprEncoding(dst)); // /6 selects shift-left (PSLL*).
+  emitUint8(imm.value() & 0xFF);
+}
+
+void AssemblerX8632::psra(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xE1); // 66 0F E1: PSRAW (shift count taken from src).
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xE2); // 66 0F E2: PSRAD (shift count taken from src).
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psra(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xE1); // 66 0F E1: PSRAW (shift count taken from src).
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0xE2); // 66 0F E2: PSRAD (shift count taken from src).
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::psra(Type Ty, XmmRegister dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_int8()); // The shift count is encoded as a single byte.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0x71); // 66 0F 71: word-sized immediate shift group.
+  } else {
+    assert(Ty == IceType_i32);
+    emitUint8(0x72); // 66 0F 72: dword-sized immediate shift group.
+  }
+  emitRegisterOperand(4, gprEncoding(dst)); // /4 selects arithmetic right.
+  emitUint8(imm.value() & 0xFF);
+}
+
+void AssemblerX8632::psrl(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD1); // 66 0F D1: PSRLW.
+  } else if (Ty == IceType_f64) {
+    emitUint8(0xD3); // 66 0F D3: PSRLQ (64-bit lanes, selected for f64).
+  } else {
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0xD2); // 66 0F D2: PSRLD.
+  }
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::psrl(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xD1); // 66 0F D1: PSRLW.
+  } else if (Ty == IceType_f64) {
+    emitUint8(0xD3); // 66 0F D3: PSRLQ (64-bit lanes, selected for f64).
+  } else {
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0xD2); // 66 0F D2: PSRLD.
+  }
+  emitOperand(gprEncoding(dst), src);
+}
+
+void AssemblerX8632::psrl(Type Ty, XmmRegister dst, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_int8()); // The shift count is encoded as a single byte.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0x71); // 66 0F 71: word-sized immediate shift group.
+  } else if (Ty == IceType_f64) {
+    emitUint8(0x73); // 66 0F 73: qword-sized immediate shift group.
+  } else {
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
+    emitUint8(0x72); // 66 0F 72: dword-sized immediate shift group.
+  }
+  emitRegisterOperand(2, gprEncoding(dst)); // /2 selects logical right.
+  emitUint8(imm.value() & 0xFF);
+}
+
+// {add,sub,mul,div}ps are given a Ty parameter for consistency with
+// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc.,
+// we can use the Ty parameter to decide on adding a 0x66 prefix.
+
+// addps, register source: emits 0F 58 followed by the register operand.
+void AssemblerX8632::addps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x58};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// addps, memory source: emits 0F 58 followed by the memory operand, with dst
+// supplied as the reg field.
+void AssemblerX8632::addps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x58};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// subps, register source: emits 0F 5C followed by the register operand.
+void AssemblerX8632::subps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5C};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// subps, memory source: emits 0F 5C followed by the memory operand, with dst
+// supplied as the reg field.
+void AssemblerX8632::subps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5C};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// divps, register source: emits 0F 5E followed by the register operand.
+void AssemblerX8632::divps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5E};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// divps, memory source: emits 0F 5E followed by the memory operand, with dst
+// supplied as the reg field.
+void AssemblerX8632::divps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5E};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// mulps, register source: emits 0F 59 followed by the register operand.
+void AssemblerX8632::mulps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x59};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// mulps, memory source: emits 0F 59 followed by the memory operand, with dst
+// supplied as the reg field.
+void AssemblerX8632::mulps(Type /* Ty */, XmmRegister dst, const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x59};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// minps, register source: emits a 0x66 prefix when
+// isFloat32Asserting32Or64(Ty) is false, then 0F 5D and the register operand.
+void AssemblerX8632::minps(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5D};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// minps, memory source: emits a 0x66 prefix when isFloat32Asserting32Or64(Ty)
+// is false, then 0F 5D and the memory operand with dst as the reg field.
+void AssemblerX8632::minps(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5D};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// minss, register source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(Ty) holds and F2 otherwise; it is followed by 0F 5D
+// and the register operand.
+void AssemblerX8632::minss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const uint8_t Prefix = isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2;
+  emitUint8(Prefix);
+  emitUint8(0x0F);
+  emitUint8(0x5D);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// minss, memory source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(Ty) holds and F2 otherwise; it is followed by 0F 5D
+// and the memory operand with dst as the reg field.
+void AssemblerX8632::minss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const uint8_t Prefix = isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2;
+  emitUint8(Prefix);
+  emitUint8(0x0F);
+  emitUint8(0x5D);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// maxps, register source: emits a 0x66 prefix when
+// isFloat32Asserting32Or64(Ty) is false, then 0F 5F and the register operand.
+void AssemblerX8632::maxps(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5F};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// maxps, memory source: emits a 0x66 prefix when isFloat32Asserting32Or64(Ty)
+// is false, then 0F 5F and the memory operand with dst as the reg field.
+void AssemblerX8632::maxps(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5F};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// maxss, register source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(Ty) holds and F2 otherwise; it is followed by 0F 5F
+// and the register operand.
+void AssemblerX8632::maxss(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const uint8_t Prefix = isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2;
+  emitUint8(Prefix);
+  emitUint8(0x0F);
+  emitUint8(0x5F);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// maxss, memory source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(Ty) holds and F2 otherwise; it is followed by 0F 5F
+// and the memory operand with dst as the reg field.
+void AssemblerX8632::maxss(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const uint8_t Prefix = isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2;
+  emitUint8(Prefix);
+  emitUint8(0x0F);
+  emitUint8(0x5F);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// andnps, register source: emits a 0x66 prefix when
+// isFloat32Asserting32Or64(Ty) is false, then 0F 55 and the register operand.
+void AssemblerX8632::andnps(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x55};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// andnps, memory source: emits a 0x66 prefix when isFloat32Asserting32Or64(Ty)
+// is false, then 0F 55 and the memory operand with dst as the reg field.
+void AssemblerX8632::andnps(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x55};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// andps, register source: emits a 0x66 prefix when
+// isFloat32Asserting32Or64(Ty) is false, then 0F 54 and the register operand.
+void AssemblerX8632::andps(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x54};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// andps, memory source: emits a 0x66 prefix when isFloat32Asserting32Or64(Ty)
+// is false, then 0F 54 and the memory operand with dst as the reg field.
+void AssemblerX8632::andps(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x54};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// orps, register source: emits a 0x66 prefix when
+// isFloat32Asserting32Or64(Ty) is false, then 0F 56 and the register operand.
+void AssemblerX8632::orps(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x56};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// orps, memory source: emits a 0x66 prefix when isFloat32Asserting32Or64(Ty)
+// is false, then 0F 56 and the memory operand with dst as the reg field.
+void AssemblerX8632::orps(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x56};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// blendvps, register source: emits 66 0F 38 14 followed by the register
+// operand.
+void AssemblerX8632::blendvps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x38, 0x14};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// blendvps, memory source: emits 66 0F 38 14 followed by the memory operand,
+// with dst supplied as the reg field.
+void AssemblerX8632::blendvps(Type /* Ty */, XmmRegister dst,
+                              const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x38, 0x14};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// pblendvb, register source: emits 66 0F 38 10 followed by the register
+// operand.
+void AssemblerX8632::pblendvb(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x38, 0x10};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// pblendvb, memory source: emits 66 0F 38 10 followed by the memory operand,
+// with dst supplied as the reg field.
+void AssemblerX8632::pblendvb(Type /* Ty */, XmmRegister dst,
+                              const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x38, 0x10};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// cmpps, register source: emits a 0x66 prefix only when Ty is IceType_f64,
+// then 0F C2, the register operand, and CmpCondition as the final byte.
+void AssemblerX8632::cmpps(Type Ty, XmmRegister dst, XmmRegister src,
+                           CmppsCond CmpCondition) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsF64 = Ty == IceType_f64;
+  if (IsF64)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0xC2};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  emitUint8(CmpCondition);
+}
+
+// cmpps, memory source: emits a 0x66 prefix only when Ty is IceType_f64, then
+// 0F C2, the memory operand, and CmpCondition as the final byte.
+void AssemblerX8632::cmpps(Type Ty, XmmRegister dst, const Address &src,
+                           CmppsCond CmpCondition) {
+  // One more byte (the condition) is emitted after the operand; that count is
+  // passed to emitOperand as its third argument.
+  static constexpr RelocOffsetT TrailingBytes = 1;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsF64 = Ty == IceType_f64;
+  if (IsF64)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0xC2};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src, TrailingBytes);
+  emitUint8(CmpCondition);
+}
+
+// sqrtps: emits 0F 51 followed by a register operand that uses dst for both
+// fields.
+void AssemblerX8632::sqrtps(XmmRegister dst) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x51};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+// rsqrtps: emits 0F 52 followed by a register operand that uses dst for both
+// fields.
+void AssemblerX8632::rsqrtps(XmmRegister dst) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x52};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+// reciprocalps: emits 0F 53 followed by a register operand that uses dst for
+// both fields.
+void AssemblerX8632::reciprocalps(XmmRegister dst) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x53};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+// movhlps: emits 0F 12 followed by the register operand (dst, src).
+void AssemblerX8632::movhlps(XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x12};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// movlhps: emits 0F 16 followed by the register operand (dst, src).
+void AssemblerX8632::movlhps(XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x16};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// unpcklps: emits 0F 14 followed by the register operand (dst, src).
+void AssemblerX8632::unpcklps(XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x14};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// unpckhps: emits 0F 15 followed by the register operand (dst, src).
+void AssemblerX8632::unpckhps(XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x15};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// unpcklpd: emits 66 0F 14 followed by the register operand (dst, src).
+void AssemblerX8632::unpcklpd(XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x14};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// unpckhpd: emits 66 0F 15 followed by the register operand (dst, src).
+void AssemblerX8632::unpckhpd(XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x15};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+void AssemblerX8632::set1ps(XmmRegister dst, GPRRegister tmp1,
+                            const Immediate &imm) {
+  // Step 1: place the immediate in the scratch GPR tmp1 via an i32 mov.
+  mov(IceType_i32, tmp1, imm);
+  // Step 2: copy tmp1 into dst with an i32 movd.
+  movd(IceType_i32, dst, tmp1);
+  // Step 3: shufps dst with itself using selector 0 to broadcast the low lane.
+  shufps(RexTypeIrrelevant, dst, dst, Immediate(0x0));
+}
+
+// pshufb, register source: emits 66 0F 38 00 followed by the register operand.
+void AssemblerX8632::pshufb(Type /* Ty */, XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x38, 0x00};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// pshufb, memory source: emits 66 0F 38 00 followed by the memory operand,
+// with dst supplied as the reg field.
+void AssemblerX8632::pshufb(Type /* Ty */, XmmRegister dst,
+                            const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x38, 0x00};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// pshufd, register source: emits 66 0F 70, the register operand, and then imm
+// (asserted to satisfy is_uint8()) as the final byte.
+void AssemblerX8632::pshufd(Type /* Ty */, XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x70};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+// pshufd, memory source: emits 66 0F 70, the memory operand, and then imm
+// (asserted to satisfy is_uint8()) as the final byte.
+void AssemblerX8632::pshufd(Type /* Ty */, XmmRegister dst, const Address &src,
+                            const Immediate &imm) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x70};
+  // One immediate byte follows the operand; emitOperand is told about it.
+  static constexpr RelocOffsetT TrailingBytes = 1;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src, TrailingBytes);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+// punpckl, register source: emits 66 0F and then opcode 62 (v4i32/v4f32), 61
+// (v8i16) or 60 (v16i8). Any other type trips the assert and, when asserts are
+// compiled out, emits no opcode byte. The register operand follows.
+void AssemblerX8632::punpckl(Type Ty, XmmRegister Dst, XmmRegister Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x62);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x61);
+    break;
+  case IceType_v16i8:
+    emitUint8(0x60);
+    break;
+  default:
+    assert(false && "Unexpected vector unpack operand type");
+  }
+  emitXmmRegisterOperand(Dst, Src);
+}
+
+// punpckl, memory source: emits 66 0F and then opcode 62 (v4i32/v4f32), 61
+// (v8i16) or 60 (v16i8). Any other type trips the assert and, when asserts are
+// compiled out, emits no opcode byte. The memory operand follows.
+void AssemblerX8632::punpckl(Type Ty, XmmRegister Dst, const Address &Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x62);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x61);
+    break;
+  case IceType_v16i8:
+    emitUint8(0x60);
+    break;
+  default:
+    assert(false && "Unexpected vector unpack operand type");
+  }
+  emitOperand(gprEncoding(Dst), Src);
+}
+
+// punpckh, register source: emits 66 0F and then opcode 6A (v4i32/v4f32), 69
+// (v8i16) or 68 (v16i8). Any other type trips the assert and, when asserts are
+// compiled out, emits no opcode byte. The register operand follows.
+void AssemblerX8632::punpckh(Type Ty, XmmRegister Dst, XmmRegister Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x6A);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x69);
+    break;
+  case IceType_v16i8:
+    emitUint8(0x68);
+    break;
+  default:
+    assert(false && "Unexpected vector unpack operand type");
+  }
+  emitXmmRegisterOperand(Dst, Src);
+}
+
+// punpckh, memory source: emits 66 0F and then opcode 6A (v4i32/v4f32), 69
+// (v8i16) or 68 (v16i8). Any other type trips the assert and, when asserts are
+// compiled out, emits no opcode byte. The memory operand follows.
+void AssemblerX8632::punpckh(Type Ty, XmmRegister Dst, const Address &Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x6A);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x69);
+    break;
+  case IceType_v16i8:
+    emitUint8(0x68);
+    break;
+  default:
+    assert(false && "Unexpected vector unpack operand type");
+  }
+  emitOperand(gprEncoding(Dst), Src);
+}
+
+// packss, register source: emits 66 0F and then opcode 6B (v4i32/v4f32) or 63
+// (v8i16). Any other type trips the assert and, when asserts are compiled out,
+// emits no opcode byte. The register operand follows.
+void AssemblerX8632::packss(Type Ty, XmmRegister Dst, XmmRegister Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x6B);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x63);
+    break;
+  default:
+    assert(false && "Unexpected vector pack operand type");
+  }
+  emitXmmRegisterOperand(Dst, Src);
+}
+
+// packss, memory source: emits 66 0F and then opcode 6B (v4i32/v4f32) or 63
+// (v8i16). Any other type trips the assert and, when asserts are compiled out,
+// emits no opcode byte. The memory operand follows.
+void AssemblerX8632::packss(Type Ty, XmmRegister Dst, const Address &Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x6B);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x63);
+    break;
+  default:
+    assert(false && "Unexpected vector pack operand type");
+  }
+  emitOperand(gprEncoding(Dst), Src);
+}
+
+// packus, register source: emits 66 0F and then the two bytes 38 2B
+// (v4i32/v4f32) or the single byte 67 (v8i16). Any other type trips the assert
+// and, when asserts are compiled out, emits no opcode byte. The register
+// operand follows.
+void AssemblerX8632::packus(Type Ty, XmmRegister Dst, XmmRegister Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x38);
+    emitUint8(0x2B);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x67);
+    break;
+  default:
+    assert(false && "Unexpected vector pack operand type");
+  }
+  emitXmmRegisterOperand(Dst, Src);
+}
+
+// packus, memory source: emits 66 0F and then the two bytes 38 2B
+// (v4i32/v4f32) or the single byte 67 (v8i16). Any other type trips the assert
+// and, when asserts are compiled out, emits no opcode byte. The memory operand
+// follows.
+void AssemblerX8632::packus(Type Ty, XmmRegister Dst, const Address &Src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  switch (Ty) {
+  case IceType_v4i32:
+  case IceType_v4f32:
+    emitUint8(0x38);
+    emitUint8(0x2B);
+    break;
+  case IceType_v8i16:
+    emitUint8(0x67);
+    break;
+  default:
+    assert(false && "Unexpected vector pack operand type");
+  }
+  emitOperand(gprEncoding(Dst), Src);
+}
+
+// shufps, register source: emits 0F C6, the register operand, and then imm
+// (asserted to satisfy is_uint8()) as the final byte.
+void AssemblerX8632::shufps(Type /* Ty */, XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0xC6};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+// shufps, memory source: emits 0F C6, the memory operand, and then imm
+// (asserted to satisfy is_uint8()) as the final byte.
+void AssemblerX8632::shufps(Type /* Ty */, XmmRegister dst, const Address &src,
+                            const Immediate &imm) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0xC6};
+  // One immediate byte follows the operand; emitOperand is told about it.
+  static constexpr RelocOffsetT TrailingBytes = 1;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src, TrailingBytes);
+  assert(imm.is_uint8());
+  emitUint8(imm.value());
+}
+
+// sqrtpd: emits 66 0F 51 followed by a register operand that uses dst for both
+// fields.
+void AssemblerX8632::sqrtpd(XmmRegister dst) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x51};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, dst);
+}
+
+// cvtdq2ps, register source: emits 0F 5B followed by the register operand.
+void AssemblerX8632::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                              XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5B};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// cvtdq2ps, memory source: emits 0F 5B followed by the memory operand, with
+// dst supplied as the reg field.
+void AssemblerX8632::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                              const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x5B};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// cvttps2dq, register source: emits F3 0F 5B followed by the register operand.
+void AssemblerX8632::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                               XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0xF3, 0x0F, 0x5B};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// cvttps2dq, memory source: emits F3 0F 5B followed by the memory operand,
+// with dst supplied as the reg field.
+void AssemblerX8632::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                               const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0xF3, 0x0F, 0x5B};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// cvtps2dq, register source: emits 66 0F 5B followed by the register operand.
+void AssemblerX8632::cvtps2dq(Type /* Ignore */, XmmRegister dst,
+                              XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x5B};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// cvtps2dq, memory source: emits 66 0F 5B followed by the memory operand, with
+// dst supplied as the reg field.
+void AssemblerX8632::cvtps2dq(Type /* Ignore */, XmmRegister dst,
+                              const Address &src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x5B};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// cvtsi2ss, GPR source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(DestTy) holds and F2 otherwise; it is followed by
+// 0F 2A and the register operand (dst, src).
+//
+// The source type does not influence the bytes emitted here, so that parameter
+// is left unnamed, following the file's `Type /* Ty */` convention for unused
+// parameters.
+void AssemblerX8632::cvtsi2ss(Type DestTy, XmmRegister dst, Type /* SrcTy */,
+                              GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2A);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// cvtsi2ss, memory source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(DestTy) holds and F2 otherwise; it is followed by
+// 0F 2A and the memory operand with dst as the reg field.
+//
+// The source type does not influence the bytes emitted here, so that parameter
+// is left unnamed, following the file's `Type /* Ty */` convention for unused
+// parameters.
+void AssemblerX8632::cvtsi2ss(Type DestTy, XmmRegister dst, Type /* SrcTy */,
+                              const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2A);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// Scalar float-to-float conversion (ss2sd or sd2ss), register source: the
+// prefix byte is F3 when isFloat32Asserting32Or64(SrcTy) holds and F2
+// otherwise; it is followed by 0F 5A and the register operand.
+void AssemblerX8632::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                    XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const uint8_t Prefix = isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2;
+  emitUint8(Prefix);
+  emitUint8(0x0F);
+  emitUint8(0x5A);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// Scalar float-to-float conversion, memory source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(SrcTy) holds and F2 otherwise; it is followed by
+// 0F 5A and the memory operand with dst as the reg field.
+void AssemblerX8632::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                    const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const uint8_t Prefix = isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2;
+  emitUint8(Prefix);
+  emitUint8(0x0F);
+  emitUint8(0x5A);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// cvttss2si, XMM source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(SrcTy) holds and F2 otherwise; it is followed by
+// 0F 2C and the register operand (dst, src).
+//
+// The destination type does not influence the bytes emitted here, so that
+// parameter is left unnamed, following the file's `Type /* Ty */` convention
+// for unused parameters.
+void AssemblerX8632::cvttss2si(Type /* DestTy */, GPRRegister dst, Type SrcTy,
+                               XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2C);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// cvttss2si, memory source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(SrcTy) holds and F2 otherwise; it is followed by
+// 0F 2C and the memory operand with dst as the reg field.
+//
+// The destination type does not influence the bytes emitted here, so that
+// parameter is left unnamed, following the file's `Type /* Ty */` convention
+// for unused parameters.
+void AssemblerX8632::cvttss2si(Type /* DestTy */, GPRRegister dst, Type SrcTy,
+                               const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2C);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// cvtss2si, XMM source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(SrcTy) holds and F2 otherwise; it is followed by
+// 0F 2D and the register operand (dst, src).
+//
+// The destination type does not influence the bytes emitted here, so that
+// parameter is left unnamed, following the file's `Type /* Ty */` convention
+// for unused parameters.
+void AssemblerX8632::cvtss2si(Type /* DestTy */, GPRRegister dst, Type SrcTy,
+                              XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2D);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// cvtss2si, memory source: the prefix byte is F3 when
+// isFloat32Asserting32Or64(SrcTy) holds and F2 otherwise; it is followed by
+// 0F 2D and the memory operand with dst as the reg field.
+//
+// The destination type does not influence the bytes emitted here, so that
+// parameter is left unnamed, following the file's `Type /* Ty */` convention
+// for unused parameters.
+void AssemblerX8632::cvtss2si(Type /* DestTy */, GPRRegister dst, Type SrcTy,
+                              const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
+  emitUint8(0x0F);
+  emitUint8(0x2D);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// ucomiss, register operands: emits a 0x66 prefix only when Ty is IceType_f64,
+// then 0F 2E and the register operand (a, b).
+void AssemblerX8632::ucomiss(Type Ty, XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsF64 = Ty == IceType_f64;
+  if (IsF64)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x2E};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(a, b);
+}
+
+// ucomiss, memory second operand: emits a 0x66 prefix only when Ty is
+// IceType_f64, then 0F 2E and the memory operand b with a as the reg field.
+void AssemblerX8632::ucomiss(Type Ty, XmmRegister a, const Address &b) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsF64 = Ty == IceType_f64;
+  if (IsF64)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x2E};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(a), b);
+}
+
+// movmsk: for IceType_v16i8 emits 66 0F D7; for IceType_v4f32/IceType_v4i32
+// emits 0F 50 with no prefix. Any other type trips the assert; when asserts
+// are compiled out only the 0F byte is written before the operand. The
+// register operand (dst, src) follows.
+void AssemblerX8632::movmsk(Type Ty, GPRRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsV16I8 = Ty == IceType_v16i8;
+  const bool IsFourLane = Ty == IceType_v4f32 || Ty == IceType_v4i32;
+  if (!IsV16I8 && !IsFourLane)
+    assert(false && "Unexpected movmsk operand type");
+  // Only the v16i8 form carries the operand size prefix.
+  if (IsV16I8)
+    emitUint8(0x66);
+  emitUint8(0x0F);
+  if (IsV16I8)
+    emitUint8(0xD7);
+  else if (IsFourLane)
+    emitUint8(0x50);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// sqrt, memory source: when isScalarFloatingType(Ty) holds, a prefix byte is
+// emitted first (F3 if isFloat32Asserting32Or64(Ty), else F2); otherwise no
+// prefix. Then 0F 51 and the memory operand with dst as the reg field.
+void AssemblerX8632::sqrt(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (isScalarFloatingType(Ty)) {
+    const uint8_t Prefix = isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2;
+    emitUint8(Prefix);
+  }
+  emitUint8(0x0F);
+  emitUint8(0x51);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// sqrt, register source: when isScalarFloatingType(Ty) holds, a prefix byte is
+// emitted first (F3 if isFloat32Asserting32Or64(Ty), else F2); otherwise no
+// prefix. Then 0F 51 and the register operand.
+void AssemblerX8632::sqrt(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (isScalarFloatingType(Ty)) {
+    const uint8_t Prefix = isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2;
+    emitUint8(Prefix);
+  }
+  emitUint8(0x0F);
+  emitUint8(0x51);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// xorps, memory source: emits a 0x66 prefix when isFloat32Asserting32Or64(Ty)
+// is false, then 0F 57 and the memory operand with dst as the reg field.
+void AssemblerX8632::xorps(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x57};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// xorps, register source: emits a 0x66 prefix when
+// isFloat32Asserting32Or64(Ty) is false, then 0F 57 and the register operand.
+void AssemblerX8632::xorps(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsFloat32 = isFloat32Asserting32Or64(Ty);
+  if (!IsFloat32)
+    emitUint8(0x66);
+  static constexpr uint8_t OpcodeBytes[] = {0x0F, 0x57};
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// insertps, register source: emits 66 0F 3A 21, the register operand, and imm
+// as the final byte. Asserts that imm satisfies is_uint8() and that Ty
+// satisfies isVectorFloatingType(); Ty is otherwise unused.
+void AssemblerX8632::insertps(Type Ty, XmmRegister dst, XmmRegister src,
+                              const Immediate &imm) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x3A, 0x21};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  assert(isVectorFloatingType(Ty));
+  (void)Ty;
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+  emitUint8(imm.value());
+}
+
+// insertps, memory source: emits 66 0F 3A 21, the memory operand, and imm as
+// the final byte. Asserts that imm satisfies is_uint8() and that Ty satisfies
+// isVectorFloatingType(); Ty is otherwise unused.
+void AssemblerX8632::insertps(Type Ty, XmmRegister dst, const Address &src,
+                              const Immediate &imm) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x3A, 0x21};
+  // One immediate byte follows the operand; emitOperand is told about it.
+  static constexpr RelocOffsetT TrailingBytes = 1;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  assert(isVectorFloatingType(Ty));
+  (void)Ty;
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitOperand(gprEncoding(dst), src, TrailingBytes);
+  emitUint8(imm.value());
+}
+
+// pinsr, GPR source: emits 66 0F, then C4 for IceType_i16, or 3A followed by
+// 20 (isByteSizedType(Ty)) / 22 (otherwise) for the remaining types. The
+// register operand and imm (asserted is_uint8()) follow.
+void AssemblerX8632::pinsr(Type Ty, XmmRegister dst, GPRRegister src,
+                           const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty != IceType_i16) {
+    emitUint8(0x3A);
+    const uint8_t Opcode = isByteSizedType(Ty) ? 0x20 : 0x22;
+    emitUint8(Opcode);
+  } else {
+    emitUint8(0xC4);
+  }
+  emitXmmRegisterOperand(dst, src);
+  emitUint8(imm.value());
+}
+
+// pinsr, memory source: emits 66 0F, then C4 for IceType_i16, or 3A followed
+// by 20 (isByteSizedType(Ty)) / 22 (otherwise) for the remaining types. The
+// memory operand and imm (asserted is_uint8()) follow.
+void AssemblerX8632::pinsr(Type Ty, XmmRegister dst, const Address &src,
+                           const Immediate &imm) {
+  // One immediate byte follows the operand; emitOperand is told about it.
+  static constexpr RelocOffsetT TrailingBytes = 1;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty != IceType_i16) {
+    emitUint8(0x3A);
+    const uint8_t Opcode = isByteSizedType(Ty) ? 0x20 : 0x22;
+    emitUint8(Opcode);
+  } else {
+    emitUint8(0xC4);
+  }
+  emitOperand(gprEncoding(dst), src, TrailingBytes);
+  emitUint8(imm.value());
+}
+
+// pextr: emits 66 0F, then either C5 with operand order (dst, src) for
+// IceType_i16, or 3A plus 14 (isByteSizedType(Ty)) / 16 (otherwise) with
+// operand order (src, dst). imm (asserted is_uint8()) is the final byte.
+void AssemblerX8632::pextr(Type Ty, GPRRegister dst, XmmRegister src,
+                           const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(imm.is_uint8());
+  // Both encodings start with the same two bytes.
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  if (Ty == IceType_i16) {
+    emitUint8(0xC5);
+    emitXmmRegisterOperand(dst, src);
+  } else {
+    emitUint8(0x3A);
+    emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
+    // The SSE 4.1 forms are "MRI" (dst may be memory), whereas SSE2 pextrw is
+    // "RMI" (dst must be a register), hence the swapped operand order.
+    emitXmmRegisterOperand(src, dst);
+  }
+  emitUint8(imm.value());
+}
+
+// pmovsxdq: emits 66 0F 38 25 followed by the register operand (dst, src).
+void AssemblerX8632::pmovsxdq(XmmRegister dst, XmmRegister src) {
+  static constexpr uint8_t OpcodeBytes[] = {0x66, 0x0F, 0x38, 0x25};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// pcmpeq, register source: emits 66 0F, then 74 when isByteSizedArithType(Ty)
+// holds, 75 for IceType_i16, and 76 for everything else; the register operand
+// follows.
+void AssemblerX8632::pcmpeq(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  const uint8_t Opcode =
+      isByteSizedArithType(Ty) ? 0x74 : (Ty == IceType_i16 ? 0x75 : 0x76);
+  emitUint8(Opcode);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// pcmpeq, memory source: emits 66 0F, then 74 when isByteSizedArithType(Ty)
+// holds, 75 for IceType_i16, and 76 for everything else; the memory operand
+// follows with dst as the reg field.
+void AssemblerX8632::pcmpeq(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  const uint8_t Opcode =
+      isByteSizedArithType(Ty) ? 0x74 : (Ty == IceType_i16 ? 0x75 : 0x76);
+  emitUint8(Opcode);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// pcmpgt, register source: emits 66 0F, then 64 when isByteSizedArithType(Ty)
+// holds, 65 for IceType_i16, and 66 for everything else; the register operand
+// follows.
+void AssemblerX8632::pcmpgt(Type Ty, XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  const uint8_t Opcode =
+      isByteSizedArithType(Ty) ? 0x64 : (Ty == IceType_i16 ? 0x65 : 0x66);
+  emitUint8(Opcode);
+  emitXmmRegisterOperand(dst, src);
+}
+
+// pcmpgt, memory source: emits 66 0F, then 64 when isByteSizedArithType(Ty)
+// holds, 65 for IceType_i16, and 66 for everything else; the memory operand
+// follows with dst as the reg field.
+void AssemblerX8632::pcmpgt(Type Ty, XmmRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  const uint8_t Opcode =
+      isByteSizedArithType(Ty) ? 0x64 : (Ty == IceType_i16 ? 0x65 : 0x66);
+  emitUint8(Opcode);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// round, register source: emits 66 0F 3A, then 08 (v4f32), 0A (f32) or 0B
+// (f64). Any other type trips the assert and, when asserts are compiled out,
+// emits no opcode byte. The register operand follows, then the low byte of
+// mode with bit 0x8 forced on.
+void AssemblerX8632::round(Type Ty, XmmRegister dst, XmmRegister src,
+                           const Immediate &mode) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A);
+  if (Ty == IceType_v4f32) {
+    emitUint8(0x08);
+  } else if (Ty == IceType_f32) {
+    emitUint8(0x0A);
+  } else if (Ty == IceType_f64) {
+    emitUint8(0x0B);
+  } else {
+    assert(false && "Unsupported round operand type");
+  }
+  emitXmmRegisterOperand(dst, src);
+  // Setting bit 3 masks the precision exception.
+  const uint8_t ModeByte = static_cast<uint8_t>(mode.value()) | 0x8;
+  emitUint8(ModeByte);
+}
+
+// round, memory source: emits 66 0F 3A, then 08 (v4f32), 0A (f32) or 0B (f64).
+// Any other type trips the assert and, when asserts are compiled out, emits no
+// opcode byte. The memory operand follows with dst as the reg field, then the
+// low byte of mode with bit 0x8 forced on.
+void AssemblerX8632::round(Type Ty, XmmRegister dst, const Address &src,
+                           const Immediate &mode) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x66);
+  emitUint8(0x0F);
+  emitUint8(0x3A);
+  switch (Ty) {
+  case IceType_v4f32:
+    emitUint8(0x08);
+    break;
+  case IceType_f32:
+    emitUint8(0x0A);
+    break;
+  case IceType_f64:
+    emitUint8(0x0B);
+    break;
+  default:
+    assert(false && "Unsupported round operand type");
+  }
+  // A one-byte immediate follows the memory operand, so pass the same
+  // next-instruction offset that the other address+immediate emitters in this
+  // file (cmpps, pshufd, shufps, insertps, pinsr) hand to emitOperand.
+  static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
+  emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
+  // Mask precision exception.
+  emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
+}
+
+// fnstcw: emits D9 followed by the memory operand dst with reg field 7.
+void AssemblerX8632::fnstcw(const Address &dst) {
+  static constexpr uint8_t Opcode = 0xD9;
+  static constexpr int RegField = 7;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(RegField, dst);
+}
+
+// fldcw: emits D9 followed by the memory operand src with reg field 5.
+void AssemblerX8632::fldcw(const Address &src) {
+  static constexpr uint8_t Opcode = 0xD9;
+  static constexpr int RegField = 5;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(RegField, src);
+}
+
+// fistpl: emits DF followed by the memory operand dst with reg field 7.
+void AssemblerX8632::fistpl(const Address &dst) {
+  static constexpr uint8_t Opcode = 0xDF;
+  static constexpr int RegField = 7;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(RegField, dst);
+}
+
+// fistps: emits DB followed by the memory operand dst with reg field 3.
+void AssemblerX8632::fistps(const Address &dst) {
+  static constexpr uint8_t Opcode = 0xDB;
+  static constexpr int RegField = 3;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(RegField, dst);
+}
+
+// fildl: emits DF followed by the memory operand src with reg field 5.
+void AssemblerX8632::fildl(const Address &src) {
+  static constexpr uint8_t Opcode = 0xDF;
+  static constexpr int RegField = 5;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(RegField, src);
+}
+
+// filds: emits DB followed by the memory operand src with reg field 0.
+void AssemblerX8632::filds(const Address &src) {
+  static constexpr uint8_t Opcode = 0xDB;
+  static constexpr int RegField = 0;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(RegField, src);
+}
+
+// fincstp: emits the fixed two-byte sequence D9 F7.
+void AssemblerX8632::fincstp() {
+  static constexpr uint8_t OpcodeBytes[] = {0xD9, 0xF7};
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  for (const uint8_t Byte : OpcodeBytes)
+    emitUint8(Byte);
+}
+
+// Integer arithmetic group, register destination with an immediate. Tag (0..7)
+// selects the operation. An operand size override is emitted for IceType_i16;
+// byte-sized types go through emitComplexI8 and all others through
+// emitComplex.
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, GPRRegister reg, const Immediate &imm) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) {
+    emitComplexI8(Tag, Operand(reg), imm);
+    return;
+  }
+  emitComplex(Ty, Tag, Operand(reg), imm);
+}
+
+// Integer arithmetic group, register/register form. Tag (0..7) selects the
+// operation. An operand size override is emitted for IceType_i16; the opcode
+// is Tag * 8 + 2 for byte-sized types and Tag * 8 + 3 otherwise, followed by
+// the register operand (reg0, reg1).
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(Tag * 8 + (isByteSizedType(Ty) ? 2 : 3));
+  emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
+}
+
+// Integer arithmetic group, register destination with a memory source. Tag
+// (0..7) selects the operation. An operand size override is emitted for
+// IceType_i16; the opcode is Tag * 8 + 2 for byte-sized types and Tag * 8 + 3
+// otherwise, followed by the memory operand with reg as the reg field.
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, GPRRegister reg,
+                               const Address &address) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(Tag * 8 + (isByteSizedType(Ty) ? 2 : 3));
+  emitOperand(gprEncoding(reg), address);
+}
+
+// Integer arithmetic group, memory destination with a register source. Tag
+// (0..7) selects the operation. An operand size override is emitted for
+// IceType_i16; the opcode is Tag * 8 + 0 for byte-sized types and Tag * 8 + 1
+// otherwise, followed by the memory operand with reg as the reg field.
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, const Address &address,
+                               GPRRegister reg) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitUint8(Tag * 8 + (isByteSizedType(Ty) ? 0 : 1));
+  emitOperand(gprEncoding(reg), address);
+}
+
+// Integer arithmetic group, memory destination with an immediate. Tag (0..7)
+// selects the operation. An operand size override is emitted for IceType_i16;
+// byte-sized types go through emitComplexI8 and all others through
+// emitComplex.
+template <uint32_t Tag>
+void AssemblerX8632::arith_int(Type Ty, const Address &address,
+                               const Immediate &imm) {
+  static_assert(Tag < 8, "Tag must be between 0..7");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  if (isByteSizedType(Ty)) {
+    emitComplexI8(Tag, address, imm);
+    return;
+  }
+  emitComplex(Ty, Tag, address, imm);
+}
+
+// cmp reg, imm: forwards to arith_int with tag 7.
+void AssemblerX8632::cmp(Type Ty, GPRRegister reg, const Immediate &imm) {
+  static constexpr uint32_t CmpTag = 7;
+  arith_int<CmpTag>(Ty, reg, imm);
+}
+
+// cmp reg0, reg1: forwards to arith_int with tag 7.
+void AssemblerX8632::cmp(Type Ty, GPRRegister reg0, GPRRegister reg1) {
+  static constexpr uint32_t CmpTag = 7;
+  arith_int<CmpTag>(Ty, reg0, reg1);
+}
+
+// cmp reg, [address]: forwards to arith_int with tag 7.
+void AssemblerX8632::cmp(Type Ty, GPRRegister reg, const Address &address) {
+  static constexpr uint32_t CmpTag = 7;
+  arith_int<CmpTag>(Ty, reg, address);
+}
+
+// cmp [address], reg: forwards to arith_int with tag 7.
+void AssemblerX8632::cmp(Type Ty, const Address &address, GPRRegister reg) {
+  static constexpr uint32_t CmpTag = 7;
+  arith_int<CmpTag>(Ty, address, reg);
+}
+
+// cmp [address], imm: forwards to arith_int with tag 7.
+void AssemblerX8632::cmp(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  static constexpr uint32_t CmpTag = 7;
+  arith_int<CmpTag>(Ty, address, imm);
+}
+
+// test reg1, reg2: emits an operand size override for IceType_i16, then opcode
+// 84 for byte-sized types or 85 otherwise, then the register operand.
+void AssemblerX8632::test(Type Ty, GPRRegister reg1, GPRRegister reg2) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  const uint8_t Opcode = isByteSizedType(Ty) ? 0x84 : 0x85;
+  emitUint8(Opcode);
+  emitRegisterOperand(gprEncoding(reg1), gprEncoding(reg2));
+}
+
+// test [addr], reg: emits an operand size override for IceType_i16, then
+// opcode 84 for byte-sized types or 85 otherwise, then the memory operand with
+// reg as the reg field.
+void AssemblerX8632::test(Type Ty, const Address &addr, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  const uint8_t Opcode = isByteSizedType(Ty) ? 0x84 : 0x85;
+  emitUint8(Opcode);
+  emitOperand(gprEncoding(reg), addr);
+}
+
+// test reg, imm. Picks the shortest of three encodings:
+//   - 8-bit test (0xA8 for the accumulator, else 0xF6 /0) when the immediate
+//     is a uint8 and the register has a byte variant;
+//   - accumulator short form 0xA9 with a full-width immediate;
+//   - generic 0xF7 /0 with a full-width immediate.
+void AssemblerX8632::test(Type Ty, GPRRegister reg,
+                          const Immediate &immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool IsAccumulator = reg == Traits::Encoded_Reg_Accumulator;
+
+  // For registers that have a byte variant (EAX, EBX, ECX, and EDX) only the
+  // byte register is tested to keep the encoding short. That is legal even
+  // when the register has high bits set: the flags depend only on the "AND"
+  // of both operands, and the immediate is zero in those high bits.
+  if (immediate.is_uint8() && reg <= Traits::Last8BitGPR) {
+    if (IsAccumulator) {
+      emitUint8(0xA8);
+    } else {
+      emitUint8(0xF6);
+      emitUint8(0xC0 + gprEncoding(reg));
+    }
+    // Zero-extended 8-bit immediate.
+    emitUint8(immediate.value() & 0xFF);
+    return;
+  }
+
+  // Full-width forms share the prefix and the trailing immediate.
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  if (IsAccumulator) {
+    // Short form when the destination is EAX.
+    emitUint8(0xA9);
+  } else {
+    emitUint8(0xF7);
+    emitRegisterOperand(0, gprEncoding(reg));
+  }
+  emitImmediate(Ty, immediate);
+}
+
+// test [addr], imm. A uint8 immediate is encoded as a byte test (0xF6 /0);
+// anything else uses 0xF7 /0 with a 16- or 32-bit immediate. The third
+// argument to emitOperand() is the number of immediate bytes that follow the
+// operand, which emitOperand() subtracts from PC-relative fixup addends.
+void AssemblerX8632::test(Type Ty, const Address &addr,
+                          const Immediate &immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (immediate.is_uint8()) {
+    // Short immediate: only test the byte at addr, using a zero-extended
+    // 8-bit immediate.
+    emitUint8(0xF6);
+    static constexpr RelocOffsetT ByteImmSize = 1;
+    emitOperand(0, addr, ByteImmSize);
+    emitUint8(immediate.value() & 0xFF);
+    return;
+  }
+
+  const bool Is16Bit = Ty == IceType_i16;
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0xF7);
+  const uint8_t WideImmSize = Is16Bit ? 2 : 4;
+  emitOperand(0, addr, WideImmSize);
+  emitImmediate(Ty, immediate);
+}
+
+// And: every overload forwards to the matching arith_int<> overload with
+// ALU tag 4.
+void AssemblerX8632::And(Type Ty, GPRRegister dst, GPRRegister src) {
+  constexpr uint32_t AndTag = 4;
+  arith_int<AndTag>(Ty, dst, src);
+}
+
+void AssemblerX8632::And(Type Ty, GPRRegister dst, const Address &address) {
+  constexpr uint32_t AndTag = 4;
+  arith_int<AndTag>(Ty, dst, address);
+}
+
+void AssemblerX8632::And(Type Ty, GPRRegister dst, const Immediate &imm) {
+  constexpr uint32_t AndTag = 4;
+  arith_int<AndTag>(Ty, dst, imm);
+}
+
+void AssemblerX8632::And(Type Ty, const Address &address, GPRRegister reg) {
+  constexpr uint32_t AndTag = 4;
+  arith_int<AndTag>(Ty, address, reg);
+}
+
+void AssemblerX8632::And(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  constexpr uint32_t AndTag = 4;
+  arith_int<AndTag>(Ty, address, imm);
+}
+
+// Or: every overload forwards to the matching arith_int<> overload with
+// ALU tag 1.
+void AssemblerX8632::Or(Type Ty, GPRRegister dst, GPRRegister src) {
+  constexpr uint32_t OrTag = 1;
+  arith_int<OrTag>(Ty, dst, src);
+}
+
+void AssemblerX8632::Or(Type Ty, GPRRegister dst, const Address &address) {
+  constexpr uint32_t OrTag = 1;
+  arith_int<OrTag>(Ty, dst, address);
+}
+
+void AssemblerX8632::Or(Type Ty, GPRRegister dst, const Immediate &imm) {
+  constexpr uint32_t OrTag = 1;
+  arith_int<OrTag>(Ty, dst, imm);
+}
+
+void AssemblerX8632::Or(Type Ty, const Address &address, GPRRegister reg) {
+  constexpr uint32_t OrTag = 1;
+  arith_int<OrTag>(Ty, address, reg);
+}
+
+void AssemblerX8632::Or(Type Ty, const Address &address, const Immediate &imm) {
+  constexpr uint32_t OrTag = 1;
+  arith_int<OrTag>(Ty, address, imm);
+}
+
+// Xor: every overload forwards to the matching arith_int<> overload with
+// ALU tag 6.
+void AssemblerX8632::Xor(Type Ty, GPRRegister dst, GPRRegister src) {
+  constexpr uint32_t XorTag = 6;
+  arith_int<XorTag>(Ty, dst, src);
+}
+
+void AssemblerX8632::Xor(Type Ty, GPRRegister dst, const Address &address) {
+  constexpr uint32_t XorTag = 6;
+  arith_int<XorTag>(Ty, dst, address);
+}
+
+void AssemblerX8632::Xor(Type Ty, GPRRegister dst, const Immediate &imm) {
+  constexpr uint32_t XorTag = 6;
+  arith_int<XorTag>(Ty, dst, imm);
+}
+
+void AssemblerX8632::Xor(Type Ty, const Address &address, GPRRegister reg) {
+  constexpr uint32_t XorTag = 6;
+  arith_int<XorTag>(Ty, address, reg);
+}
+
+void AssemblerX8632::Xor(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  constexpr uint32_t XorTag = 6;
+  arith_int<XorTag>(Ty, address, imm);
+}
+
+// add: every overload forwards to the matching arith_int<> overload with
+// ALU tag 0.
+void AssemblerX8632::add(Type Ty, GPRRegister dst, GPRRegister src) {
+  constexpr uint32_t AddTag = 0;
+  arith_int<AddTag>(Ty, dst, src);
+}
+
+void AssemblerX8632::add(Type Ty, GPRRegister reg, const Address &address) {
+  constexpr uint32_t AddTag = 0;
+  arith_int<AddTag>(Ty, reg, address);
+}
+
+void AssemblerX8632::add(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr uint32_t AddTag = 0;
+  arith_int<AddTag>(Ty, reg, imm);
+}
+
+void AssemblerX8632::add(Type Ty, const Address &address, GPRRegister reg) {
+  constexpr uint32_t AddTag = 0;
+  arith_int<AddTag>(Ty, address, reg);
+}
+
+void AssemblerX8632::add(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  constexpr uint32_t AddTag = 0;
+  arith_int<AddTag>(Ty, address, imm);
+}
+
+// adc: every overload forwards to the matching arith_int<> overload with
+// ALU tag 2.
+void AssemblerX8632::adc(Type Ty, GPRRegister dst, GPRRegister src) {
+  constexpr uint32_t AdcTag = 2;
+  arith_int<AdcTag>(Ty, dst, src);
+}
+
+void AssemblerX8632::adc(Type Ty, GPRRegister dst, const Address &address) {
+  constexpr uint32_t AdcTag = 2;
+  arith_int<AdcTag>(Ty, dst, address);
+}
+
+void AssemblerX8632::adc(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr uint32_t AdcTag = 2;
+  arith_int<AdcTag>(Ty, reg, imm);
+}
+
+void AssemblerX8632::adc(Type Ty, const Address &address, GPRRegister reg) {
+  constexpr uint32_t AdcTag = 2;
+  arith_int<AdcTag>(Ty, address, reg);
+}
+
+void AssemblerX8632::adc(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  constexpr uint32_t AdcTag = 2;
+  arith_int<AdcTag>(Ty, address, imm);
+}
+
+// sub: every overload forwards to the matching arith_int<> overload with
+// ALU tag 5.
+void AssemblerX8632::sub(Type Ty, GPRRegister dst, GPRRegister src) {
+  constexpr uint32_t SubTag = 5;
+  arith_int<SubTag>(Ty, dst, src);
+}
+
+void AssemblerX8632::sub(Type Ty, GPRRegister reg, const Address &address) {
+  constexpr uint32_t SubTag = 5;
+  arith_int<SubTag>(Ty, reg, address);
+}
+
+void AssemblerX8632::sub(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr uint32_t SubTag = 5;
+  arith_int<SubTag>(Ty, reg, imm);
+}
+
+void AssemblerX8632::sub(Type Ty, const Address &address, GPRRegister reg) {
+  constexpr uint32_t SubTag = 5;
+  arith_int<SubTag>(Ty, address, reg);
+}
+
+void AssemblerX8632::sub(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  constexpr uint32_t SubTag = 5;
+  arith_int<SubTag>(Ty, address, imm);
+}
+
+// sbb: every overload forwards to the matching arith_int<> overload with
+// ALU tag 3.
+void AssemblerX8632::sbb(Type Ty, GPRRegister dst, GPRRegister src) {
+  constexpr uint32_t SbbTag = 3;
+  arith_int<SbbTag>(Ty, dst, src);
+}
+
+void AssemblerX8632::sbb(Type Ty, GPRRegister dst, const Address &address) {
+  constexpr uint32_t SbbTag = 3;
+  arith_int<SbbTag>(Ty, dst, address);
+}
+
+void AssemblerX8632::sbb(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr uint32_t SbbTag = 3;
+  arith_int<SbbTag>(Ty, reg, imm);
+}
+
+void AssemblerX8632::sbb(Type Ty, const Address &address, GPRRegister reg) {
+  constexpr uint32_t SbbTag = 3;
+  arith_int<SbbTag>(Ty, address, reg);
+}
+
+void AssemblerX8632::sbb(Type Ty, const Address &address,
+                         const Immediate &imm) {
+  constexpr uint32_t SbbTag = 3;
+  arith_int<SbbTag>(Ty, address, imm);
+}
+
+// cbw: operand-size override prefix followed by opcode 0x98.
+void AssemblerX8632::cbw() {
+  constexpr uint8_t Opcode = 0x98;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitOperandSizeOverride();
+  emitUint8(Opcode);
+}
+
+// cwd: operand-size override prefix followed by opcode 0x99.
+void AssemblerX8632::cwd() {
+  constexpr uint8_t Opcode = 0x99;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitOperandSizeOverride();
+  emitUint8(Opcode);
+}
+
+// cdq: opcode 0x99 with no prefix.
+void AssemblerX8632::cdq() {
+  constexpr uint8_t Opcode = 0x99;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+}
+
+// cqo: only instantiable for a traits type T with T::Is64Bit set. As written
+// it emits the same single byte (0x99) as cdq().
+// NOTE(review): this looks like a leftover from the templated base class;
+// confirm whether the x86-32 assembler still needs it.
+template <typename T>
+typename std::enable_if<T::Is64Bit, void>::type AssemblerX8632::cqo() {
+  constexpr uint8_t Opcode = 0x99;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+}
+
+// div reg: operand-size prefix for i16, opcode 0xF6 (byte-sized) or 0xF7,
+// with opcode extension 6 and the register as operand.
+void AssemblerX8632::div(Type Ty, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitRegisterOperand(6, gprEncoding(reg));
+}
+
+// div [addr]: operand-size prefix for i16, opcode 0xF6 (byte-sized) or 0xF7,
+// with opcode extension 6 and the memory operand.
+void AssemblerX8632::div(Type Ty, const Address &addr) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitOperand(6, addr);
+}
+
+// idiv reg: operand-size prefix for i16, opcode 0xF6 (byte-sized) or 0xF7,
+// with opcode extension 7 and the register as operand.
+void AssemblerX8632::idiv(Type Ty, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitRegisterOperand(7, gprEncoding(reg));
+}
+
+// idiv [addr]: operand-size prefix for i16, opcode 0xF6 (byte-sized) or
+// 0xF7, with opcode extension 7 and the memory operand.
+void AssemblerX8632::idiv(Type Ty, const Address &addr) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitOperand(7, addr);
+}
+
+// imul dst, src (two-operand register form): opcode 0x0F 0xAF with dst in
+// the ModRM reg field. Only i16 and i32 are accepted unless Traits::Is64Bit.
+void AssemblerX8632::imul(Type Ty, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64));
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xAF);
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+}
+
+// imul reg, [address] (two-operand memory form): opcode 0x0F 0xAF with reg
+// in the ModRM reg field. Only i16 and i32 are accepted unless
+// Traits::Is64Bit.
+void AssemblerX8632::imul(Type Ty, GPRRegister reg, const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64));
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xAF);
+  emitOperand(gprEncoding(reg), address);
+}
+
+// imul reg, imm: multiplies reg by an immediate, using reg as both source
+// and destination.
+//
+// Encoding: operand-size prefix for i16, then opcode 0x6B with a
+// sign-extended 8-bit immediate when imm fits in int8, otherwise opcode 0x69
+// with a full-width immediate written by emitImmediate().
+//
+// The type check matches the other imul overloads: i64 is accepted only when
+// Traits::Is64Bit is set. emitImmediate() writes at most a 32-bit immediate,
+// so an unconditional i64 would be silently encoded as a 32-bit multiply.
+void AssemblerX8632::imul(Type Ty, GPRRegister reg, const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64));
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  const bool FitsInt8 = imm.is_int8();
+  emitUint8(FitsInt8 ? 0x6B : 0x69);
+  emitRegisterOperand(gprEncoding(reg), gprEncoding(reg));
+  if (FitsInt8)
+    emitUint8(imm.value() & 0xFF);
+  else
+    emitImmediate(Ty, imm);
+}
+
+// imul reg (one-operand form): operand-size prefix for i16, opcode 0xF6
+// (byte-sized) or 0xF7, with opcode extension 5 and the register as operand.
+void AssemblerX8632::imul(Type Ty, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitRegisterOperand(5, gprEncoding(reg));
+}
+
+// imul [address] (one-operand form): operand-size prefix for i16, opcode
+// 0xF6 (byte-sized) or 0xF7, with opcode extension 5 and the memory operand.
+void AssemblerX8632::imul(Type Ty, const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitOperand(5, address);
+}
+
+// imul dst, src, imm (three-operand register form). Uses opcode 0x6B with an
+// 8-bit immediate when imm fits in int8, otherwise opcode 0x69 with a
+// full-width immediate. Only i16 and i32 are accepted.
+void AssemblerX8632::imul(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  const bool FitsInt8 = imm.is_int8();
+  emitUint8(FitsInt8 ? 0x6B : 0x69);
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+  if (FitsInt8) {
+    emitUint8(imm.value() & 0xFF);
+  } else {
+    emitImmediate(Ty, imm);
+  }
+}
+
+// imul dst, [address], imm (three-operand memory form). Uses opcode 0x6B
+// with an 8-bit immediate when imm fits in int8, otherwise opcode 0x69 with
+// a full-width immediate. The third argument to emitOperand() is the number
+// of immediate bytes that follow the operand. Only i16 and i32 are accepted.
+void AssemblerX8632::imul(Type Ty, GPRRegister dst, const Address &address,
+                          const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  if (!imm.is_int8()) {
+    emitUint8(0x69);
+    const uint8_t WideImmSize = Is16Bit ? 2 : 4;
+    emitOperand(gprEncoding(dst), address, WideImmSize);
+    emitImmediate(Ty, imm);
+    return;
+  }
+  emitUint8(0x6B);
+  static constexpr RelocOffsetT ByteImmSize = 1;
+  emitOperand(gprEncoding(dst), address, ByteImmSize);
+  emitUint8(imm.value() & 0xFF);
+}
+
+// mul reg: operand-size prefix for i16, opcode 0xF6 (byte-sized) or 0xF7,
+// with opcode extension 4 and the register as operand.
+void AssemblerX8632::mul(Type Ty, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitRegisterOperand(4, gprEncoding(reg));
+}
+
+// mul [address]: operand-size prefix for i16, opcode 0xF6 (byte-sized) or
+// 0xF7, with opcode extension 4 and the memory operand.
+void AssemblerX8632::mul(Type Ty, const Address &address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitOperand(4, address);
+}
+
+// incl reg: single-byte encoding 0x40 + reg.
+template <typename, typename> void AssemblerX8632::incl(GPRRegister reg) {
+  constexpr int IncBase = 0x40;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(IncBase + reg);
+}
+
+// incl [address]: opcode 0xFF with opcode extension 0.
+void AssemblerX8632::incl(const Address &address) {
+  constexpr uint8_t Opcode = 0xFF;
+  constexpr int IncExt = 0;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(IncExt, address);
+}
+
+// decl reg: single-byte encoding 0x48 + reg.
+template <typename, typename> void AssemblerX8632::decl(GPRRegister reg) {
+  constexpr int DecBase = 0x48;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(DecBase + reg);
+}
+
+// decl [address]: opcode 0xFF with opcode extension 1.
+void AssemblerX8632::decl(const Address &address) {
+  constexpr uint8_t Opcode = 0xFF;
+  constexpr int DecExt = 1;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitOperand(DecExt, address);
+}
+
+// rol: every overload forwards to emitGenericShift() with opcode extension 0.
+void AssemblerX8632::rol(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr int RolExt = 0;
+  emitGenericShift(RolExt, Ty, reg, imm);
+}
+
+void AssemblerX8632::rol(Type Ty, GPRRegister operand, GPRRegister shifter) {
+  constexpr int RolExt = 0;
+  emitGenericShift(RolExt, Ty, Operand(operand), shifter);
+}
+
+void AssemblerX8632::rol(Type Ty, const Address &operand, GPRRegister shifter) {
+  constexpr int RolExt = 0;
+  emitGenericShift(RolExt, Ty, operand, shifter);
+}
+
+// shl: every overload forwards to emitGenericShift() with opcode extension 4.
+void AssemblerX8632::shl(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr int ShlExt = 4;
+  emitGenericShift(ShlExt, Ty, reg, imm);
+}
+
+void AssemblerX8632::shl(Type Ty, GPRRegister operand, GPRRegister shifter) {
+  constexpr int ShlExt = 4;
+  emitGenericShift(ShlExt, Ty, Operand(operand), shifter);
+}
+
+void AssemblerX8632::shl(Type Ty, const Address &operand, GPRRegister shifter) {
+  constexpr int ShlExt = 4;
+  emitGenericShift(ShlExt, Ty, operand, shifter);
+}
+
+// shr: every overload forwards to emitGenericShift() with opcode extension 5.
+void AssemblerX8632::shr(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr int ShrExt = 5;
+  emitGenericShift(ShrExt, Ty, reg, imm);
+}
+
+void AssemblerX8632::shr(Type Ty, GPRRegister operand, GPRRegister shifter) {
+  constexpr int ShrExt = 5;
+  emitGenericShift(ShrExt, Ty, Operand(operand), shifter);
+}
+
+void AssemblerX8632::shr(Type Ty, const Address &operand, GPRRegister shifter) {
+  constexpr int ShrExt = 5;
+  emitGenericShift(ShrExt, Ty, operand, shifter);
+}
+
+// sar: every overload forwards to emitGenericShift() with opcode extension 7.
+void AssemblerX8632::sar(Type Ty, GPRRegister reg, const Immediate &imm) {
+  constexpr int SarExt = 7;
+  emitGenericShift(SarExt, Ty, reg, imm);
+}
+
+void AssemblerX8632::sar(Type Ty, GPRRegister operand, GPRRegister shifter) {
+  constexpr int SarExt = 7;
+  emitGenericShift(SarExt, Ty, Operand(operand), shifter);
+}
+
+void AssemblerX8632::sar(Type Ty, const Address &address, GPRRegister shifter) {
+  constexpr int SarExt = 7;
+  emitGenericShift(SarExt, Ty, address, shifter);
+}
+
+// shld dst, src (no immediate): opcode 0x0F 0xA5 with src in the ModRM reg
+// field and dst as the r/m operand. Only i16 and i32 are accepted.
+void AssemblerX8632::shld(Type Ty, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xA5);
+  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
+}
+
+// shld dst, src, imm: opcode 0x0F 0xA4 followed by the low byte of imm.
+// Only i16 and i32 are accepted, and imm must fit in int8.
+void AssemblerX8632::shld(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  assert(imm.is_int8());
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xA4);
+  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
+  emitUint8(imm.value() & 0xFF);
+}
+
+// shld [operand], src (no immediate): opcode 0x0F 0xA5 with src in the ModRM
+// reg field. Only i16 and i32 are accepted.
+void AssemblerX8632::shld(Type Ty, const Address &operand, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xA5);
+  emitOperand(gprEncoding(src), operand);
+}
+
+// shrd dst, src (no immediate): opcode 0x0F 0xAD with src in the ModRM reg
+// field and dst as the r/m operand. Only i16 and i32 are accepted.
+void AssemblerX8632::shrd(Type Ty, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xAD);
+  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
+}
+
+// shrd dst, src, imm: opcode 0x0F 0xAC followed by the low byte of imm.
+// Only i16 and i32 are accepted, and imm must fit in int8.
+void AssemblerX8632::shrd(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  assert(imm.is_int8());
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xAC);
+  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
+  emitUint8(imm.value() & 0xFF);
+}
+
+// shrd [dst], src (no immediate): opcode 0x0F 0xAD with src in the ModRM reg
+// field. Only i16 and i32 are accepted.
+void AssemblerX8632::shrd(Type Ty, const Address &dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32);
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xAD);
+  emitOperand(gprEncoding(src), dst);
+}
+
+// neg reg: operand-size prefix for i16, opcode 0xF6 (byte-sized) or 0xF7,
+// with opcode extension 3 and the register as operand.
+void AssemblerX8632::neg(Type Ty, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitRegisterOperand(3, gprEncoding(reg));
+}
+
+// neg [addr]: operand-size prefix for i16, opcode 0xF6 (byte-sized) or 0xF7,
+// with opcode extension 3 and the memory operand.
+void AssemblerX8632::neg(Type Ty, const Address &addr) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0xF6 : 0xF7);
+  emitOperand(3, addr);
+}
+
+// notl reg: opcode 0xF7 followed by a hand-built second byte, 0xD0 OR'ed
+// with the register encoding.
+void AssemblerX8632::notl(GPRRegister reg) {
+  constexpr uint8_t Opcode = 0xF7;
+  constexpr int ModRMBase = 0xD0;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitUint8(ModRMBase | gprEncoding(reg));
+}
+
+// bswap reg: opcode 0x0F followed by 0xC8 OR'ed with the register encoding.
+// Only i32 is accepted unless Traits::Is64Bit.
+void AssemblerX8632::bswap(Type Ty, GPRRegister reg) {
+  constexpr int SecondByteBase = 0xC8;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
+  emitUint8(0x0F);
+  emitUint8(SecondByteBase | gprEncoding(reg));
+}
+
+// bsf dst, src: opcode 0x0F 0xBC with dst in the ModRM reg field. Only i16
+// and i32 are accepted unless Traits::Is64Bit.
+void AssemblerX8632::bsf(Type Ty, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64));
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xBC);
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+}
+
+// bsf dst, [src]: opcode 0x0F 0xBC with dst in the ModRM reg field. Only i16
+// and i32 are accepted unless Traits::Is64Bit.
+void AssemblerX8632::bsf(Type Ty, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64));
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xBC);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// bsr dst, src: opcode 0x0F 0xBD with dst in the ModRM reg field. Only i16
+// and i32 are accepted unless Traits::Is64Bit.
+void AssemblerX8632::bsr(Type Ty, GPRRegister dst, GPRRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64));
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xBD);
+  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+}
+
+// bsr dst, [src]: opcode 0x0F 0xBD with dst in the ModRM reg field. Only i16
+// and i32 are accepted unless Traits::Is64Bit.
+void AssemblerX8632::bsr(Type Ty, GPRRegister dst, const Address &src) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const bool Is16Bit = Ty == IceType_i16;
+  assert(Is16Bit || Ty == IceType_i32 ||
+         (Traits::Is64Bit && Ty == IceType_i64));
+  if (Is16Bit) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(0x0F);
+  emitUint8(0xBD);
+  emitOperand(gprEncoding(dst), src);
+}
+
+// bt base, offset: opcode 0x0F 0xA3 with offset in the ModRM reg field and
+// base as the r/m operand.
+void AssemblerX8632::bt(GPRRegister base, GPRRegister offset) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  const auto RegField = gprEncoding(offset);
+  const auto RMField = gprEncoding(base);
+  emitUint8(0x0F);
+  emitUint8(0xA3);
+  emitRegisterOperand(RegField, RMField);
+}
+
+// ret: single opcode byte 0xC3.
+void AssemblerX8632::ret() {
+  constexpr uint8_t Opcode = 0xC3;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+}
+
+// ret imm: opcode 0xC2 followed by the 16-bit immediate, low byte first.
+// imm must fit in uint16.
+void AssemblerX8632::ret(const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xC2);
+  assert(imm.is_uint16());
+  const auto Value = imm.value();
+  emitUint8(Value & 0xFF);
+  emitUint8((Value >> 8) & 0xFF);
+}
+
+// Emits a single nop instruction that is exactly `size` bytes long
+// (1 <= size <= MAX_NOP_SIZE). The byte sequences come from a fixed table
+// indexed by size.
+void AssemblerX8632::nop(int size) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // There are nops up to size 15, but for now just provide up to size 8.
+  assert(0 < size && size <= MAX_NOP_SIZE);
+  constexpr int NumEncodings = 8;
+  // Row i holds the (i + 1)-byte encoding; unused trailing slots are zero.
+  static constexpr uint8_t Encodings[NumEncodings][NumEncodings] = {
+      {0x90},
+      {0x66, 0x90},
+      {0x0F, 0x1F, 0x00},
+      {0x0F, 0x1F, 0x40, 0x00},
+      {0x0F, 0x1F, 0x44, 0x00, 0x00},
+      {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
+      {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+      {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+  };
+  if (size < 1 || size > NumEncodings) {
+    llvm_unreachable("Unimplemented");
+  }
+  for (int Index = 0; Index < size; ++Index) {
+    emitUint8(Encodings[size - 1][Index]);
+  }
+}
+
+// int3: single opcode byte 0xCC.
+void AssemblerX8632::int3() {
+  constexpr uint8_t Opcode = 0xCC;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+}
+
+// hlt: single opcode byte 0xF4.
+void AssemblerX8632::hlt() {
+  constexpr uint8_t Opcode = 0xF4;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+}
+
+// ud2: two-byte opcode 0x0F 0x0B.
+void AssemblerX8632::ud2() {
+  constexpr uint8_t Escape = 0x0F;
+  constexpr uint8_t Opcode = 0x0B;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Escape);
+  emitUint8(Opcode);
+}
+
+// Conditional jump to a label.
+//  - Unbound label: emits the short form (0x70 + condition, 8-bit link) when
+//    `near` is set, otherwise the long form (0x0F, 0x80 + condition, 32-bit
+//    link), and records the link in the label.
+//  - Bound label: the target is at or before the current position, so the
+//    displacement is known; the short form is used when it fits in 8 bits.
+void AssemblerX8632::j(BrCond condition, Label *label, bool near) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (!label->isBound()) {
+    if (near) {
+      emitUint8(0x70 + condition);
+      emitNearLabelLink(label);
+    } else {
+      emitUint8(0x0F);
+      emitUint8(0x80 + condition);
+      emitLabelLink(label);
+    }
+    return;
+  }
+
+  // Displacements are relative to the end of the instruction, so subtract
+  // the instruction's own length.
+  static const int ShortInsnSize = 2;
+  static const int LongInsnSize = 6;
+  const intptr_t Distance = label->getPosition() - Buffer.size();
+  assert(Distance <= 0);
+  if (Utils::IsInt(8, Distance - ShortInsnSize)) {
+    emitUint8(0x70 + condition);
+    emitUint8((Distance - ShortInsnSize) & 0xFF);
+    return;
+  }
+  emitUint8(0x0F);
+  emitUint8(0x80 + condition);
+  emitInt32(Distance - LongInsnSize);
+}
+
+// Conditional jump to a relocatable constant. Currently disabled: the
+// function reports a fatal error before anything is emitted. The code after
+// the error would emit the long form (0x0F, 0x80 + condition) with a
+// PC-relative fixup (addend -4) and a zero 32-bit placeholder.
+void AssemblerX8632::j(BrCond condition, const ConstantRelocatable *label) {
+  llvm::report_fatal_error("Untested - please verify and then reenable.");
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0x0F);
+  emitUint8(0x80 + condition);
+  auto *PcRelFixup = createFixup(Traits::FK_PcRel, label);
+  PcRelFixup->set_addend(-4);
+  emitFixup(PcRelFixup);
+  emitInt32(0);
+}
+
+// jmp reg (indirect): opcode 0xFF with opcode extension 4 and the register
+// as operand.
+void AssemblerX8632::jmp(GPRRegister reg) {
+  constexpr uint8_t Opcode = 0xFF;
+  constexpr int JmpExt = 4;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Opcode);
+  emitRegisterOperand(JmpExt, gprEncoding(reg));
+}
+
+// Unconditional jump to a label.
+//  - Unbound label: emits the short form (0xEB, 8-bit link) when `near` is
+//    set, otherwise the long form (0xE9, 32-bit link), and records the link
+//    in the label.
+//  - Bound label: the target is at or before the current position, so the
+//    displacement is known; the short form is used when it fits in 8 bits.
+void AssemblerX8632::jmp(Label *label, bool near) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (!label->isBound()) {
+    if (near) {
+      emitUint8(0xEB);
+      emitNearLabelLink(label);
+    } else {
+      emitUint8(0xE9);
+      emitLabelLink(label);
+    }
+    return;
+  }
+
+  // Displacements are relative to the end of the instruction, so subtract
+  // the instruction's own length.
+  static const int ShortInsnSize = 2;
+  static const int LongInsnSize = 5;
+  const intptr_t Distance = label->getPosition() - Buffer.size();
+  assert(Distance <= 0);
+  if (Utils::IsInt(8, Distance - ShortInsnSize)) {
+    emitUint8(0xEB);
+    emitUint8((Distance - ShortInsnSize) & 0xFF);
+    return;
+  }
+  emitUint8(0xE9);
+  emitInt32(Distance - LongInsnSize);
+}
+
+// jmp to a relocatable constant: opcode 0xE9, a PC-relative fixup against
+// `label` with addend -4, and a zero 32-bit placeholder.
+void AssemblerX8632::jmp(const ConstantRelocatable *label) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xE9);
+  auto *PcRelFixup = createFixup(Traits::FK_PcRel, label);
+  PcRelFixup->set_addend(-4);
+  emitFixup(PcRelFixup);
+  emitInt32(0);
+}
+
+// jmp to an absolute address: opcode 0xE9, a PC-relative fixup against the
+// null symbol whose addend is the address minus 4, and a zero 32-bit
+// placeholder.
+void AssemblerX8632::jmp(const Immediate &abs_address) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(0xE9);
+  AssemblerFixup *PcRelFixup =
+      createFixup(Traits::FK_PcRel, AssemblerFixup::NullSymbol);
+  const auto Addend = abs_address.value() - 4;
+  PcRelFixup->set_addend(Addend);
+  emitFixup(PcRelFixup);
+  emitInt32(0);
+}
+
+// mfence: three-byte encoding 0x0F 0xAE 0xF0.
+void AssemblerX8632::mfence() {
+  constexpr uint8_t Escape = 0x0F;
+  constexpr uint8_t Opcode = 0xAE;
+  constexpr uint8_t ModRM = 0xF0;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(Escape);
+  emitUint8(Opcode);
+  emitUint8(ModRM);
+}
+
+// lock: emits the single prefix byte 0xF0.
+void AssemblerX8632::lock() {
+  constexpr uint8_t LockPrefix = 0xF0;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(LockPrefix);
+}
+
+// cmpxchg [address], reg. Byte order: operand-size prefix (i16 only), 0xF0
+// lock prefix (when Locked), 0x0F, then 0xB0 for byte-sized types or 0xB1
+// otherwise, then the memory operand with reg in the ModRM reg field.
+void AssemblerX8632::cmpxchg(Type Ty, const Address &address, GPRRegister reg,
+                             bool Locked) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  if (Locked) {
+    emitUint8(0xF0);
+  }
+  emitUint8(0x0F);
+  emitUint8(isByteSizedArithType(Ty) ? 0xB0 : 0xB1);
+  emitOperand(gprEncoding(reg), address);
+}
+
+// cmpxchg8b [address]: optional 0xF0 lock prefix, then opcode 0x0F 0xC7 with
+// opcode extension 1 and the memory operand.
+void AssemblerX8632::cmpxchg8b(const Address &address, bool Locked) {
+  constexpr int Cmpxchg8bExt = 1;
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Locked) {
+    emitUint8(0xF0);
+  }
+  emitUint8(0x0F);
+  emitUint8(0xC7);
+  emitOperand(Cmpxchg8bExt, address);
+}
+
+// xadd [addr], reg. Byte order: operand-size prefix (i16 only), 0xF0 lock
+// prefix (when Locked), 0x0F, then 0xC0 for byte-sized types or 0xC1
+// otherwise, then the memory operand with reg in the ModRM reg field.
+void AssemblerX8632::xadd(Type Ty, const Address &addr, GPRRegister reg,
+                          bool Locked) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  if (Locked) {
+    emitUint8(0xF0);
+  }
+  emitUint8(0x0F);
+  emitUint8(isByteSizedArithType(Ty) ? 0xC0 : 0xC1);
+  emitOperand(gprEncoding(reg), addr);
+}
+
+// xchg reg0, reg1 for the given operand type.
+//
+// The one-byte short form (0x90 + reg) exchanges the accumulator with a
+// 16-bit (with the operand-size prefix) or 32-bit register; it has no 8-bit
+// variant. Byte-sized exchanges therefore always use the ModRM form with
+// opcode 0x86. Otherwise an i8 exchange involving the accumulator would swap
+// the full registers rather than only their low bytes.
+void AssemblerX8632::xchg(Type Ty, GPRRegister reg0, GPRRegister reg1) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  const bool IsByte = isByteSizedArithType(Ty);
+  // Use short form if either register is EAX and the operand is not a byte.
+  if (!IsByte && reg0 == Traits::Encoded_Reg_Accumulator) {
+    emitUint8(0x90 + gprEncoding(reg1));
+  } else if (!IsByte && reg1 == Traits::Encoded_Reg_Accumulator) {
+    emitUint8(0x90 + gprEncoding(reg0));
+  } else {
+    emitUint8(IsByte ? 0x86 : 0x87);
+    emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
+  }
+}
+
+// xchg [addr], reg: operand-size prefix for i16, opcode 0x86 (byte-sized) or
+// 0x87, then the memory operand with reg in the ModRM reg field.
+void AssemblerX8632::xchg(Type Ty, const Address &addr, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  emitUint8(isByteSizedArithType(Ty) ? 0x86 : 0x87);
+  emitOperand(gprEncoding(reg), addr);
+}
+
+// Emits the start marker: the ud2 byte pair (0x0F 0x0B), "mov $111, ebx",
+// and the three bytes 0x64 0x67 0x90.
+// NOTE(review): presumably the start marker recognised by Intel's IACA tool;
+// confirm against its documentation.
+void AssemblerX8632::iaca_start() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // Same bytes as ud2().
+  emitUint8(0x0F);
+  emitUint8(0x0B);
+
+  // mov $111, ebx
+  constexpr GPRRegister MarkerReg = Traits::GPRRegister::Encoded_Reg_ebx;
+  constexpr Type MarkerTy = IceType_i32;
+  const Immediate MarkerValue(111);
+  emitUint8(0xB8 + gprEncoding(MarkerReg));
+  emitImmediate(MarkerTy, MarkerValue);
+
+  emitUint8(0x64);
+  emitUint8(0x67);
+  emitUint8(0x90);
+}
+
+// Emits the end marker: "mov $222, ebx", the three bytes 0x64 0x67 0x90, and
+// the ud2 byte pair (0x0F 0x0B).
+// NOTE(review): presumably the end marker recognised by Intel's IACA tool;
+// confirm against its documentation.
+void AssemblerX8632::iaca_end() {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+
+  // mov $222, ebx
+  constexpr GPRRegister MarkerReg = Traits::GPRRegister::Encoded_Reg_ebx;
+  constexpr Type MarkerTy = IceType_i32;
+  const Immediate MarkerValue(222);
+  emitUint8(0xB8 + gprEncoding(MarkerReg));
+  emitImmediate(MarkerTy, MarkerValue);
+
+  emitUint8(0x64);
+  emitUint8(0x67);
+  emitUint8(0x90);
+
+  // Same bytes as ud2().
+  emitUint8(0x0F);
+  emitUint8(0x0B);
+}
+
+// Emits `prefix` verbatim as a single byte. The caller picks the prefix
+// value; no validation is done here.
+void AssemblerX8632::emitSegmentOverride(uint8_t prefix) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  emitUint8(prefix);
+}
+
+// Pads the buffer with nops until (offset + buffer position) is a multiple
+// of `alignment`, which must be a power of two. Padding is emitted as
+// MAX_NOP_SIZE-byte nops followed by one shorter nop for the remainder.
+void AssemblerX8632::align(intptr_t alignment, intptr_t offset) {
+  assert(llvm::isPowerOf2_32(alignment));
+  const intptr_t Mask = alignment - 1;
+  const intptr_t Misalignment = (offset + Buffer.getPosition()) & Mask;
+  if (Misalignment != 0) {
+    intptr_t Remaining = alignment - Misalignment;
+    for (; Remaining > MAX_NOP_SIZE; Remaining -= MAX_NOP_SIZE) {
+      nop(MAX_NOP_SIZE);
+    }
+    if (Remaining) {
+      nop(Remaining);
+    }
+    assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0);
+  }
+}
+
+// Binds L to the current end of the buffer and patches every pending
+// reference to it.
+//  - 32-bit links form a chain threaded through the buffer: each link slot
+//    holds the label's previous Position value, which is restored into
+//    L->Position before the loop re-tests isLinked().
+//  - Near links are patched with an 8-bit displacement, which must fit.
+// Displacements are relative to the byte following the patched field.
+void AssemblerX8632::bind(Label *L) {
+  const intptr_t Target = Buffer.size();
+  assert(!L->isBound()); // Labels can only be bound once.
+  while (L->isLinked()) {
+    const intptr_t LinkSite = L->getLinkPosition();
+    const intptr_t PreviousLink = Buffer.load<int32_t>(LinkSite);
+    Buffer.store<int32_t>(LinkSite, Target - (LinkSite + 4));
+    L->Position = PreviousLink;
+  }
+  while (L->hasNear()) {
+    // NOTE(review): getNearPosition() presumably removes the returned entry,
+    // otherwise this loop would not terminate; confirm in Label.
+    const intptr_t NearSite = L->getNearPosition();
+    const intptr_t Displacement = Target - (NearSite + 1);
+    assert(Utils::IsInt(8, Displacement));
+    Buffer.store<int8_t>(NearSite, Displacement);
+  }
+  L->bindTo(Target);
+}
+
+// Emits the encoded bytes of `operand` with `rm` merged into bits 3..5 of
+// the ModRM byte.
+//  - Without a fixup, all remaining encoded bytes are copied verbatim.
+//  - With a fixup, the operand must end in a 4-byte displacement; the fixup
+//    is emitted followed by a zero 32-bit placeholder. For PC-relative
+//    fixups `Addend` is subtracted from the fixup's addend first.
+void AssemblerX8632::emitOperand(int rm, const Operand &operand,
+                                 RelocOffsetT Addend) {
+  assert(rm >= 0 && rm < 8);
+  const intptr_t EncodedLength = operand.length_;
+  assert(EncodedLength > 0);
+  const auto ModRM = operand.encoding_[0];
+  // Emit the ModRM byte updated with the given RM value.
+  assert((ModRM & 0x38) == 0);
+  emitUint8(ModRM + (rm << 3));
+  // Whenever the addressing mode is not register indirect, using esp == 0x4
+  // as the register operation indicates an SIB byte follows.
+  const bool HasSIB = ((ModRM & 0xc0) != 0xc0) && ((ModRM & 0x07) == 0x04);
+  if (HasSIB) {
+    emitUint8(operand.encoding_[1]);
+  }
+  const intptr_t DisplacementStart = HasSIB ? 2 : 1;
+
+  AssemblerFixup *Fixup = operand.fixup();
+  if (Fixup != nullptr) {
+    // Emit the fixup, and a dummy 4-byte immediate. Note that the Disp32 in
+    // operand.encoding_[i, i+1, i+2, i+3] is part of the constant relocatable
+    // used to create the fixup, so there's no need to add it to the addend.
+    assert(EncodedLength - DisplacementStart == 4);
+    if (fixupIsPCRel(Fixup->kind())) {
+      Fixup->set_addend(Fixup->get_addend() - Addend);
+    } else {
+      Fixup->set_addend(Fixup->get_addend());
+    }
+    emitFixup(Fixup);
+    emitInt32(0);
+    return;
+  }
+
+  for (intptr_t Index = DisplacementStart; Index < EncodedLength; ++Index) {
+    emitUint8(operand.encoding_[Index]);
+  }
+}
+
+// Emits an immediate: 16 bits for i16 (which must not carry a fixup),
+// 32 bits for every other type. When the immediate has a fixup, its value is
+// folded into the fixup's addend and a zero 32-bit placeholder is written.
+void AssemblerX8632::emitImmediate(Type Ty, const Immediate &imm) {
+  auto *const Fixup = imm.fixup();
+  const bool HasFixup = Fixup != nullptr;
+  if (Ty == IceType_i16) {
+    assert(!HasFixup);
+    emitInt16(imm.value());
+    return;
+  }
+
+  if (HasFixup) {
+    Fixup->set_addend(Fixup->get_addend() + imm.value());
+    emitFixup(Fixup);
+    emitInt32(0);
+    return;
+  }
+
+  emitInt32(imm.value());
+}
+
+// Emits the byte-sized "op operand, imm8" form of the ALU instruction
+// selected by rm (0..7). The immediate must fit in int8. The accumulator
+// gets the short encoding 0x04 + (rm << 3); everything else uses opcode 0x80
+// with rm as the opcode extension.
+void AssemblerX8632::emitComplexI8(int rm, const Operand &operand,
+                                   const Immediate &immediate) {
+  assert(rm >= 0 && rm < 8);
+  assert(immediate.is_int8());
+  if (!operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
+    // Use sign-extended 8-bit immediate.
+    emitUint8(0x80);
+    static constexpr RelocOffsetT ByteImmSize = 1;
+    emitOperand(rm, operand, ByteImmSize);
+    emitUint8(immediate.value() & 0xFF);
+    return;
+  }
+  // Use short form if the destination is al.
+  emitUint8(0x04 + (rm << 3));
+  emitUint8(immediate.value() & 0xFF);
+}
+
+// Emits the non-byte "op operand, imm" form of the ALU instruction selected
+// by rm (0..7), choosing among three encodings in this order:
+//   1. opcode 0x83 with a sign-extended 8-bit immediate, if it fits;
+//   2. the accumulator short form 0x05 + (rm << 3) with a full immediate;
+//   3. opcode 0x81 with a full immediate.
+void AssemblerX8632::emitComplex(Type Ty, int rm, const Operand &operand,
+                                 const Immediate &immediate) {
+  assert(rm >= 0 && rm < 8);
+  if (immediate.is_int8()) {
+    // Use sign-extended 8-bit immediate.
+    emitUint8(0x83);
+    static constexpr RelocOffsetT ByteImmSize = 1;
+    emitOperand(rm, operand, ByteImmSize);
+    emitUint8(immediate.value() & 0xFF);
+    return;
+  }
+  if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
+    // Use short form if the destination is eax.
+    emitUint8(0x05 + (rm << 3));
+    emitImmediate(Ty, immediate);
+    return;
+  }
+  emitUint8(0x81);
+  const uint8_t WideImmSize = Ty == IceType_i16 ? 2 : 4;
+  emitOperand(rm, operand, WideImmSize);
+  emitImmediate(Ty, immediate);
+}
+
+// Emits a 32-bit reference to `label`. A bound label yields the backward
+// displacement minus `instruction_size`; an unbound label gets a link entry
+// via emitLabelLink().
+void AssemblerX8632::emitLabel(Label *label, intptr_t instruction_size) {
+  if (!label->isBound()) {
+    emitLabelLink(label);
+    return;
+  }
+  const intptr_t Distance = label->getPosition() - Buffer.size();
+  assert(Distance <= 0);
+  emitInt32(Distance - instruction_size);
+}
+
+// Adds a 32-bit link for an unbound label: writes the label's current
+// Position value into the buffer, then links the label to the slot just
+// written.
+void AssemblerX8632::emitLabelLink(Label *Label) {
+  assert(!Label->isBound());
+  const intptr_t LinkSite = Buffer.size();
+  emitInt32(Label->Position);
+  Label->linkTo(*this, LinkSite);
+}
+
+// Adds an 8-bit (near) link for an unbound label: writes a zero placeholder
+// byte and records its position in the label.
+void AssemblerX8632::emitNearLabelLink(Label *Label) {
+  assert(!Label->isBound());
+  const intptr_t LinkSite = Buffer.size();
+  emitUint8(0);
+  Label->nearLinkTo(*this, LinkSite);
+}
+
+// Emits a shift/rotate of `reg` by an immediate; rm is the opcode extension
+// that selects the operation. A count of exactly 1 uses the dedicated
+// opcodes 0xD0/0xD1; any other count uses 0xC0/0xC1 followed by the count's
+// low byte.
+void AssemblerX8632::emitGenericShift(int rm, Type Ty, GPRRegister reg,
+                                      const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  // We don't assert that imm fits into 8 bits; instead, it gets masked below.
+  // Note that we don't mask it further (e.g. to 5 bits) because we want the
+  // same processor behavior regardless of whether it's an immediate (masked to
+  // 8 bits) or in register cl (essentially ecx masked to 8 bits).
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const bool IsByte = isByteSizedArithType(Ty);
+  if (imm.value() != 1) {
+    emitUint8(IsByte ? 0xC0 : 0xC1);
+    static constexpr RelocOffsetT ByteImmSize = 1;
+    emitOperand(rm, Operand(reg), ByteImmSize);
+    emitUint8(imm.value() & 0xFF);
+    return;
+  }
+  emitUint8(IsByte ? 0xD0 : 0xD1);
+  emitOperand(rm, Operand(reg));
+}
+
+// Emits a shift/rotate of `operand` by the count register; rm is the opcode
+// extension that selects the operation. `shifter` must be the counter
+// register and is otherwise unused, because opcodes 0xD2/0xD3 imply it.
+void AssemblerX8632::emitGenericShift(int rm, Type Ty, const Operand &operand,
+                                      GPRRegister shifter) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(shifter == Traits::Encoded_Reg_Counter);
+  (void)shifter;
+  if (Ty == IceType_i16) {
+    emitOperandSizeOverride();
+  }
+  const uint8_t Opcode = isByteSizedArithType(Ty) ? 0xD2 : 0xD3;
+  emitUint8(Opcode);
+  emitOperand(rm, operand);
+}
+
+} // namespace X8632
+} // end of namespace Ice
diff --git a/third_party/subzero/src/IceAssemblerX8632.h b/third_party/subzero/src/IceAssemblerX8632.h
index 27daf2e..beb26c7 100644
--- a/third_party/subzero/src/IceAssemblerX8632.h
+++ b/third_party/subzero/src/IceAssemblerX8632.h
@@ -16,20 +16,780 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief Instantiates the Assembler for X86-32.
+/// \brief Declares the Assembler class for X86-32.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERX8632_H
 #define SUBZERO_SRC_ICEASSEMBLERX8632_H
 
-#include "IceAssemblerX8632Base.h"
+#include "IceAssembler.h"
+#include "IceDefs.h"
+#include "IceOperand.h"
+#include "IceTypes.h"
+#include "IceUtils.h"
+
 #include "IceTargetLoweringX8632Traits.h"
 
 namespace Ice {
 namespace X8632 {
 
-using AssemblerX8632 = AssemblerX86Base<X8632::Traits>;
+class AssemblerX8632 : public ::Ice::Assembler {
+  AssemblerX8632(const AssemblerX8632 &) = delete;
+  AssemblerX8632 &operator=(const AssemblerX8632 &) = delete;
+
+protected:
+  explicit AssemblerX8632() : Assembler(Traits::AsmKind) {}
+
+public:
+  using Traits = TargetX8632Traits;
+  using Address = typename Traits::Address;
+  using ByteRegister = typename Traits::ByteRegister;
+  using BrCond = typename Traits::Cond::BrCond;
+  using CmppsCond = typename Traits::Cond::CmppsCond;
+  using GPRRegister = typename Traits::GPRRegister;
+  using Operand = typename Traits::Operand;
+  using XmmRegister = typename Traits::XmmRegister;
+
+  static constexpr int MAX_NOP_SIZE = 8;
+
+  static bool classof(const Assembler *Asm) {
+    return Asm->getKind() == Traits::AsmKind;
+  }
+
+  /// An immediate operand: either a plain 32-bit value or a reference to an
+  /// AssemblerFixup.
+  class Immediate {
+    Immediate(const Immediate &) = delete;
+    Immediate &operator=(const Immediate &) = delete;
+
+  public:
+    /// Wraps a constant; fixup() stays null.
+    explicit Immediate(int32_t value) : value_(value) {}
+
+    /// Wraps a fixup; value() stays 0.
+    explicit Immediate(AssemblerFixup *fixup) : fixup_(fixup) {}
+
+    int32_t value() const { return value_; }
+    AssemblerFixup *fixup() const { return fixup_; }
+
+    // Range predicates. We currently only allow 32-bit fixups, and they
+    // usually have value = 0, so an immediate that carries a fixup is never
+    // classified as int8/uint8/uint16.
+    bool is_int8() const { return !fixup_ && Utils::IsInt(8, value_); }
+    bool is_uint8() const { return !fixup_ && Utils::IsUint(8, value_); }
+    bool is_uint16() const { return !fixup_ && Utils::IsUint(16, value_); }
+
+  private:
+    // Each constructor sets one member; the other keeps its default below.
+    const int32_t value_ = 0;
+    AssemblerFixup *fixup_ = nullptr;
+  };
+
+  /// X86 label: near and far jumps are allowed; near sites are tracked here.
+  class Label final : public Ice::Label {
+    Label(const Label &) = delete;
+    Label &operator=(const Label &) = delete;
+
+  public:
+    Label() = default;
+    ~Label() = default;
+
+    void finalCheck() const override {
+      Ice::Label::finalCheck();
+      assert(!hasNear()); // No near site may be left pending.
+    }
+
+    /// Pops and returns the most recently recorded position of a branch that
+    /// assumed this label is "near". At least one must be pending.
+    intptr_t getNearPosition() {
+      assert(hasNear());
+      const intptr_t Latest = UnresolvedNearPositions.back();
+      UnresolvedNearPositions.pop_back();
+      return Latest;
+    }
+
+    bool hasNear() const { return !UnresolvedNearPositions.empty(); }
+    bool isUnused() const override {
+      return Ice::Label::isUnused() && !hasNear();
+    }
+
+  private:
+    friend class AssemblerX8632;
+
+    void nearLinkTo(const Assembler &Asm, intptr_t position) {
+      if (!Asm.getPreliminary()) {
+        assert(!isBound());
+        UnresolvedNearPositions.push_back(position);
+      }
+    }
+
+    llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
+  };
+
+public:
+  ~AssemblerX8632() override;
+
+  static const bool kNearJump = true;
+  static const bool kFarJump = false;
+
+  void alignFunction() override;
+
+  SizeT getBundleAlignLog2Bytes() const override { return 5; }
+
+  const char *getAlignDirective() const override { return ".p2align"; }
+
+  llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
+    static const uint8_t PadBytes[] = {0xF4}; // NOTE(review): HLT? confirm.
+    return llvm::ArrayRef<uint8_t>(PadBytes, sizeof(PadBytes));
+  }
+
+  void padWithNop(intptr_t Padding) override {
+    // Emit maximum-size nops while more than MAX_NOP_SIZE bytes remain, then
+    // one final nop covering the remainder (skipped when nothing is left).
+    for (; Padding > MAX_NOP_SIZE; Padding -= MAX_NOP_SIZE)
+      nop(MAX_NOP_SIZE);
+    if (Padding != 0)
+      nop(Padding);
+  }
+
+  Ice::Label *getCfgNodeLabel(SizeT NodeNumber) override;
+  void bindCfgNodeLabel(const CfgNode *Node) override;
+  Label *getOrCreateCfgNodeLabel(SizeT Number);
+  Label *getOrCreateLocalLabel(SizeT Number);
+  void bindLocalLabel(SizeT Number);
+
+  bool fixupIsPCRel(FixupKind Kind) const override {
+    // Currently assuming this is the only PC-rel relocation type used.
+    // TODO(jpp): Traits.PcRelTypes.count(Kind) != 0
+    return Kind == Traits::FK_PcRel;
+  }
+
+  // Operations to emit GPR instructions (and dispatch on operand type).
+  using TypedEmitGPR = void (AssemblerX8632::*)(Type, GPRRegister);
+  using TypedEmitAddr = void (AssemblerX8632::*)(Type, const Address &);
+  struct GPREmitterOneOp {
+    TypedEmitGPR Reg;
+    TypedEmitAddr Addr;
+  };
+
+  using TypedEmitGPRGPR = void (AssemblerX8632::*)(Type, GPRRegister,
+                                                   GPRRegister);
+  using TypedEmitGPRAddr = void (AssemblerX8632::*)(Type, GPRRegister,
+                                                    const Address &);
+  using TypedEmitGPRImm = void (AssemblerX8632::*)(Type, GPRRegister,
+                                                   const Immediate &);
+  struct GPREmitterRegOp {
+    TypedEmitGPRGPR GPRGPR;
+    TypedEmitGPRAddr GPRAddr;
+    TypedEmitGPRImm GPRImm;
+  };
+
+  struct GPREmitterShiftOp {
+    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are
+    // not. In practice, we always normalize the Dest to a Register first.
+    TypedEmitGPRGPR GPRGPR;
+    TypedEmitGPRImm GPRImm;
+  };
+
+  using TypedEmitGPRGPRImm = void (AssemblerX8632::*)(Type, GPRRegister,
+                                                      GPRRegister,
+                                                      const Immediate &);
+  struct GPREmitterShiftD {
+    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
+    // always normalize Dest to a Register first.
+    TypedEmitGPRGPR GPRGPR;
+    TypedEmitGPRGPRImm GPRGPRImm;
+  };
+
+  using TypedEmitAddrGPR = void (AssemblerX8632::*)(Type, const Address &,
+                                                    GPRRegister);
+  using TypedEmitAddrImm = void (AssemblerX8632::*)(Type, const Address &,
+                                                    const Immediate &);
+  struct GPREmitterAddrOp {
+    TypedEmitAddrGPR AddrGPR;
+    TypedEmitAddrImm AddrImm;
+  };
+
+  // Operations to emit XMM instructions (and dispatch on operand type).
+  using TypedEmitXmmXmm = void (AssemblerX8632::*)(Type, XmmRegister,
+                                                   XmmRegister);
+  using TypedEmitXmmAddr = void (AssemblerX8632::*)(Type, XmmRegister,
+                                                    const Address &);
+  struct XmmEmitterRegOp {
+    TypedEmitXmmXmm XmmXmm;
+    TypedEmitXmmAddr XmmAddr;
+  };
+
+  using EmitXmmXmm = void (AssemblerX8632::*)(XmmRegister, XmmRegister);
+  using EmitXmmAddr = void (AssemblerX8632::*)(XmmRegister, const Address &);
+  using EmitAddrXmm = void (AssemblerX8632::*)(const Address &, XmmRegister);
+  struct XmmEmitterMovOps {
+    EmitXmmXmm XmmXmm;
+    EmitXmmAddr XmmAddr;
+    EmitAddrXmm AddrXmm;
+  };
+
+  using TypedEmitXmmImm = void (AssemblerX8632::*)(Type, XmmRegister,
+                                                   const Immediate &);
+
+  struct XmmEmitterShiftOp {
+    TypedEmitXmmXmm XmmXmm;
+    TypedEmitXmmAddr XmmAddr;
+    TypedEmitXmmImm XmmImm;
+  };
+
+  // Cross Xmm/GPR cast instructions.
+  template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
+    using TypedEmitRegs = void (AssemblerX8632::*)(Type, DReg_t, Type, SReg_t);
+    using TypedEmitAddr = void (AssemblerX8632::*)(Type, DReg_t, Type,
+                                                   const Address &);
+
+    TypedEmitRegs RegReg;
+    TypedEmitAddr RegAddr;
+  };
+
+  // Three operand (potentially) cross Xmm/GPR instructions. The last operand
+  // must be an immediate.
+  template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
+    using TypedEmitRegRegImm = void (AssemblerX8632::*)(Type, DReg_t, SReg_t,
+                                                        const Immediate &);
+    using TypedEmitRegAddrImm = void (AssemblerX8632::*)(Type, DReg_t,
+                                                         const Address &,
+                                                         const Immediate &);
+
+    TypedEmitRegRegImm RegRegImm;
+    TypedEmitRegAddrImm RegAddrImm;
+  };
+
+  /*
+   * Emit Machine Instructions.
+   */
+  void call(GPRRegister reg);
+  void call(const Address &address);
+  void call(const ConstantRelocatable *label); // not testable.
+  void call(const Immediate &abs_address);
+
+  static const intptr_t kCallExternalLabelSize = 5;
+
+  void pushl(GPRRegister reg);
+  void pushl(const Immediate &Imm);
+  void pushl(const ConstantRelocatable *Label);
+
+  void popl(GPRRegister reg);
+  void popl(const Address &address);
+
+  void pushal();
+  void popal();
+
+  void setcc(BrCond condition, ByteRegister dst);
+  void setcc(BrCond condition, const Address &address);
+
+  void mov(Type Ty, GPRRegister dst, const Immediate &src);
+  void mov(Type Ty, GPRRegister dst, GPRRegister src);
+  void mov(Type Ty, GPRRegister dst, const Address &src);
+  void mov(Type Ty, const Address &dst, GPRRegister src);
+  void mov(Type Ty, const Address &dst, const Immediate &imm);
+
+  void movzx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movzx(Type Ty, GPRRegister dst, const Address &src);
+  void movsx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movsx(Type Ty, GPRRegister dst, const Address &src);
+
+  void lea(Type Ty, GPRRegister dst, const Address &src);
+
+  void cmov(Type Ty, BrCond cond, GPRRegister dst, GPRRegister src);
+  void cmov(Type Ty, BrCond cond, GPRRegister dst, const Address &src);
+
+  void rep_movsb();
+
+  void movss(Type Ty, XmmRegister dst, const Address &src);
+  void movss(Type Ty, const Address &dst, XmmRegister src);
+  void movss(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void movd(Type SrcTy, XmmRegister dst, GPRRegister src);
+  void movd(Type SrcTy, XmmRegister dst, const Address &src);
+  void movd(Type DestTy, GPRRegister dst, XmmRegister src);
+  void movd(Type DestTy, const Address &dst, XmmRegister src);
+
+  void movq(XmmRegister dst, XmmRegister src);
+  void movq(const Address &dst, XmmRegister src);
+  void movq(XmmRegister dst, const Address &src);
+
+  void addss(Type Ty, XmmRegister dst, XmmRegister src);
+  void addss(Type Ty, XmmRegister dst, const Address &src);
+  void subss(Type Ty, XmmRegister dst, XmmRegister src);
+  void subss(Type Ty, XmmRegister dst, const Address &src);
+  void mulss(Type Ty, XmmRegister dst, XmmRegister src);
+  void mulss(Type Ty, XmmRegister dst, const Address &src);
+  void divss(Type Ty, XmmRegister dst, XmmRegister src);
+  void divss(Type Ty, XmmRegister dst, const Address &src);
+
+  void movaps(XmmRegister dst, XmmRegister src);
+
+  void movups(XmmRegister dst, XmmRegister src);
+  void movups(XmmRegister dst, const Address &src);
+  void movups(const Address &dst, XmmRegister src);
+
+  void padd(Type Ty, XmmRegister dst, XmmRegister src);
+  void padd(Type Ty, XmmRegister dst, const Address &src);
+  void padds(Type Ty, XmmRegister dst, XmmRegister src);
+  void padds(Type Ty, XmmRegister dst, const Address &src);
+  void paddus(Type Ty, XmmRegister dst, XmmRegister src);
+  void paddus(Type Ty, XmmRegister dst, const Address &src);
+  void pand(Type Ty, XmmRegister dst, XmmRegister src);
+  void pand(Type Ty, XmmRegister dst, const Address &src);
+  void pandn(Type Ty, XmmRegister dst, XmmRegister src);
+  void pandn(Type Ty, XmmRegister dst, const Address &src);
+  void pmull(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmull(Type Ty, XmmRegister dst, const Address &src);
+  void pmulhw(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmulhw(Type Ty, XmmRegister dst, const Address &src);
+  void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmulhuw(Type Ty, XmmRegister dst, const Address &src);
+  void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmaddwd(Type Ty, XmmRegister dst, const Address &src);
+  void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmuludq(Type Ty, XmmRegister dst, const Address &src);
+  void por(Type Ty, XmmRegister dst, XmmRegister src);
+  void por(Type Ty, XmmRegister dst, const Address &src);
+  void psub(Type Ty, XmmRegister dst, XmmRegister src);
+  void psub(Type Ty, XmmRegister dst, const Address &src);
+  void psubs(Type Ty, XmmRegister dst, XmmRegister src);
+  void psubs(Type Ty, XmmRegister dst, const Address &src);
+  void psubus(Type Ty, XmmRegister dst, XmmRegister src);
+  void psubus(Type Ty, XmmRegister dst, const Address &src);
+  void pxor(Type Ty, XmmRegister dst, XmmRegister src);
+  void pxor(Type Ty, XmmRegister dst, const Address &src);
+
+  void psll(Type Ty, XmmRegister dst, XmmRegister src);
+  void psll(Type Ty, XmmRegister dst, const Address &src);
+  void psll(Type Ty, XmmRegister dst, const Immediate &src);
+
+  void psra(Type Ty, XmmRegister dst, XmmRegister src);
+  void psra(Type Ty, XmmRegister dst, const Address &src);
+  void psra(Type Ty, XmmRegister dst, const Immediate &src);
+  void psrl(Type Ty, XmmRegister dst, XmmRegister src);
+  void psrl(Type Ty, XmmRegister dst, const Address &src);
+  void psrl(Type Ty, XmmRegister dst, const Immediate &src);
+
+  void addps(Type Ty, XmmRegister dst, XmmRegister src);
+  void addps(Type Ty, XmmRegister dst, const Address &src);
+  void subps(Type Ty, XmmRegister dst, XmmRegister src);
+  void subps(Type Ty, XmmRegister dst, const Address &src);
+  void divps(Type Ty, XmmRegister dst, XmmRegister src);
+  void divps(Type Ty, XmmRegister dst, const Address &src);
+  void mulps(Type Ty, XmmRegister dst, XmmRegister src);
+  void mulps(Type Ty, XmmRegister dst, const Address &src);
+  void minps(Type Ty, XmmRegister dst, const Address &src);
+  void minps(Type Ty, XmmRegister dst, XmmRegister src);
+  void minss(Type Ty, XmmRegister dst, const Address &src);
+  void minss(Type Ty, XmmRegister dst, XmmRegister src);
+  void maxps(Type Ty, XmmRegister dst, const Address &src);
+  void maxps(Type Ty, XmmRegister dst, XmmRegister src);
+  void maxss(Type Ty, XmmRegister dst, const Address &src);
+  void maxss(Type Ty, XmmRegister dst, XmmRegister src);
+  void andnps(Type Ty, XmmRegister dst, const Address &src);
+  void andnps(Type Ty, XmmRegister dst, XmmRegister src);
+  void andps(Type Ty, XmmRegister dst, const Address &src);
+  void andps(Type Ty, XmmRegister dst, XmmRegister src);
+  void orps(Type Ty, XmmRegister dst, const Address &src);
+  void orps(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
+  void blendvps(Type Ty, XmmRegister dst, const Address &src);
+  void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
+  void pblendvb(Type Ty, XmmRegister dst, const Address &src);
+
+  void cmpps(Type Ty, XmmRegister dst, XmmRegister src, CmppsCond CmpCondition);
+  void cmpps(Type Ty, XmmRegister dst, const Address &src,
+             CmppsCond CmpCondition);
+
+  void sqrtps(XmmRegister dst);
+  void rsqrtps(XmmRegister dst);
+  void reciprocalps(XmmRegister dst);
+
+  void movhlps(XmmRegister dst, XmmRegister src);
+  void movlhps(XmmRegister dst, XmmRegister src);
+  void unpcklps(XmmRegister dst, XmmRegister src);
+  void unpckhps(XmmRegister dst, XmmRegister src);
+  void unpcklpd(XmmRegister dst, XmmRegister src);
+  void unpckhpd(XmmRegister dst, XmmRegister src);
+
+  void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
+
+  void sqrtpd(XmmRegister dst);
+
+  void pshufb(Type Ty, XmmRegister dst, XmmRegister src);
+  void pshufb(Type Ty, XmmRegister dst, const Address &src);
+  void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
+  void pshufd(Type Ty, XmmRegister dst, const Address &src,
+              const Immediate &mask);
+  void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void punpckl(Type Ty, XmmRegister Dst, const Address &Src);
+  void punpckh(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void punpckh(Type Ty, XmmRegister Dst, const Address &Src);
+  void packss(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void packss(Type Ty, XmmRegister Dst, const Address &Src);
+  void packus(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void packus(Type Ty, XmmRegister Dst, const Address &Src);
+  void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
+  void shufps(Type Ty, XmmRegister dst, const Address &src,
+              const Immediate &mask);
+
+  void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
+  void cvtdq2ps(Type, XmmRegister dst, const Address &src);
+
+  void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
+  void cvttps2dq(Type, XmmRegister dst, const Address &src);
+
+  void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
+  void cvtps2dq(Type, XmmRegister dst, const Address &src);
+
+  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
+  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
+
+  void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
+  void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
+
+  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
+  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
+
+  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
+  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
+
+  void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
+  void ucomiss(Type Ty, XmmRegister a, const Address &b);
+
+  void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
+
+  void sqrt(Type Ty, XmmRegister dst, const Address &src);
+  void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void xorps(Type Ty, XmmRegister dst, const Address &src);
+  void xorps(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void insertps(Type Ty, XmmRegister dst, XmmRegister src,
+                const Immediate &imm);
+  void insertps(Type Ty, XmmRegister dst, const Address &src,
+                const Immediate &imm);
+
+  void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
+  void pinsr(Type Ty, XmmRegister dst, const Address &src,
+             const Immediate &imm);
+
+  void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
+
+  void pmovsxdq(XmmRegister dst, XmmRegister src);
+
+  void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
+  void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
+  void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
+  void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
+
+  enum RoundingMode {
+    kRoundToNearest = 0x0,
+    kRoundDown = 0x1,
+    kRoundUp = 0x2,
+    kRoundToZero = 0x3
+  };
+  void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
+  void round(Type Ty, XmmRegister dst, const Address &src,
+             const Immediate &mode);
+
+  //----------------------------------------------------------------------------
+  //
+  // Begin: X87 instructions.
+  //
+  //----------------------------------------------------------------------------
+  void fld(Type Ty, const Address &src);
+  void fstp(Type Ty, const Address &dst);
+  void fstp(RegX8632::X87STRegister st);
+
+  void fnstcw(const Address &dst);
+  void fldcw(const Address &src);
+
+  void fistpl(const Address &dst);
+  void fistps(const Address &dst);
+  void fildl(const Address &src);
+  void filds(const Address &src);
+
+  void fincstp();
+  //----------------------------------------------------------------------------
+  //
+  // End: X87 instructions.
+  //
+  //----------------------------------------------------------------------------
+
+  void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void cmp(Type Ty, GPRRegister reg, const Address &address);
+  void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
+  void cmp(Type Ty, const Address &address, GPRRegister reg);
+  void cmp(Type Ty, const Address &address, const Immediate &imm);
+
+  void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void test(Type Ty, GPRRegister reg, const Immediate &imm);
+  void test(Type Ty, const Address &address, GPRRegister reg);
+  void test(Type Ty, const Address &address, const Immediate &imm);
+
+  void And(Type Ty, GPRRegister dst, GPRRegister src);
+  void And(Type Ty, GPRRegister dst, const Address &address);
+  void And(Type Ty, GPRRegister dst, const Immediate &imm);
+  void And(Type Ty, const Address &address, GPRRegister reg);
+  void And(Type Ty, const Address &address, const Immediate &imm);
+
+  void Or(Type Ty, GPRRegister dst, GPRRegister src);
+  void Or(Type Ty, GPRRegister dst, const Address &address);
+  void Or(Type Ty, GPRRegister dst, const Immediate &imm);
+  void Or(Type Ty, const Address &address, GPRRegister reg);
+  void Or(Type Ty, const Address &address, const Immediate &imm);
+
+  void Xor(Type Ty, GPRRegister dst, GPRRegister src);
+  void Xor(Type Ty, GPRRegister dst, const Address &address);
+  void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
+  void Xor(Type Ty, const Address &address, GPRRegister reg);
+  void Xor(Type Ty, const Address &address, const Immediate &imm);
+
+  void add(Type Ty, GPRRegister dst, GPRRegister src);
+  void add(Type Ty, GPRRegister reg, const Address &address);
+  void add(Type Ty, GPRRegister reg, const Immediate &imm);
+  void add(Type Ty, const Address &address, GPRRegister reg);
+  void add(Type Ty, const Address &address, const Immediate &imm);
+
+  void adc(Type Ty, GPRRegister dst, GPRRegister src);
+  void adc(Type Ty, GPRRegister dst, const Address &address);
+  void adc(Type Ty, GPRRegister reg, const Immediate &imm);
+  void adc(Type Ty, const Address &address, GPRRegister reg);
+  void adc(Type Ty, const Address &address, const Immediate &imm);
+
+  void sub(Type Ty, GPRRegister dst, GPRRegister src);
+  void sub(Type Ty, GPRRegister reg, const Address &address);
+  void sub(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sub(Type Ty, const Address &address, GPRRegister reg);
+  void sub(Type Ty, const Address &address, const Immediate &imm);
+
+  void sbb(Type Ty, GPRRegister dst, GPRRegister src);
+  void sbb(Type Ty, GPRRegister reg, const Address &address);
+  void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sbb(Type Ty, const Address &address, GPRRegister reg);
+  void sbb(Type Ty, const Address &address, const Immediate &imm);
+
+  void cbw();
+  void cwd();
+  void cdq();
+  template <typename T = Traits>
+  typename std::enable_if<T::Is64Bit, void>::type cqo();
+  template <typename T = Traits>
+  typename std::enable_if<!T::Is64Bit, void>::type cqo() {
+    llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
+  }
+
+  void div(Type Ty, GPRRegister reg);
+  void div(Type Ty, const Address &address);
+
+  void idiv(Type Ty, GPRRegister reg);
+  void idiv(Type Ty, const Address &address);
+
+  void imul(Type Ty, GPRRegister dst, GPRRegister src);
+  void imul(Type Ty, GPRRegister reg, const Immediate &imm);
+  void imul(Type Ty, GPRRegister reg, const Address &address);
+
+  void imul(Type Ty, GPRRegister reg);
+  void imul(Type Ty, const Address &address);
+
+  void imul(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void imul(Type Ty, GPRRegister dst, const Address &address,
+            const Immediate &imm);
+
+  void mul(Type Ty, GPRRegister reg);
+  void mul(Type Ty, const Address &address);
+
+  template <class T = Traits,
+            typename = typename std::enable_if<!T::Is64Bit>::type>
+  void incl(GPRRegister reg);
+  void incl(const Address &address);
+
+  template <class T = Traits,
+            typename = typename std::enable_if<!T::Is64Bit>::type>
+  void decl(GPRRegister reg);
+  void decl(const Address &address);
+
+  void rol(Type Ty, GPRRegister reg, const Immediate &imm);
+  void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void rol(Type Ty, const Address &operand, GPRRegister shifter);
+
+  void shl(Type Ty, GPRRegister reg, const Immediate &imm);
+  void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void shl(Type Ty, const Address &operand, GPRRegister shifter);
+
+  void shr(Type Ty, GPRRegister reg, const Immediate &imm);
+  void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void shr(Type Ty, const Address &operand, GPRRegister shifter);
+
+  void sar(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void sar(Type Ty, const Address &address, GPRRegister shifter);
+
+  void shld(Type Ty, GPRRegister dst, GPRRegister src);
+  void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void shld(Type Ty, const Address &operand, GPRRegister src);
+  void shrd(Type Ty, GPRRegister dst, GPRRegister src);
+  void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void shrd(Type Ty, const Address &dst, GPRRegister src);
+
+  void neg(Type Ty, GPRRegister reg);
+  void neg(Type Ty, const Address &addr);
+  void notl(GPRRegister reg);
+
+  void bsf(Type Ty, GPRRegister dst, GPRRegister src);
+  void bsf(Type Ty, GPRRegister dst, const Address &src);
+  void bsr(Type Ty, GPRRegister dst, GPRRegister src);
+  void bsr(Type Ty, GPRRegister dst, const Address &src);
+
+  void bswap(Type Ty, GPRRegister reg);
+
+  void bt(GPRRegister base, GPRRegister offset);
+
+  void ret();
+  void ret(const Immediate &imm);
+
+  // 'size' indicates size in bytes and must be in the range 1..8.
+  void nop(int size = 1);
+  void int3();
+  void hlt();
+  void ud2();
+
+  // j(Label) is fully tested.
+  void j(BrCond condition, Label *label, bool near = kFarJump);
+  void j(BrCond condition, const ConstantRelocatable *label); // not testable.
+
+  void jmp(GPRRegister reg);
+  void jmp(Label *label, bool near = kFarJump);
+  void jmp(const ConstantRelocatable *label); // not testable.
+  void jmp(const Immediate &abs_address);
+
+  void mfence();
+
+  void lock();
+  void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
+  void cmpxchg8b(const Address &address, bool Locked);
+  void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
+  void xchg(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void xchg(Type Ty, const Address &address, GPRRegister reg);
+
+  /// \name Intel Architecture Code Analyzer markers.
+  /// @{
+  void iaca_start();
+  void iaca_end();
+  /// @}
+
+  void emitSegmentOverride(uint8_t prefix);
+
+  intptr_t preferredLoopAlignment() { return 16; }
+  void align(intptr_t alignment, intptr_t offset);
+  void bind(Label *label);
+
+  intptr_t CodeSize() const { return Buffer.size(); }
+
+protected:
+  inline void emitUint8(uint8_t value);
+
+private:
+  ENABLE_MAKE_UNIQUE;
+
+  static constexpr Type RexTypeIrrelevant = IceType_i32;
+  static constexpr Type RexTypeForceRexW = IceType_i64;
+  static constexpr GPRRegister RexRegIrrelevant =
+      Traits::GPRRegister::Encoded_Reg_eax;
+
+  inline void emitInt16(int16_t value);
+  inline void emitInt32(int32_t value);
+  inline void emitRegisterOperand(int reg, int rm); // ModRM byte, mod = 0b11.
+  template <typename RegType, typename RmType>
+  inline void emitXmmRegisterOperand(RegType reg, RmType rm);
+  inline void emitOperandSizeOverride();
+
+  void emitOperand(int rm, const Operand &operand, RelocOffsetT Addend = 0);
+  void emitImmediate(Type ty, const Immediate &imm);
+  void emitComplexI8(int rm, const Operand &operand,
+                     const Immediate &immediate);
+  void emitComplex(Type Ty, int rm, const Operand &operand,
+                   const Immediate &immediate);
+  void emitLabel(Label *label, intptr_t instruction_size);
+  void emitLabelLink(Label *label);
+  void emitNearLabelLink(Label *label);
+
+  void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
+  void emitGenericShift(int rm, Type Ty, const Operand &operand,
+                        GPRRegister shifter);
+
+  using LabelVector = std::vector<Label *>;
+  // A vector of pool-allocated x86 labels for CFG nodes.
+  LabelVector CfgNodeLabels;
+  // A vector of pool-allocated x86 labels for Local labels.
+  LabelVector LocalLabels;
+
+  Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
+
+  // The arith_int() methods factor out the commonality between the encodings
+  // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
+  // parameter is statically asserted to be less than 8.
+  template <uint32_t Tag>
+  void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, GPRRegister reg, const Address &address);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, const Address &address, GPRRegister reg);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, const Address &address, const Immediate &imm);
+
+  // gprEncoding returns the Reg encoding used for operand emission. When
+  // Traits::Is64Bit is set, the 4th bit is masked out because it is encoded in
+  // the REX.[RXB] bits; no other bits are touched so errors are not masked.
+  template <typename RegType, typename T = Traits>
+  typename std::enable_if<T::Is64Bit, typename T::GPRRegister>::type
+  gprEncoding(const RegType Reg) {
+    return static_cast<GPRRegister>(static_cast<uint8_t>(Reg) & ~0x08);
+  }
+
+  template <typename RegType, typename T = Traits>
+  typename std::enable_if<!T::Is64Bit, typename T::GPRRegister>::type
+  gprEncoding(const RegType Reg) {
+    return static_cast<typename T::GPRRegister>(Reg); // Plain cast, no mask.
+  }
+};
+
+inline void AssemblerX8632::emitUint8(uint8_t value) {
+  Buffer.emit<uint8_t>(value);
+}
+
+inline void AssemblerX8632::emitInt16(int16_t value) {
+  Buffer.emit<int16_t>(value);
+}
+
+inline void AssemblerX8632::emitInt32(int32_t value) {
+  Buffer.emit<int32_t>(value);
+}
+
+inline void AssemblerX8632::emitRegisterOperand(int reg, int rm) {
+  assert(0 <= reg && reg < 8); // Must fit the 3-bit ModRM.reg field.
+  assert(0 <= rm && rm < 8);   // Must fit the 3-bit ModRM.rm field.
+  emitUint8(0xC0 + (reg << 3) + rm); // 0xC0 sets mod = 0b11.
+}
+
+template <typename RegType, typename RmType>
+inline void AssemblerX8632::emitXmmRegisterOperand(RegType reg, RmType rm) {
+  emitRegisterOperand(gprEncoding(reg), gprEncoding(rm));
+}
+
+inline void AssemblerX8632::emitOperandSizeOverride() { emitUint8(0x66); }
+
 using Label = AssemblerX8632::Label;
 using Immediate = AssemblerX8632::Immediate;
 
diff --git a/third_party/subzero/src/IceAssemblerX8632Base.h b/third_party/subzero/src/IceAssemblerX8632Base.h
deleted file mode 100644
index 91e18b8..0000000
--- a/third_party/subzero/src/IceAssemblerX8632Base.h
+++ /dev/null
@@ -1,930 +0,0 @@
-//===- subzero/src/IceAssemblerX8632Base.h - base x86 assembler -*- C++
-//-*---===//
-//
-// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-//
-// Modified by the Subzero authors.
-//
-//===----------------------------------------------------------------------===//
-//
-//                        The Subzero Code Generator
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Defines the AssemblerX86 template class for x86, the base of all X86
-/// assemblers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SUBZERO_SRC_ICEASSEMBLERX8632BASE_H
-#define SUBZERO_SRC_ICEASSEMBLERX8632BASE_H
-
-#include "IceAssembler.h"
-#include "IceDefs.h"
-#include "IceOperand.h"
-#include "IceTypes.h"
-#include "IceUtils.h"
-
-namespace Ice {
-namespace X8632 {
-
-template <typename TraitsType>
-class AssemblerX86Base : public ::Ice::Assembler {
-  AssemblerX86Base(const AssemblerX86Base &) = delete;
-  AssemblerX86Base &operator=(const AssemblerX86Base &) = delete;
-
-protected:
-  explicit AssemblerX86Base() : Assembler(Traits::AsmKind) {}
-
-public:
-  using Traits = TraitsType;
-  using Address = typename Traits::Address;
-  using ByteRegister = typename Traits::ByteRegister;
-  using BrCond = typename Traits::Cond::BrCond;
-  using CmppsCond = typename Traits::Cond::CmppsCond;
-  using GPRRegister = typename Traits::GPRRegister;
-  using Operand = typename Traits::Operand;
-  using XmmRegister = typename Traits::XmmRegister;
-
-  static constexpr int MAX_NOP_SIZE = 8;
-
-  static bool classof(const Assembler *Asm) {
-    return Asm->getKind() == Traits::AsmKind;
-  }
-
-  class Immediate {
-    Immediate(const Immediate &) = delete;
-    Immediate &operator=(const Immediate &) = delete;
-
-  public:
-    explicit Immediate(int32_t value) : value_(value) {}
-
-    explicit Immediate(AssemblerFixup *fixup) : fixup_(fixup) {}
-
-    int32_t value() const { return value_; }
-    AssemblerFixup *fixup() const { return fixup_; }
-
-    bool is_int8() const {
-      // We currently only allow 32-bit fixups, and they usually have value = 0,
-      // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
-      return fixup_ == nullptr && Utils::IsInt(8, value_);
-    }
-    bool is_uint8() const {
-      return fixup_ == nullptr && Utils::IsUint(8, value_);
-    }
-    bool is_uint16() const {
-      return fixup_ == nullptr && Utils::IsUint(16, value_);
-    }
-
-  private:
-    const int32_t value_ = 0;
-    AssemblerFixup *fixup_ = nullptr;
-  };
-
-  /// X86 allows near and far jumps.
-  class Label final : public Ice::Label {
-    Label(const Label &) = delete;
-    Label &operator=(const Label &) = delete;
-
-  public:
-    Label() = default;
-    ~Label() = default;
-
-    void finalCheck() const override {
-      Ice::Label::finalCheck();
-      assert(!hasNear());
-    }
-
-    /// Returns the position of an earlier branch instruction which assumes that
-    /// this label is "near", and bumps iterator to the next near position.
-    intptr_t getNearPosition() {
-      assert(hasNear());
-      intptr_t Pos = UnresolvedNearPositions.back();
-      UnresolvedNearPositions.pop_back();
-      return Pos;
-    }
-
-    bool hasNear() const { return !UnresolvedNearPositions.empty(); }
-    bool isUnused() const override {
-      return Ice::Label::isUnused() && !hasNear();
-    }
-
-  private:
-    friend class AssemblerX86Base<TraitsType>;
-
-    void nearLinkTo(const Assembler &Asm, intptr_t position) {
-      if (Asm.getPreliminary())
-        return;
-      assert(!isBound());
-      UnresolvedNearPositions.push_back(position);
-    }
-
-    llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
-  };
-
-public:
-  ~AssemblerX86Base() override;
-
-  static const bool kNearJump = true;
-  static const bool kFarJump = false;
-
-  void alignFunction() override;
-
-  SizeT getBundleAlignLog2Bytes() const override { return 5; }
-
-  const char *getAlignDirective() const override { return ".p2align"; }
-
-  llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
-    static const uint8_t Padding[] = {0xF4};
-    return llvm::ArrayRef<uint8_t>(Padding, 1);
-  }
-
-  void padWithNop(intptr_t Padding) override {
-    while (Padding > MAX_NOP_SIZE) {
-      nop(MAX_NOP_SIZE);
-      Padding -= MAX_NOP_SIZE;
-    }
-    if (Padding)
-      nop(Padding);
-  }
-
-  Ice::Label *getCfgNodeLabel(SizeT NodeNumber) override;
-  void bindCfgNodeLabel(const CfgNode *Node) override;
-  Label *getOrCreateCfgNodeLabel(SizeT Number);
-  Label *getOrCreateLocalLabel(SizeT Number);
-  void bindLocalLabel(SizeT Number);
-
-  bool fixupIsPCRel(FixupKind Kind) const override {
-    // Currently assuming this is the only PC-rel relocation type used.
-    // TODO(jpp): Traits.PcRelTypes.count(Kind) != 0
-    return Kind == Traits::FK_PcRel;
-  }
-
-  // Operations to emit GPR instructions (and dispatch on operand type).
-  using TypedEmitGPR = void (AssemblerX86Base::*)(Type, GPRRegister);
-  using TypedEmitAddr = void (AssemblerX86Base::*)(Type, const Address &);
-  struct GPREmitterOneOp {
-    TypedEmitGPR Reg;
-    TypedEmitAddr Addr;
-  };
-
-  using TypedEmitGPRGPR = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                     GPRRegister);
-  using TypedEmitGPRAddr = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                      const Address &);
-  using TypedEmitGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                     const Immediate &);
-  struct GPREmitterRegOp {
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRAddr GPRAddr;
-    TypedEmitGPRImm GPRImm;
-  };
-
-  struct GPREmitterShiftOp {
-    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are
-    // not. In practice, we always normalize the Dest to a Register first.
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRImm GPRImm;
-  };
-
-  using TypedEmitGPRGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                        GPRRegister,
-                                                        const Immediate &);
-  struct GPREmitterShiftD {
-    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
-    // always normalize Dest to a Register first.
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRGPRImm GPRGPRImm;
-  };
-
-  using TypedEmitAddrGPR = void (AssemblerX86Base::*)(Type, const Address &,
-                                                      GPRRegister);
-  using TypedEmitAddrImm = void (AssemblerX86Base::*)(Type, const Address &,
-                                                      const Immediate &);
-  struct GPREmitterAddrOp {
-    TypedEmitAddrGPR AddrGPR;
-    TypedEmitAddrImm AddrImm;
-  };
-
-  // Operations to emit XMM instructions (and dispatch on operand type).
-  using TypedEmitXmmXmm = void (AssemblerX86Base::*)(Type, XmmRegister,
-                                                     XmmRegister);
-  using TypedEmitXmmAddr = void (AssemblerX86Base::*)(Type, XmmRegister,
-                                                      const Address &);
-  struct XmmEmitterRegOp {
-    TypedEmitXmmXmm XmmXmm;
-    TypedEmitXmmAddr XmmAddr;
-  };
-
-  using EmitXmmXmm = void (AssemblerX86Base::*)(XmmRegister, XmmRegister);
-  using EmitXmmAddr = void (AssemblerX86Base::*)(XmmRegister, const Address &);
-  using EmitAddrXmm = void (AssemblerX86Base::*)(const Address &, XmmRegister);
-  struct XmmEmitterMovOps {
-    EmitXmmXmm XmmXmm;
-    EmitXmmAddr XmmAddr;
-    EmitAddrXmm AddrXmm;
-  };
-
-  using TypedEmitXmmImm = void (AssemblerX86Base::*)(Type, XmmRegister,
-                                                     const Immediate &);
-
-  struct XmmEmitterShiftOp {
-    TypedEmitXmmXmm XmmXmm;
-    TypedEmitXmmAddr XmmAddr;
-    TypedEmitXmmImm XmmImm;
-  };
-
-  // Cross Xmm/GPR cast instructions.
-  template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
-    using TypedEmitRegs = void (AssemblerX86Base::*)(Type, DReg_t, Type,
-                                                     SReg_t);
-    using TypedEmitAddr = void (AssemblerX86Base::*)(Type, DReg_t, Type,
-                                                     const Address &);
-
-    TypedEmitRegs RegReg;
-    TypedEmitAddr RegAddr;
-  };
-
-  // Three operand (potentially) cross Xmm/GPR instructions. The last operand
-  // must be an immediate.
-  template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
-    using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t,
-                                                          const Immediate &);
-    using TypedEmitRegAddrImm = void (AssemblerX86Base::*)(Type, DReg_t,
-                                                           const Address &,
-                                                           const Immediate &);
-
-    TypedEmitRegRegImm RegRegImm;
-    TypedEmitRegAddrImm RegAddrImm;
-  };
-
-  /*
-   * Emit Machine Instructions.
-   */
-  void call(GPRRegister reg);
-  void call(const Address &address);
-  void call(const ConstantRelocatable *label); // not testable.
-  void call(const Immediate &abs_address);
-
-  static const intptr_t kCallExternalLabelSize = 5;
-
-  void pushl(GPRRegister reg);
-  void pushl(const Immediate &Imm);
-  void pushl(const ConstantRelocatable *Label);
-
-  void popl(GPRRegister reg);
-  void popl(const Address &address);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::HasPusha>::type>
-  void pushal();
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::HasPopa>::type>
-  void popal();
-
-  void setcc(BrCond condition, ByteRegister dst);
-  void setcc(BrCond condition, const Address &address);
-
-  void mov(Type Ty, GPRRegister dst, const Immediate &src);
-  void mov(Type Ty, GPRRegister dst, GPRRegister src);
-  void mov(Type Ty, GPRRegister dst, const Address &src);
-  void mov(Type Ty, const Address &dst, GPRRegister src);
-  void mov(Type Ty, const Address &dst, const Immediate &imm);
-
-  template <typename T = Traits>
-  typename std::enable_if<T::Is64Bit, void>::type movabs(const GPRRegister Dst,
-                                                         uint64_t Imm64);
-  template <typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, void>::type movabs(const GPRRegister,
-                                                          uint64_t) {
-    llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
-  }
-
-  void movzx(Type Ty, GPRRegister dst, GPRRegister src);
-  void movzx(Type Ty, GPRRegister dst, const Address &src);
-  void movsx(Type Ty, GPRRegister dst, GPRRegister src);
-  void movsx(Type Ty, GPRRegister dst, const Address &src);
-
-  void lea(Type Ty, GPRRegister dst, const Address &src);
-
-  void cmov(Type Ty, BrCond cond, GPRRegister dst, GPRRegister src);
-  void cmov(Type Ty, BrCond cond, GPRRegister dst, const Address &src);
-
-  void rep_movsb();
-
-  void movss(Type Ty, XmmRegister dst, const Address &src);
-  void movss(Type Ty, const Address &dst, XmmRegister src);
-  void movss(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void movd(Type SrcTy, XmmRegister dst, GPRRegister src);
-  void movd(Type SrcTy, XmmRegister dst, const Address &src);
-  void movd(Type DestTy, GPRRegister dst, XmmRegister src);
-  void movd(Type DestTy, const Address &dst, XmmRegister src);
-
-  void movq(XmmRegister dst, XmmRegister src);
-  void movq(const Address &dst, XmmRegister src);
-  void movq(XmmRegister dst, const Address &src);
-
-  void addss(Type Ty, XmmRegister dst, XmmRegister src);
-  void addss(Type Ty, XmmRegister dst, const Address &src);
-  void subss(Type Ty, XmmRegister dst, XmmRegister src);
-  void subss(Type Ty, XmmRegister dst, const Address &src);
-  void mulss(Type Ty, XmmRegister dst, XmmRegister src);
-  void mulss(Type Ty, XmmRegister dst, const Address &src);
-  void divss(Type Ty, XmmRegister dst, XmmRegister src);
-  void divss(Type Ty, XmmRegister dst, const Address &src);
-
-  void movaps(XmmRegister dst, XmmRegister src);
-
-  void movups(XmmRegister dst, XmmRegister src);
-  void movups(XmmRegister dst, const Address &src);
-  void movups(const Address &dst, XmmRegister src);
-
-  void padd(Type Ty, XmmRegister dst, XmmRegister src);
-  void padd(Type Ty, XmmRegister dst, const Address &src);
-  void padds(Type Ty, XmmRegister dst, XmmRegister src);
-  void padds(Type Ty, XmmRegister dst, const Address &src);
-  void paddus(Type Ty, XmmRegister dst, XmmRegister src);
-  void paddus(Type Ty, XmmRegister dst, const Address &src);
-  void pand(Type Ty, XmmRegister dst, XmmRegister src);
-  void pand(Type Ty, XmmRegister dst, const Address &src);
-  void pandn(Type Ty, XmmRegister dst, XmmRegister src);
-  void pandn(Type Ty, XmmRegister dst, const Address &src);
-  void pmull(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmull(Type Ty, XmmRegister dst, const Address &src);
-  void pmulhw(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmulhw(Type Ty, XmmRegister dst, const Address &src);
-  void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmulhuw(Type Ty, XmmRegister dst, const Address &src);
-  void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmaddwd(Type Ty, XmmRegister dst, const Address &src);
-  void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmuludq(Type Ty, XmmRegister dst, const Address &src);
-  void por(Type Ty, XmmRegister dst, XmmRegister src);
-  void por(Type Ty, XmmRegister dst, const Address &src);
-  void psub(Type Ty, XmmRegister dst, XmmRegister src);
-  void psub(Type Ty, XmmRegister dst, const Address &src);
-  void psubs(Type Ty, XmmRegister dst, XmmRegister src);
-  void psubs(Type Ty, XmmRegister dst, const Address &src);
-  void psubus(Type Ty, XmmRegister dst, XmmRegister src);
-  void psubus(Type Ty, XmmRegister dst, const Address &src);
-  void pxor(Type Ty, XmmRegister dst, XmmRegister src);
-  void pxor(Type Ty, XmmRegister dst, const Address &src);
-
-  void psll(Type Ty, XmmRegister dst, XmmRegister src);
-  void psll(Type Ty, XmmRegister dst, const Address &src);
-  void psll(Type Ty, XmmRegister dst, const Immediate &src);
-
-  void psra(Type Ty, XmmRegister dst, XmmRegister src);
-  void psra(Type Ty, XmmRegister dst, const Address &src);
-  void psra(Type Ty, XmmRegister dst, const Immediate &src);
-  void psrl(Type Ty, XmmRegister dst, XmmRegister src);
-  void psrl(Type Ty, XmmRegister dst, const Address &src);
-  void psrl(Type Ty, XmmRegister dst, const Immediate &src);
-
-  void addps(Type Ty, XmmRegister dst, XmmRegister src);
-  void addps(Type Ty, XmmRegister dst, const Address &src);
-  void subps(Type Ty, XmmRegister dst, XmmRegister src);
-  void subps(Type Ty, XmmRegister dst, const Address &src);
-  void divps(Type Ty, XmmRegister dst, XmmRegister src);
-  void divps(Type Ty, XmmRegister dst, const Address &src);
-  void mulps(Type Ty, XmmRegister dst, XmmRegister src);
-  void mulps(Type Ty, XmmRegister dst, const Address &src);
-  void minps(Type Ty, XmmRegister dst, const Address &src);
-  void minps(Type Ty, XmmRegister dst, XmmRegister src);
-  void minss(Type Ty, XmmRegister dst, const Address &src);
-  void minss(Type Ty, XmmRegister dst, XmmRegister src);
-  void maxps(Type Ty, XmmRegister dst, const Address &src);
-  void maxps(Type Ty, XmmRegister dst, XmmRegister src);
-  void maxss(Type Ty, XmmRegister dst, const Address &src);
-  void maxss(Type Ty, XmmRegister dst, XmmRegister src);
-  void andnps(Type Ty, XmmRegister dst, const Address &src);
-  void andnps(Type Ty, XmmRegister dst, XmmRegister src);
-  void andps(Type Ty, XmmRegister dst, const Address &src);
-  void andps(Type Ty, XmmRegister dst, XmmRegister src);
-  void orps(Type Ty, XmmRegister dst, const Address &src);
-  void orps(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
-  void blendvps(Type Ty, XmmRegister dst, const Address &src);
-  void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
-  void pblendvb(Type Ty, XmmRegister dst, const Address &src);
-
-  void cmpps(Type Ty, XmmRegister dst, XmmRegister src, CmppsCond CmpCondition);
-  void cmpps(Type Ty, XmmRegister dst, const Address &src,
-             CmppsCond CmpCondition);
-
-  void sqrtps(XmmRegister dst);
-  void rsqrtps(XmmRegister dst);
-  void reciprocalps(XmmRegister dst);
-
-  void movhlps(XmmRegister dst, XmmRegister src);
-  void movlhps(XmmRegister dst, XmmRegister src);
-  void unpcklps(XmmRegister dst, XmmRegister src);
-  void unpckhps(XmmRegister dst, XmmRegister src);
-  void unpcklpd(XmmRegister dst, XmmRegister src);
-  void unpckhpd(XmmRegister dst, XmmRegister src);
-
-  void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
-
-  void sqrtpd(XmmRegister dst);
-
-  void pshufb(Type Ty, XmmRegister dst, XmmRegister src);
-  void pshufb(Type Ty, XmmRegister dst, const Address &src);
-  void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
-  void pshufd(Type Ty, XmmRegister dst, const Address &src,
-              const Immediate &mask);
-  void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void punpckl(Type Ty, XmmRegister Dst, const Address &Src);
-  void punpckh(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void punpckh(Type Ty, XmmRegister Dst, const Address &Src);
-  void packss(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void packss(Type Ty, XmmRegister Dst, const Address &Src);
-  void packus(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void packus(Type Ty, XmmRegister Dst, const Address &Src);
-  void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
-  void shufps(Type Ty, XmmRegister dst, const Address &src,
-              const Immediate &mask);
-
-  void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
-  void cvtdq2ps(Type, XmmRegister dst, const Address &src);
-
-  void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
-  void cvttps2dq(Type, XmmRegister dst, const Address &src);
-
-  void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
-  void cvtps2dq(Type, XmmRegister dst, const Address &src);
-
-  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
-  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
-
-  void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
-  void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
-
-  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
-  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
-
-  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
-  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
-
-  void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
-  void ucomiss(Type Ty, XmmRegister a, const Address &b);
-
-  void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
-
-  void sqrt(Type Ty, XmmRegister dst, const Address &src);
-  void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void xorps(Type Ty, XmmRegister dst, const Address &src);
-  void xorps(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void insertps(Type Ty, XmmRegister dst, XmmRegister src,
-                const Immediate &imm);
-  void insertps(Type Ty, XmmRegister dst, const Address &src,
-                const Immediate &imm);
-
-  void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
-  void pinsr(Type Ty, XmmRegister dst, const Address &src,
-             const Immediate &imm);
-
-  void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
-
-  void pmovsxdq(XmmRegister dst, XmmRegister src);
-
-  void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
-  void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
-  void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
-  void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
-
-  enum RoundingMode {
-    kRoundToNearest = 0x0,
-    kRoundDown = 0x1,
-    kRoundUp = 0x2,
-    kRoundToZero = 0x3
-  };
-  void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
-  void round(Type Ty, XmmRegister dst, const Address &src,
-             const Immediate &mode);
-
-  //----------------------------------------------------------------------------
-  //
-  // Begin: X87 instructions. Only available when Traits::UsesX87.
-  //
-  //----------------------------------------------------------------------------
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fld(Type Ty, const typename T::Address &src);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fstp(Type Ty, const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fstp(typename T::X87STRegister st);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fnstcw(const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fldcw(const typename T::Address &src);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fistpl(const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fistps(const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fildl(const typename T::Address &src);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void filds(const typename T::Address &src);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fincstp();
-  //----------------------------------------------------------------------------
-  //
-  // End: X87 instructions.
-  //
-  //----------------------------------------------------------------------------
-
-  void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void cmp(Type Ty, GPRRegister reg, const Address &address);
-  void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
-  void cmp(Type Ty, const Address &address, GPRRegister reg);
-  void cmp(Type Ty, const Address &address, const Immediate &imm);
-
-  void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void test(Type Ty, GPRRegister reg, const Immediate &imm);
-  void test(Type Ty, const Address &address, GPRRegister reg);
-  void test(Type Ty, const Address &address, const Immediate &imm);
-
-  void And(Type Ty, GPRRegister dst, GPRRegister src);
-  void And(Type Ty, GPRRegister dst, const Address &address);
-  void And(Type Ty, GPRRegister dst, const Immediate &imm);
-  void And(Type Ty, const Address &address, GPRRegister reg);
-  void And(Type Ty, const Address &address, const Immediate &imm);
-
-  void Or(Type Ty, GPRRegister dst, GPRRegister src);
-  void Or(Type Ty, GPRRegister dst, const Address &address);
-  void Or(Type Ty, GPRRegister dst, const Immediate &imm);
-  void Or(Type Ty, const Address &address, GPRRegister reg);
-  void Or(Type Ty, const Address &address, const Immediate &imm);
-
-  void Xor(Type Ty, GPRRegister dst, GPRRegister src);
-  void Xor(Type Ty, GPRRegister dst, const Address &address);
-  void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
-  void Xor(Type Ty, const Address &address, GPRRegister reg);
-  void Xor(Type Ty, const Address &address, const Immediate &imm);
-
-  void add(Type Ty, GPRRegister dst, GPRRegister src);
-  void add(Type Ty, GPRRegister reg, const Address &address);
-  void add(Type Ty, GPRRegister reg, const Immediate &imm);
-  void add(Type Ty, const Address &address, GPRRegister reg);
-  void add(Type Ty, const Address &address, const Immediate &imm);
-
-  void adc(Type Ty, GPRRegister dst, GPRRegister src);
-  void adc(Type Ty, GPRRegister dst, const Address &address);
-  void adc(Type Ty, GPRRegister reg, const Immediate &imm);
-  void adc(Type Ty, const Address &address, GPRRegister reg);
-  void adc(Type Ty, const Address &address, const Immediate &imm);
-
-  void sub(Type Ty, GPRRegister dst, GPRRegister src);
-  void sub(Type Ty, GPRRegister reg, const Address &address);
-  void sub(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sub(Type Ty, const Address &address, GPRRegister reg);
-  void sub(Type Ty, const Address &address, const Immediate &imm);
-
-  void sbb(Type Ty, GPRRegister dst, GPRRegister src);
-  void sbb(Type Ty, GPRRegister reg, const Address &address);
-  void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sbb(Type Ty, const Address &address, GPRRegister reg);
-  void sbb(Type Ty, const Address &address, const Immediate &imm);
-
-  void cbw();
-  void cwd();
-  void cdq();
-  template <typename T = Traits>
-  typename std::enable_if<T::Is64Bit, void>::type cqo();
-  template <typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, void>::type cqo() {
-    llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
-  }
-
-  void div(Type Ty, GPRRegister reg);
-  void div(Type Ty, const Address &address);
-
-  void idiv(Type Ty, GPRRegister reg);
-  void idiv(Type Ty, const Address &address);
-
-  void imul(Type Ty, GPRRegister dst, GPRRegister src);
-  void imul(Type Ty, GPRRegister reg, const Immediate &imm);
-  void imul(Type Ty, GPRRegister reg, const Address &address);
-
-  void imul(Type Ty, GPRRegister reg);
-  void imul(Type Ty, const Address &address);
-
-  void imul(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void imul(Type Ty, GPRRegister dst, const Address &address,
-            const Immediate &imm);
-
-  void mul(Type Ty, GPRRegister reg);
-  void mul(Type Ty, const Address &address);
-
-  template <class T = Traits,
-            typename = typename std::enable_if<!T::Is64Bit>::type>
-  void incl(GPRRegister reg);
-  void incl(const Address &address);
-
-  template <class T = Traits,
-            typename = typename std::enable_if<!T::Is64Bit>::type>
-  void decl(GPRRegister reg);
-  void decl(const Address &address);
-
-  void rol(Type Ty, GPRRegister reg, const Immediate &imm);
-  void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void rol(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void shl(Type Ty, GPRRegister reg, const Immediate &imm);
-  void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void shl(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void shr(Type Ty, GPRRegister reg, const Immediate &imm);
-  void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void shr(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void sar(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void sar(Type Ty, const Address &address, GPRRegister shifter);
-
-  void shld(Type Ty, GPRRegister dst, GPRRegister src);
-  void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void shld(Type Ty, const Address &operand, GPRRegister src);
-  void shrd(Type Ty, GPRRegister dst, GPRRegister src);
-  void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void shrd(Type Ty, const Address &dst, GPRRegister src);
-
-  void neg(Type Ty, GPRRegister reg);
-  void neg(Type Ty, const Address &addr);
-  void notl(GPRRegister reg);
-
-  void bsf(Type Ty, GPRRegister dst, GPRRegister src);
-  void bsf(Type Ty, GPRRegister dst, const Address &src);
-  void bsr(Type Ty, GPRRegister dst, GPRRegister src);
-  void bsr(Type Ty, GPRRegister dst, const Address &src);
-
-  void bswap(Type Ty, GPRRegister reg);
-
-  void bt(GPRRegister base, GPRRegister offset);
-
-  void ret();
-  void ret(const Immediate &imm);
-
-  // 'size' indicates size in bytes and must be in the range 1..8.
-  void nop(int size = 1);
-  void int3();
-  void hlt();
-  void ud2();
-
-  // j(Label) is fully tested.
-  void j(BrCond condition, Label *label, bool near = kFarJump);
-  void j(BrCond condition, const ConstantRelocatable *label); // not testable.
-
-  void jmp(GPRRegister reg);
-  void jmp(Label *label, bool near = kFarJump);
-  void jmp(const ConstantRelocatable *label); // not testable.
-  void jmp(const Immediate &abs_address);
-
-  void mfence();
-
-  void lock();
-  void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
-  void cmpxchg8b(const Address &address, bool Locked);
-  void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
-  void xchg(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void xchg(Type Ty, const Address &address, GPRRegister reg);
-
-  /// \name Intel Architecture Code Analyzer markers.
-  /// @{
-  void iaca_start();
-  void iaca_end();
-  /// @}
-
-  void emitSegmentOverride(uint8_t prefix);
-
-  intptr_t preferredLoopAlignment() { return 16; }
-  void align(intptr_t alignment, intptr_t offset);
-  void bind(Label *label);
-
-  intptr_t CodeSize() const { return Buffer.size(); }
-
-protected:
-  inline void emitUint8(uint8_t value);
-
-private:
-  ENABLE_MAKE_UNIQUE;
-
-  static constexpr Type RexTypeIrrelevant = IceType_i32;
-  static constexpr Type RexTypeForceRexW = IceType_i64;
-  static constexpr GPRRegister RexRegIrrelevant =
-      Traits::GPRRegister::Encoded_Reg_eax;
-
-  inline void emitInt16(int16_t value);
-  inline void emitInt32(int32_t value);
-  inline void emitRegisterOperand(int rm, int reg);
-  template <typename RegType, typename RmType>
-  inline void emitXmmRegisterOperand(RegType reg, RmType rm);
-  inline void emitOperandSizeOverride();
-
-  void emitOperand(int rm, const Operand &operand, RelocOffsetT Addend = 0);
-  void emitImmediate(Type ty, const Immediate &imm);
-  void emitComplexI8(int rm, const Operand &operand,
-                     const Immediate &immediate);
-  void emitComplex(Type Ty, int rm, const Operand &operand,
-                   const Immediate &immediate);
-  void emitLabel(Label *label, intptr_t instruction_size);
-  void emitLabelLink(Label *label);
-  void emitNearLabelLink(Label *label);
-
-  void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
-  void emitGenericShift(int rm, Type Ty, const Operand &operand,
-                        GPRRegister shifter);
-
-  using LabelVector = std::vector<Label *>;
-  // A vector of pool-allocated x86 labels for CFG nodes.
-  LabelVector CfgNodeLabels;
-  // A vector of pool-allocated x86 labels for Local labels.
-  LabelVector LocalLabels;
-
-  Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
-
-  // The arith_int() methods factor out the commonality between the encodings
-  // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
-  // parameter is statically asserted to be less than 8.
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg, const Address &address);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, const Address &address, GPRRegister reg);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, const Address &address, const Immediate &imm);
-
-  // gprEncoding returns Reg encoding for operand emission. For x86-64 we mask
-  // out the 4th bit as it is encoded in the REX.[RXB] bits. No other bits are
-  // touched because we don't want to mask errors.
-  template <typename RegType, typename T = Traits>
-  typename std::enable_if<T::Is64Bit, typename T::GPRRegister>::type
-  gprEncoding(const RegType Reg) {
-    return static_cast<GPRRegister>(static_cast<uint8_t>(Reg) & ~0x08);
-  }
-
-  template <typename RegType, typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, typename T::GPRRegister>::type
-  gprEncoding(const RegType Reg) {
-    return static_cast<typename T::GPRRegister>(Reg);
-  }
-
-  template <typename RegType>
-  bool is8BitRegisterRequiringRex(const Type Ty, const RegType Reg) {
-    static constexpr bool IsGPR =
-        std::is_same<typename std::decay<RegType>::type, ByteRegister>::value ||
-        std::is_same<typename std::decay<RegType>::type, GPRRegister>::value;
-
-    // At this point in the assembler, we have encoded regs, so it is not
-    // possible to distinguish between the "new" low byte registers introduced
-    // in x86-64 and the legacy [abcd]h registers. Because x86, we may still
-    // see ah (div) in the assembler, so we allow it here.
-    //
-    // The "local" uint32_t Encoded_Reg_ah is needed because RegType is an
-    // enum that is not necessarily the same type of
-    // Traits::RegisterSet::Encoded_Reg_ah.
-    constexpr uint32_t Encoded_Reg_ah = Traits::RegisterSet::Encoded_Reg_ah;
-    return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
-           isByteSizedType(Ty) && (Reg != Encoded_Reg_ah);
-  }
-
-  // assembleAndEmitRex is used for determining which (if any) rex prefix
-  // should be emitted for the current instruction. It allows different types
-  // for Reg and Rm because they could be of different types (e.g., in
-  // mov[sz]x instructions.) If Addr is not nullptr, then Rm is ignored, and
-  // Rex.B is determined by Addr instead. TyRm is still used to determine
-  // Addr's size.
-  template <typename RegType, typename RmType, typename T = Traits>
-  typename std::enable_if<T::Is64Bit, void>::type
-  assembleAndEmitRex(const Type TyReg, const RegType Reg, const Type TyRm,
-                     const RmType Rm,
-                     const typename T::Address *Addr = nullptr) {
-    const uint8_t W = (TyReg == IceType_i64 || TyRm == IceType_i64)
-                          ? T::Operand::RexW
-                          : T::Operand::RexNone;
-    const uint8_t R = (Reg & 0x08) ? T::Operand::RexR : T::Operand::RexNone;
-    const uint8_t X = (Addr != nullptr)
-                          ? (typename T::Operand::RexBits)Addr->rexX()
-                          : T::Operand::RexNone;
-    const uint8_t B = (Addr != nullptr)
-                          ? (typename T::Operand::RexBits)Addr->rexB()
-                      : (Rm & 0x08) ? T::Operand::RexB
-                                    : T::Operand::RexNone;
-    const uint8_t Prefix = W | R | X | B;
-    if (Prefix != T::Operand::RexNone) {
-      emitUint8(Prefix);
-    } else if (is8BitRegisterRequiringRex(TyReg, Reg) ||
-               (Addr == nullptr && is8BitRegisterRequiringRex(TyRm, Rm))) {
-      emitUint8(T::Operand::RexBase);
-    }
-  }
-
-  template <typename RegType, typename RmType, typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, void>::type
-  assembleAndEmitRex(const Type, const RegType, const Type, const RmType,
-                     const typename T::Address * = nullptr) {}
-
-  // emitRexRB is used for emitting a Rex prefix instructions with two
-  // explicit register operands in its mod-rm byte.
-  template <typename RegType, typename RmType>
-  void emitRexRB(const Type Ty, const RegType Reg, const RmType Rm) {
-    assembleAndEmitRex(Ty, Reg, Ty, Rm);
-  }
-
-  template <typename RegType, typename RmType>
-  void emitRexRB(const Type TyReg, const RegType Reg, const Type TyRm,
-                 const RmType Rm) {
-    assembleAndEmitRex(TyReg, Reg, TyRm, Rm);
-  }
-
-  // emitRexB is used for emitting a Rex prefix if one is needed on encoding
-  // the Reg field in an x86 instruction. It is invoked by the template when
-  // Reg is the single register operand in the instruction (e.g., push Reg.)
-  template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) {
-    emitRexRB(Ty, RexRegIrrelevant, Ty, Rm);
-  }
-
-  // emitRex is used for emitting a Rex prefix for an address and a GPR. The
-  // address may contain zero, one, or two registers.
-  template <typename RegType>
-  void emitRex(const Type Ty, const Address &Addr, const RegType Reg) {
-    assembleAndEmitRex(Ty, Reg, Ty, RexRegIrrelevant, &Addr);
-  }
-
-  template <typename RegType>
-  void emitRex(const Type AddrTy, const Address &Addr, const Type TyReg,
-               const RegType Reg) {
-    assembleAndEmitRex(TyReg, Reg, AddrTy, RexRegIrrelevant, &Addr);
-  }
-};
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitUint8(uint8_t value) {
-  Buffer.emit<uint8_t>(value);
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitInt16(int16_t value) {
-  Buffer.emit<int16_t>(value);
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitInt32(int32_t value) {
-  Buffer.emit<int32_t>(value);
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitRegisterOperand(int reg, int rm) {
-  assert(reg >= 0 && reg < 8);
-  assert(rm >= 0 && rm < 8);
-  Buffer.emit<uint8_t>(0xC0 + (reg << 3) + rm);
-}
-
-template <typename TraitsType>
-template <typename RegType, typename RmType>
-inline void AssemblerX86Base<TraitsType>::emitXmmRegisterOperand(RegType reg,
-                                                                 RmType rm) {
-  emitRegisterOperand(gprEncoding(reg), gprEncoding(rm));
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitOperandSizeOverride() {
-  emitUint8(0x66);
-}
-
-} // end of namespace X8632
-} // end of namespace Ice
-
-#include "IceAssemblerX8632BaseImpl.h"
-
-#endif // SUBZERO_SRC_ICEASSEMBLERX8632BASE_H
diff --git a/third_party/subzero/src/IceAssemblerX8632BaseImpl.h b/third_party/subzero/src/IceAssemblerX8632BaseImpl.h
deleted file mode 100644
index d323623..0000000
--- a/third_party/subzero/src/IceAssemblerX8632BaseImpl.h
+++ /dev/null
@@ -1,3917 +0,0 @@
-//===- subzero/src/IceAssemblerX86BaseImpl.h - base x86 assembler -*- C++ -*-=//
-// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-//
-// Modified by the Subzero authors.
-//
-//===----------------------------------------------------------------------===//
-//
-//                        The Subzero Code Generator
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Implements the AssemblerX8632Base template class, which is the base
-/// Assembler class for X8632 assemblers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "IceAssemblerX8632Base.h"
-
-#include "IceCfg.h"
-#include "IceCfgNode.h"
-#include "IceOperand.h"
-
-namespace Ice {
-namespace X8632 {
-
-template <typename TraitsType>
-AssemblerX86Base<TraitsType>::~AssemblerX86Base() {
-  if (BuildDefs::asserts()) {
-    for (const Label *Label : CfgNodeLabels) {
-      Label->finalCheck();
-    }
-    for (const Label *Label : LocalLabels) {
-      Label->finalCheck();
-    }
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::alignFunction() {
-  const SizeT Align = 1 << getBundleAlignLog2Bytes();
-  SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
-  constexpr SizeT HltSize = 1;
-  while (BytesNeeded > 0) {
-    hlt();
-    BytesNeeded -= HltSize;
-  }
-}
-
-template <typename TraitsType>
-typename AssemblerX86Base<TraitsType>::Label *
-AssemblerX86Base<TraitsType>::getOrCreateLabel(SizeT Number,
-                                               LabelVector &Labels) {
-  Label *L = nullptr;
-  if (Number == Labels.size()) {
-    L = new (this->allocate<Label>()) Label();
-    Labels.push_back(L);
-    return L;
-  }
-  if (Number > Labels.size()) {
-    Utils::reserveAndResize(Labels, Number + 1);
-  }
-  L = Labels[Number];
-  if (!L) {
-    L = new (this->allocate<Label>()) Label();
-    Labels[Number] = L;
-  }
-  return L;
-}
-
-template <typename TraitsType>
-Ice::Label *AssemblerX86Base<TraitsType>::getCfgNodeLabel(SizeT NodeNumber) {
-  assert(NodeNumber < CfgNodeLabels.size());
-  return CfgNodeLabels[NodeNumber];
-}
-
-template <typename TraitsType>
-typename AssemblerX86Base<TraitsType>::Label *
-AssemblerX86Base<TraitsType>::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
-  return getOrCreateLabel(NodeNumber, CfgNodeLabels);
-}
-
-template <typename TraitsType>
-typename AssemblerX86Base<TraitsType>::Label *
-AssemblerX86Base<TraitsType>::getOrCreateLocalLabel(SizeT Number) {
-  return getOrCreateLabel(Number, LocalLabels);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bindCfgNodeLabel(const CfgNode *Node) {
-  assert(!getPreliminary());
-  Label *L = getOrCreateCfgNodeLabel(Node->getIndex());
-  this->bind(L);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bindLocalLabel(SizeT Number) {
-  Label *L = getOrCreateLocalLabel(Number);
-  if (!getPreliminary())
-    this->bind(L);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexB(RexTypeIrrelevant, reg);
-  emitUint8(0xFF);
-  emitRegisterOperand(2, gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
-  emitUint8(0xFF);
-  emitOperand(2, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(const ConstantRelocatable *label) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  intptr_t call_start = Buffer.getPosition();
-  emitUint8(0xE8);
-  auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
-  Fixup->set_addend(-4);
-  emitFixup(Fixup);
-  emitInt32(0);
-  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
-  (void)call_start;
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(const Immediate &abs_address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  intptr_t call_start = Buffer.getPosition();
-  emitUint8(0xE8);
-  auto *Fixup = this->createFixup(Traits::FK_PcRel, AssemblerFixup::NullSymbol);
-  Fixup->set_addend(abs_address.value() - 4);
-  emitFixup(Fixup);
-  emitInt32(0);
-  assert((Buffer.getPosition() - call_start) == kCallExternalLabelSize);
-  (void)call_start;
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pushl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexB(RexTypeIrrelevant, reg);
-  emitUint8(0x50 + gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pushl(const Immediate &Imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x68);
-  emitInt32(Imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pushl(const ConstantRelocatable *Label) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x68);
-  emitFixup(this->createFixup(Traits::FK_Abs, Label));
-  // In x86-32, the emitted value is an addend to the relocation. Therefore, we
-  // must emit a 0 (because we're pushing an absolute relocation.)
-  // In x86-64, the emitted value does not matter (the addend lives in the
-  // relocation record as an extra field.)
-  emitInt32(0);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::popl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // Any type that would not force a REX prefix to be emitted can be provided
-  // here.
-  emitRexB(RexTypeIrrelevant, reg);
-  emitUint8(0x58 + gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::popl(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
-  emitUint8(0x8F);
-  emitOperand(0, address);
-}
-
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::pushal() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x60);
-}
-
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::popal() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x61);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::setcc(BrCond condition, ByteRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexB(IceType_i8, dst);
-  emitUint8(0x0F);
-  emitUint8(0x90 + condition);
-  emitUint8(0xC0 + gprEncoding(dst));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::setcc(BrCond condition,
-                                         const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
-  emitUint8(0x0F);
-  emitUint8(0x90 + condition);
-  emitOperand(0, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
-                                       const Immediate &imm) {
-  assert(Ty != IceType_i64 && "i64 not supported yet.");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, dst);
-  if (isByteSizedType(Ty)) {
-    emitUint8(0xB0 + gprEncoding(dst));
-    emitUint8(imm.value() & 0xFF);
-  } else {
-    // TODO(jpp): When removing the assertion above ensure that in x86-64 we
-    // emit a 64-bit immediate.
-    emitUint8(0xB8 + gprEncoding(dst));
-    emitImmediate(Ty, imm);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, src, dst);
-  if (isByteSizedType(Ty)) {
-    emitUint8(0x88);
-  } else {
-    emitUint8(0x89);
-  }
-  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
-                                       const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, src, dst);
-  if (isByteSizedType(Ty)) {
-    emitUint8(0x8A);
-  } else {
-    emitUint8(0x8B);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
-                                       GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, dst, src);
-  if (isByteSizedType(Ty)) {
-    emitUint8(0x88);
-  } else {
-    emitUint8(0x89);
-  }
-  emitOperand(gprEncoding(src), dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
-                                       const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, dst, RexRegIrrelevant);
-  if (isByteSizedType(Ty)) {
-    emitUint8(0xC6);
-    static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-    emitOperand(0, dst, OffsetFromNextInstruction);
-    emitUint8(imm.value() & 0xFF);
-  } else {
-    emitUint8(0xC7);
-    const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
-    emitOperand(0, dst, OffsetFromNextInstruction);
-    emitImmediate(Ty, imm);
-  }
-}
-
-template <typename TraitsType>
-template <typename T>
-typename std::enable_if<T::Is64Bit, void>::type
-AssemblerX86Base<TraitsType>::movabs(const GPRRegister Dst, uint64_t Imm64) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  const bool NeedsRexW = (Imm64 & ~0xFFFFFFFFull) != 0;
-  const Type RexType = NeedsRexW ? RexTypeForceRexW : RexTypeIrrelevant;
-  emitRexB(RexType, Dst);
-  emitUint8(0xB8 | gprEncoding(Dst));
-  // When emitting Imm64, we don't have to mask out the upper 32 bits for
-  // emitInt32 will/should only emit a 32-bit constant. In reality, we are
-  // paranoid, so we go ahead an mask the upper bits out anyway.
-  emitInt32(Imm64 & 0xFFFFFFFF);
-  if (NeedsRexW)
-    emitInt32((Imm64 >> 32) & 0xFFFFFFFF);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
-                                         GPRRegister src) {
-  if (Traits::Is64Bit && SrcTy == IceType_i32) {
-    // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
-    // operand to 64-bit.
-    mov(IceType_i32, dst, src);
-    return;
-  }
-
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  assert(ByteSized || SrcTy == IceType_i16);
-  emitRexRB(RexTypeIrrelevant, dst, SrcTy, src);
-  emitUint8(0x0F);
-  emitUint8(ByteSized ? 0xB6 : 0xB7);
-  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
-                                         const Address &src) {
-  if (Traits::Is64Bit && SrcTy == IceType_i32) {
-    // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
-    // operand to 64-bit.
-    mov(IceType_i32, dst, src);
-    return;
-  }
-
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  assert(ByteSized || SrcTy == IceType_i16);
-  emitRex(SrcTy, src, RexTypeIrrelevant, dst);
-  emitUint8(0x0F);
-  emitUint8(ByteSized ? 0xB6 : 0xB7);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
-                                         GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  emitRexRB(RexTypeForceRexW, dst, SrcTy, src);
-  if (ByteSized || SrcTy == IceType_i16) {
-    emitUint8(0x0F);
-    emitUint8(ByteSized ? 0xBE : 0xBF);
-  } else {
-    assert(Traits::Is64Bit && SrcTy == IceType_i32);
-    emitUint8(0x63);
-  }
-  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  bool ByteSized = isByteSizedType(SrcTy);
-  emitRex(SrcTy, src, RexTypeForceRexW, dst);
-  if (ByteSized || SrcTy == IceType_i16) {
-    emitUint8(0x0F);
-    emitUint8(ByteSized ? 0xBE : 0xBF);
-  } else {
-    assert(Traits::Is64Bit && SrcTy == IceType_i32);
-    emitUint8(0x63);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::lea(Type Ty, GPRRegister dst,
-                                       const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
-         (Traits::Is64Bit && Ty == IceType_i64));
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, src, dst);
-  emitUint8(0x8D);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
-                                        GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  else
-    assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
-  emitRexRB(Ty, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x40 + cond);
-  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  else
-    assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
-  emitRex(Ty, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x40 + cond);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::rep_movsb() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitUint8(0xA4);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x10);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movss(Type Ty, const Address &dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x11);
-  emitOperand(gprEncoding(src), dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x11);
-  emitXmmRegisterOperand(src, dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
-                                        GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(SrcTy, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x6E);
-  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(SrcTy, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x6E);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type DestTy, GPRRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(DestTy, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type DestTy, const Address &dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(DestTy, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitOperand(gprEncoding(src), dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movq(const Address &dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xD6);
-  emitOperand(gprEncoding(src), dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x7E);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fld(Type Ty,
-                                       const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
-  emitOperand(0, src);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fstp(Type Ty,
-                                        const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
-  emitOperand(3, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fstp(typename T::X87STRegister st) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDD);
-  emitUint8(0xD8 + st);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movaps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x28);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x10);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x10);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movups(const Address &dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x11);
-  emitOperand(gprEncoding(src), dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xFC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xFD);
-  } else {
-    emitUint8(0xFE);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xFC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xFD);
-  } else {
-    emitUint8(0xFE);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xEC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xED);
-  } else {
-    assert(false && "Unexpected padds operand type");
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xEC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xED);
-  } else {
-    assert(false && "Unexpected padds operand type");
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xDC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xDD);
-  } else {
-    assert(false && "Unexpected paddus operand type");
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
-                                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xDC);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xDD);
-  } else {
-    assert(false && "Unexpected paddus operand type");
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xDB);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xDB);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xDF);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xDF);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD5);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x38);
-    emitUint8(0x40);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD5);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x38);
-    emitUint8(0x40);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  assert(Ty == IceType_v8i16);
-  (void)Ty;
-  emitUint8(0xE5);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
-                                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  assert(Ty == IceType_v8i16);
-  (void)Ty;
-  emitUint8(0xE5);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
-                                           XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  assert(Ty == IceType_v8i16);
-  (void)Ty;
-  emitUint8(0xE4);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
-                                           const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  assert(Ty == IceType_v8i16);
-  (void)Ty;
-  emitUint8(0xE4);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
-                                           XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  assert(Ty == IceType_v8i16);
-  (void)Ty;
-  emitUint8(0xF5);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
-                                           const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  assert(Ty == IceType_v8i16);
-  (void)Ty;
-  emitUint8(0xF5);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
-                                           XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xF4);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
-                                           const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xF4);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
-                                       XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xEB);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
-                                       const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xEB);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xF8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xF9);
-  } else {
-    emitUint8(0xFA);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xF8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xF9);
-  } else {
-    emitUint8(0xFA);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xE8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xE9);
-  } else {
-    assert(false && "Unexpected psubs operand type");
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xE8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xE9);
-  } else {
-    assert(false && "Unexpected psubs operand type");
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xD8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xD9);
-  } else {
-    assert(false && "Unexpected psubus operand type");
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
-                                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0xD8);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0xD9);
-  } else {
-    assert(false && "Unexpected psubus operand type");
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xEF);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xEF);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xF1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xF2);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xF1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xF2);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
-                                        const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_int8());
-  emitUint8(0x66);
-  emitRexB(RexTypeIrrelevant, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0x71);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x72);
-  }
-  emitRegisterOperand(6, gprEncoding(dst));
-  emitUint8(imm.value() & 0xFF);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xE1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xE2);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xE1);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0xE2);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
-                                        const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_int8());
-  emitUint8(0x66);
-  emitRexB(RexTypeIrrelevant, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0x71);
-  } else {
-    assert(Ty == IceType_i32);
-    emitUint8(0x72);
-  }
-  emitRegisterOperand(4, gprEncoding(dst));
-  emitUint8(imm.value() & 0xFF);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD1);
-  } else if (Ty == IceType_f64) {
-    emitUint8(0xD3);
-  } else {
-    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
-    emitUint8(0xD2);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xD1);
-  } else if (Ty == IceType_f64) {
-    emitUint8(0xD3);
-  } else {
-    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
-    emitUint8(0xD2);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
-                                        const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_int8());
-  emitUint8(0x66);
-  emitRexB(RexTypeIrrelevant, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0x71);
-  } else if (Ty == IceType_f64) {
-    emitUint8(0x73);
-  } else {
-    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_v4f32);
-    emitUint8(0x72);
-  }
-  emitRegisterOperand(2, gprEncoding(dst));
-  emitUint8(imm.value() & 0xFF);
-}
-
-// {add,sub,mul,div}ps are given a Ty parameter for consistency with
-// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc.,
-// we can use the Ty parameter to decide on adding a 0x66 prefix.
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x58);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5C);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5E);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x59);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5D);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5D);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5D);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5D);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5F);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5F);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5F);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5F);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x55);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
-                                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x55);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x54);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x54);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x56);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x56);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
-                                            XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x14);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
-                                            const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x14);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
-                                            XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x10);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
-                                            const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x10);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
-                                         XmmRegister src,
-                                         CmppsCond CmpCondition) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_f64)
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xC2);
-  emitXmmRegisterOperand(dst, src);
-  emitUint8(CmpCondition);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
-                                         const Address &src,
-                                         CmppsCond CmpCondition) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_f64)
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xC2);
-  static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-  emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
-  emitUint8(CmpCondition);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtps(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, dst);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rsqrtps(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, dst);
-  emitUint8(0x0F);
-  emitUint8(0x52);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::reciprocalps(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, dst);
-  emitUint8(0x0F);
-  emitUint8(0x53);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movhlps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x12);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movlhps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x16);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpcklps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x14);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpckhps(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x15);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpcklpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x14);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpckhpd(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x15);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::set1ps(XmmRegister dst, GPRRegister tmp1,
-                                          const Immediate &imm) {
-  // Load 32-bit immediate value into tmp1.
-  mov(IceType_i32, tmp1, imm);
-  // Move value from tmp1 into dst.
-  movd(IceType_i32, dst, tmp1);
-  // Broadcast low lane into other three lanes.
-  shufps(RexTypeIrrelevant, dst, dst, Immediate(0x0));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x00);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
-                                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x00);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
-                                          XmmRegister src,
-                                          const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x70);
-  emitXmmRegisterOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
-                                          const Address &src,
-                                          const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x70);
-  static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-  emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckl(Type Ty, XmmRegister Dst,
-                                           XmmRegister Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, Dst, Src);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x62);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x61);
-  } else if (Ty == IceType_v16i8) {
-    emitUint8(0x60);
-  } else {
-    assert(false && "Unexpected vector unpack operand type");
-  }
-  emitXmmRegisterOperand(Dst, Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckl(Type Ty, XmmRegister Dst,
-                                           const Address &Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, Src, Dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x62);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x61);
-  } else if (Ty == IceType_v16i8) {
-    emitUint8(0x60);
-  } else {
-    assert(false && "Unexpected vector unpack operand type");
-  }
-  emitOperand(gprEncoding(Dst), Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckh(Type Ty, XmmRegister Dst,
-                                           XmmRegister Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, Dst, Src);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x6A);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x69);
-  } else if (Ty == IceType_v16i8) {
-    emitUint8(0x68);
-  } else {
-    assert(false && "Unexpected vector unpack operand type");
-  }
-  emitXmmRegisterOperand(Dst, Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckh(Type Ty, XmmRegister Dst,
-                                           const Address &Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, Src, Dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x6A);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x69);
-  } else if (Ty == IceType_v16i8) {
-    emitUint8(0x68);
-  } else {
-    assert(false && "Unexpected vector unpack operand type");
-  }
-  emitOperand(gprEncoding(Dst), Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
-                                          XmmRegister Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, Dst, Src);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x6B);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x63);
-  } else {
-    assert(false && "Unexpected vector pack operand type");
-  }
-  emitXmmRegisterOperand(Dst, Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
-                                          const Address &Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, Src, Dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x6B);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x63);
-  } else {
-    assert(false && "Unexpected vector pack operand type");
-  }
-  emitOperand(gprEncoding(Dst), Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
-                                          XmmRegister Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, Dst, Src);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x38);
-    emitUint8(0x2B);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x67);
-  } else {
-    assert(false && "Unexpected vector pack operand type");
-  }
-  emitXmmRegisterOperand(Dst, Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
-                                          const Address &Src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, Src, Dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32) {
-    emitUint8(0x38);
-    emitUint8(0x2B);
-  } else if (Ty == IceType_v8i16) {
-    emitUint8(0x67);
-  } else {
-    assert(false && "Unexpected vector pack operand type");
-  }
-  emitOperand(gprEncoding(Dst), Src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
-                                          XmmRegister src,
-                                          const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xC6);
-  emitXmmRegisterOperand(dst, src);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
-                                          const Address &src,
-                                          const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xC6);
-  static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-  emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
-  assert(imm.is_uint8());
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtpd(XmmRegister dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, dst);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitXmmRegisterOperand(dst, dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
-                                            XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
-                                            const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
-                                             XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
-                                             const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF3);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
-                                            XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
-                                            const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5B);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
-                                            Type SrcTy, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
-  emitRexRB(SrcTy, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x2A);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
-                                            Type SrcTy, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
-  emitRex(SrcTy, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x2A);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
-                                                  XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // ss2sd or sd2ss
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x5A);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
-                                                  const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x5A);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
-                                             Type SrcTy, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitRexRB(DestTy, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x2C);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
-                                             Type SrcTy, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitRex(DestTy, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x2C);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
-                                            Type SrcTy, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitRexRB(DestTy, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x2D);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
-                                            Type SrcTy, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
-  emitRex(DestTy, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x2D);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
-                                           XmmRegister b) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_f64)
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, a, b);
-  emitUint8(0x0F);
-  emitUint8(0x2E);
-  emitXmmRegisterOperand(a, b);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
-                                           const Address &b) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_f64)
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, b, a);
-  emitUint8(0x0F);
-  emitUint8(0x2E);
-  emitOperand(gprEncoding(a), b);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movmsk(Type Ty, GPRRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_v16i8) {
-    emitUint8(0x66);
-  } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
-    // No operand size prefix
-  } else {
-    assert(false && "Unexpected movmsk operand type");
-  }
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (Ty == IceType_v16i8) {
-    emitUint8(0xD7);
-  } else if (Ty == IceType_v4f32 || Ty == IceType_v4i32) {
-    emitUint8(0x50);
-  } else {
-    assert(false && "Unexpected movmsk operand type");
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
-                                        const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (isScalarFloatingType(Ty))
-    emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (isScalarFloatingType(Ty))
-    emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x51);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x57);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (!isFloat32Asserting32Or64(Ty))
-    emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x57);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
-                                            XmmRegister src,
-                                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  assert(isVectorFloatingType(Ty));
-  (void)Ty;
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x3A);
-  emitUint8(0x21);
-  emitXmmRegisterOperand(dst, src);
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
-                                            const Address &src,
-                                            const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  assert(isVectorFloatingType(Ty));
-  (void)Ty;
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x3A);
-  emitUint8(0x21);
-  static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-  emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
-                                         GPRRegister src,
-                                         const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  emitUint8(0x66);
-  emitRexRB(Ty, dst, src);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xC4);
-  } else {
-    emitUint8(0x3A);
-    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
-  }
-  emitXmmRegisterOperand(dst, src);
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
-                                         const Address &src,
-                                         const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (Ty == IceType_i16) {
-    emitUint8(0xC4);
-  } else {
-    emitUint8(0x3A);
-    emitUint8(isByteSizedType(Ty) ? 0x20 : 0x22);
-  }
-  static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-  emitOperand(gprEncoding(dst), src, OffsetFromNextInstruction);
-  emitUint8(imm.value());
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pextr(Type Ty, GPRRegister dst,
-                                         XmmRegister src,
-                                         const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(imm.is_uint8());
-  if (Ty == IceType_i16) {
-    emitUint8(0x66);
-    emitRexRB(Ty, dst, src);
-    emitUint8(0x0F);
-    emitUint8(0xC5);
-    emitXmmRegisterOperand(dst, src);
-    emitUint8(imm.value());
-  } else {
-    emitUint8(0x66);
-    emitRexRB(Ty, src, dst);
-    emitUint8(0x0F);
-    emitUint8(0x3A);
-    emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
-    // SSE 4.1 versions are "MRI" because dst can be mem, while pextrw (SSE2)
-    // is RMI because dst must be reg.
-    emitXmmRegisterOperand(src, dst);
-    emitUint8(imm.value());
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmovsxdq(XmmRegister dst, XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x38);
-  emitUint8(0x25);
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x74);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x75);
-  } else {
-    emitUint8(0x76);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
-                                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x74);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x75);
-  } else {
-    emitUint8(0x76);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x64);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x65);
-  } else {
-    emitUint8(0x66);
-  }
-  emitXmmRegisterOperand(dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
-                                          const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty)) {
-    emitUint8(0x64);
-  } else if (Ty == IceType_i16) {
-    emitUint8(0x65);
-  } else {
-    emitUint8(0x66);
-  }
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
-                                         XmmRegister src,
-                                         const Immediate &mode) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRexRB(RexTypeIrrelevant, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0x3A);
-  switch (Ty) {
-  case IceType_v4f32:
-    emitUint8(0x08);
-    break;
-  case IceType_f32:
-    emitUint8(0x0A);
-    break;
-  case IceType_f64:
-    emitUint8(0x0B);
-    break;
-  default:
-    assert(false && "Unsupported round operand type");
-  }
-  emitXmmRegisterOperand(dst, src);
-  // Mask precision exeption.
-  emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
-                                         const Address &src,
-                                         const Immediate &mode) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x66);
-  emitRex(RexTypeIrrelevant, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0x3A);
-  switch (Ty) {
-  case IceType_v4f32:
-    emitUint8(0x08);
-    break;
-  case IceType_f32:
-    emitUint8(0x0A);
-    break;
-  case IceType_f64:
-    emitUint8(0x0B);
-    break;
-  default:
-    assert(false && "Unsupported round operand type");
-  }
-  emitOperand(gprEncoding(dst), src);
-  // Mask precision exeption.
-  emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fnstcw(const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitOperand(7, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fldcw(const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitOperand(5, src);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fistpl(const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDF);
-  emitOperand(7, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fistps(const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDB);
-  emitOperand(3, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fildl(const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDF);
-  emitOperand(5, src);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::filds(const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDB);
-  emitOperand(0, src);
-}
-
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::fincstp() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitUint8(0xF7);
-}
-
-template <typename TraitsType>
-template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
-                                             const Immediate &imm) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, reg);
-  if (isByteSizedType(Ty)) {
-    emitComplexI8(Tag, Operand(reg), imm);
-  } else {
-    emitComplex(Ty, Tag, Operand(reg), imm);
-  }
-}
-
-template <typename TraitsType>
-template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg0,
-                                             GPRRegister reg1) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, reg0, reg1);
-  if (isByteSizedType(Ty))
-    emitUint8(Tag * 8 + 2);
-  else
-    emitUint8(Tag * 8 + 3);
-  emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
-}
-
-template <typename TraitsType>
-template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
-                                             const Address &address) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, address, reg);
-  if (isByteSizedType(Ty))
-    emitUint8(Tag * 8 + 2);
-  else
-    emitUint8(Tag * 8 + 3);
-  emitOperand(gprEncoding(reg), address);
-}
-
-template <typename TraitsType>
-template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
-                                             GPRRegister reg) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, address, reg);
-  if (isByteSizedType(Ty))
-    emitUint8(Tag * 8 + 0);
-  else
-    emitUint8(Tag * 8 + 1);
-  emitOperand(gprEncoding(reg), address);
-}
-
-template <typename TraitsType>
-template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
-                                             const Immediate &imm) {
-  static_assert(Tag < 8, "Tag must be between 0..7");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, address, RexRegIrrelevant);
-  if (isByteSizedType(Ty)) {
-    emitComplexI8(Tag, address, imm);
-  } else {
-    emitComplex(Ty, Tag, address, imm);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  arith_int<7>(Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg0,
-                                       GPRRegister reg1) {
-  arith_int<7>(Ty, reg0, reg1);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
-                                       const Address &address) {
-  arith_int<7>(Ty, reg, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
-                                       GPRRegister reg) {
-  arith_int<7>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
-                                       const Immediate &imm) {
-  arith_int<7>(Ty, address, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg1,
-                                        GPRRegister reg2) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, reg1, reg2);
-  if (isByteSizedType(Ty))
-    emitUint8(0x84);
-  else
-    emitUint8(0x85);
-  emitRegisterOperand(gprEncoding(reg1), gprEncoding(reg2));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
-                                        GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, addr, reg);
-  if (isByteSizedType(Ty))
-    emitUint8(0x84);
-  else
-    emitUint8(0x85);
-  emitOperand(gprEncoding(reg), addr);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg,
-                                        const Immediate &immediate) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // For registers that have a byte variant (EAX, EBX, ECX, and EDX) we only
-  // test the byte register to keep the encoding short. This is legal even if
-  // the register had high bits set since this only sets flags registers based
-  // on the "AND" of the two operands, and the immediate had zeros at those
-  // high bits.
-  if (immediate.is_uint8() && reg <= Traits::Last8BitGPR) {
-    // Use zero-extended 8-bit immediate.
-    emitRexB(Ty, reg);
-    if (reg == Traits::Encoded_Reg_Accumulator) {
-      emitUint8(0xA8);
-    } else {
-      emitUint8(0xF6);
-      emitUint8(0xC0 + gprEncoding(reg));
-    }
-    emitUint8(immediate.value() & 0xFF);
-  } else if (reg == Traits::Encoded_Reg_Accumulator) {
-    // Use short form if the destination is EAX.
-    if (Ty == IceType_i16)
-      emitOperandSizeOverride();
-    emitUint8(0xA9);
-    emitImmediate(Ty, immediate);
-  } else {
-    if (Ty == IceType_i16)
-      emitOperandSizeOverride();
-    emitRexB(Ty, reg);
-    emitUint8(0xF7);
-    emitRegisterOperand(0, gprEncoding(reg));
-    emitImmediate(Ty, immediate);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
-                                        const Immediate &immediate) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // If the immediate is short, we only test the byte addr to keep the encoding
-  // short.
-  if (immediate.is_uint8()) {
-    // Use zero-extended 8-bit immediate.
-    emitRex(Ty, addr, RexRegIrrelevant);
-    emitUint8(0xF6);
-    static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-    emitOperand(0, addr, OffsetFromNextInstruction);
-    emitUint8(immediate.value() & 0xFF);
-  } else {
-    if (Ty == IceType_i16)
-      emitOperandSizeOverride();
-    emitRex(Ty, addr, RexRegIrrelevant);
-    emitUint8(0xF7);
-    const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
-    emitOperand(0, addr, OffsetFromNextInstruction);
-    emitImmediate(Ty, immediate);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  arith_int<4>(Ty, dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
-                                       const Address &address) {
-  arith_int<4>(Ty, dst, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
-                                       const Immediate &imm) {
-  arith_int<4>(Ty, dst, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
-                                       GPRRegister reg) {
-  arith_int<4>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
-                                       const Immediate &imm) {
-  arith_int<4>(Ty, address, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
-                                      GPRRegister src) {
-  arith_int<1>(Ty, dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
-                                      const Address &address) {
-  arith_int<1>(Ty, dst, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
-                                      const Immediate &imm) {
-  arith_int<1>(Ty, dst, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
-                                      GPRRegister reg) {
-  arith_int<1>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
-                                      const Immediate &imm) {
-  arith_int<1>(Ty, address, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  arith_int<6>(Ty, dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
-                                       const Address &address) {
-  arith_int<6>(Ty, dst, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
-                                       const Immediate &imm) {
-  arith_int<6>(Ty, dst, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
-                                       GPRRegister reg) {
-  arith_int<6>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
-                                       const Immediate &imm) {
-  arith_int<6>(Ty, address, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  arith_int<0>(Ty, dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
-                                       const Address &address) {
-  arith_int<0>(Ty, reg, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  arith_int<0>(Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
-                                       GPRRegister reg) {
-  arith_int<0>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
-                                       const Immediate &imm) {
-  arith_int<0>(Ty, address, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  arith_int<2>(Ty, dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
-                                       const Address &address) {
-  arith_int<2>(Ty, dst, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  arith_int<2>(Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
-                                       GPRRegister reg) {
-  arith_int<2>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
-                                       const Immediate &imm) {
-  arith_int<2>(Ty, address, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  arith_int<5>(Ty, dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
-                                       const Address &address) {
-  arith_int<5>(Ty, reg, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  arith_int<5>(Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
-                                       GPRRegister reg) {
-  arith_int<5>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
-                                       const Immediate &imm) {
-  arith_int<5>(Ty, address, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  arith_int<3>(Ty, dst, src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
-                                       const Address &address) {
-  arith_int<3>(Ty, dst, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  arith_int<3>(Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
-                                       GPRRegister reg) {
-  arith_int<3>(Ty, address, reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
-                                       const Immediate &imm) {
-  arith_int<3>(Ty, address, imm);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::cbw() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitOperandSizeOverride();
-  emitUint8(0x98);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::cwd() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitOperandSizeOverride();
-  emitUint8(0x99);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::cdq() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x99);
-}
-
-template <typename TraitsType>
-template <typename T>
-typename std::enable_if<T::Is64Bit, void>::type
-AssemblerX86Base<TraitsType>::cqo() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexB(RexTypeForceRexW, RexRegIrrelevant);
-  emitUint8(0x99);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::div(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, reg);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(6, gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::div(Type Ty, const Address &addr) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, addr, RexRegIrrelevant);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(6, addr);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::idiv(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, reg);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(7, gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::idiv(Type Ty, const Address &addr) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, addr, RexRegIrrelevant);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(7, addr);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
-                                        GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
-         (Traits::Is64Bit && Ty == IceType_i64));
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xAF);
-  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
-                                        const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
-         (Traits::Is64Bit && Ty == IceType_i64));
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, address, reg);
-  emitUint8(0x0F);
-  emitUint8(0xAF);
-  emitOperand(gprEncoding(reg), address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
-                                        const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 || Ty == IceType_i64);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, reg, reg);
-  if (imm.is_int8()) {
-    emitUint8(0x6B);
-    emitRegisterOperand(gprEncoding(reg), gprEncoding(reg));
-    emitUint8(imm.value() & 0xFF);
-  } else {
-    emitUint8(0x69);
-    emitRegisterOperand(gprEncoding(reg), gprEncoding(reg));
-    emitImmediate(Ty, imm);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, reg);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(5, gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, address, RexRegIrrelevant);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(5, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
-                                        GPRRegister src, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, dst, src);
-  if (imm.is_int8()) {
-    emitUint8(0x6B);
-    emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-    emitUint8(imm.value() & 0xFF);
-  } else {
-    emitUint8(0x69);
-    emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-    emitImmediate(Ty, imm);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
-                                        const Address &address,
-                                        const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, address, dst);
-  if (imm.is_int8()) {
-    emitUint8(0x6B);
-    static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-    emitOperand(gprEncoding(dst), address, OffsetFromNextInstruction);
-    emitUint8(imm.value() & 0xFF);
-  } else {
-    emitUint8(0x69);
-    const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
-    emitOperand(gprEncoding(dst), address, OffsetFromNextInstruction);
-    emitImmediate(Ty, imm);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mul(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, reg);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(4, gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mul(Type Ty, const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, address, RexRegIrrelevant);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(4, address);
-}
-
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::incl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x40 + reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::incl(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(IceType_i32, address, RexRegIrrelevant);
-  emitUint8(0xFF);
-  emitOperand(0, address);
-}
-
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::decl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x48 + reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::decl(const Address &address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRex(IceType_i32, address, RexRegIrrelevant);
-  emitUint8(0xFF);
-  emitOperand(1, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  emitGenericShift(0, Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
-  emitGenericShift(0, Ty, Operand(operand), shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rol(Type Ty, const Address &operand,
-                                       GPRRegister shifter) {
-  emitGenericShift(0, Ty, operand, shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  emitGenericShift(4, Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
-  emitGenericShift(4, Ty, Operand(operand), shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shl(Type Ty, const Address &operand,
-                                       GPRRegister shifter) {
-  emitGenericShift(4, Ty, operand, shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  emitGenericShift(5, Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
-  emitGenericShift(5, Ty, Operand(operand), shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shr(Type Ty, const Address &operand,
-                                       GPRRegister shifter) {
-  emitGenericShift(5, Ty, operand, shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
-  emitGenericShift(7, Ty, reg, imm);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
-  emitGenericShift(7, Ty, Operand(operand), shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sar(Type Ty, const Address &address,
-                                       GPRRegister shifter) {
-  emitGenericShift(7, Ty, address, shifter);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
-                                        GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xA5);
-  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
-                                        GPRRegister src, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  assert(imm.is_int8());
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xA4);
-  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
-  emitUint8(imm.value() & 0xFF);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shld(Type Ty, const Address &operand,
-                                        GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, operand, src);
-  emitUint8(0x0F);
-  emitUint8(0xA5);
-  emitOperand(gprEncoding(src), operand);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
-                                        GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xAD);
-  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
-                                        GPRRegister src, const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  assert(imm.is_int8());
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xAC);
-  emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
-  emitUint8(imm.value() & 0xFF);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shrd(Type Ty, const Address &dst,
-                                        GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xAD);
-  emitOperand(gprEncoding(src), dst);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::neg(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, reg);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitRegisterOperand(3, gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::neg(Type Ty, const Address &addr) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, addr, RexRegIrrelevant);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xF6);
-  else
-    emitUint8(0xF7);
-  emitOperand(3, addr);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::notl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexB(IceType_i32, reg);
-  emitUint8(0xF7);
-  emitUint8(0xD0 | gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bswap(Type Ty, GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
-  emitRexB(Ty, reg);
-  emitUint8(0x0F);
-  emitUint8(0xC8 | gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
-         (Traits::Is64Bit && Ty == IceType_i64));
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xBC);
-  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
-                                       const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
-         (Traits::Is64Bit && Ty == IceType_i64));
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xBC);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
-         (Traits::Is64Bit && Ty == IceType_i64));
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexRB(Ty, dst, src);
-  emitUint8(0x0F);
-  emitUint8(0xBD);
-  emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
-                                       const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(Ty == IceType_i16 || Ty == IceType_i32 ||
-         (Traits::Is64Bit && Ty == IceType_i64));
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, src, dst);
-  emitUint8(0x0F);
-  emitUint8(0xBD);
-  emitOperand(gprEncoding(dst), src);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bt(GPRRegister base, GPRRegister offset) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexRB(IceType_i32, offset, base);
-  emitUint8(0x0F);
-  emitUint8(0xA3);
-  emitRegisterOperand(gprEncoding(offset), gprEncoding(base));
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::ret() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xC3);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::ret(const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xC2);
-  assert(imm.is_uint16());
-  emitUint8(imm.value() & 0xFF);
-  emitUint8((imm.value() >> 8) & 0xFF);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::nop(int size) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // There are nops up to size 15, but for now just provide up to size 8.
-  assert(0 < size && size <= MAX_NOP_SIZE);
-  switch (size) {
-  case 1:
-    emitUint8(0x90);
-    break;
-  case 2:
-    emitUint8(0x66);
-    emitUint8(0x90);
-    break;
-  case 3:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x00);
-    break;
-  case 4:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x40);
-    emitUint8(0x00);
-    break;
-  case 5:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x44);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  case 6:
-    emitUint8(0x66);
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x44);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  case 7:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x80);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  case 8:
-    emitUint8(0x0F);
-    emitUint8(0x1F);
-    emitUint8(0x84);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    emitUint8(0x00);
-    break;
-  default:
-    llvm_unreachable("Unimplemented");
-  }
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::int3() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xCC);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::hlt() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF4);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::ud2() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x0B);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::j(BrCond condition, Label *label,
-                                     bool near) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (label->isBound()) {
-    static const int kShortSize = 2;
-    static const int kLongSize = 6;
-    intptr_t offset = label->getPosition() - Buffer.size();
-    assert(offset <= 0);
-    if (Utils::IsInt(8, offset - kShortSize)) {
-      emitUint8(0x70 + condition);
-      emitUint8((offset - kShortSize) & 0xFF);
-    } else {
-      emitUint8(0x0F);
-      emitUint8(0x80 + condition);
-      emitInt32(offset - kLongSize);
-    }
-  } else if (near) {
-    emitUint8(0x70 + condition);
-    emitNearLabelLink(label);
-  } else {
-    emitUint8(0x0F);
-    emitUint8(0x80 + condition);
-    emitLabelLink(label);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::j(BrCond condition,
-                                     const ConstantRelocatable *label) {
-  llvm::report_fatal_error("Untested - please verify and then reenable.");
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x80 + condition);
-  auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
-  Fixup->set_addend(-4);
-  emitFixup(Fixup);
-  emitInt32(0);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitRexB(RexTypeIrrelevant, reg);
-  emitUint8(0xFF);
-  emitRegisterOperand(4, gprEncoding(reg));
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(Label *label, bool near) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (label->isBound()) {
-    static const int kShortSize = 2;
-    static const int kLongSize = 5;
-    intptr_t offset = label->getPosition() - Buffer.size();
-    assert(offset <= 0);
-    if (Utils::IsInt(8, offset - kShortSize)) {
-      emitUint8(0xEB);
-      emitUint8((offset - kShortSize) & 0xFF);
-    } else {
-      emitUint8(0xE9);
-      emitInt32(offset - kLongSize);
-    }
-  } else if (near) {
-    emitUint8(0xEB);
-    emitNearLabelLink(label);
-  } else {
-    emitUint8(0xE9);
-    emitLabelLink(label);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(const ConstantRelocatable *label) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xE9);
-  auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
-  Fixup->set_addend(-4);
-  emitFixup(Fixup);
-  emitInt32(0);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(const Immediate &abs_address) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xE9);
-  AssemblerFixup *Fixup =
-      createFixup(Traits::FK_PcRel, AssemblerFixup::NullSymbol);
-  Fixup->set_addend(abs_address.value() - 4);
-  emitFixup(Fixup);
-  emitInt32(0);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::mfence() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0xAE);
-  emitUint8(0xF0);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::lock() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xF0);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpxchg(Type Ty, const Address &address,
-                                           GPRRegister reg, bool Locked) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (Locked)
-    emitUint8(0xF0);
-  emitRex(Ty, address, reg);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xB0);
-  else
-    emitUint8(0xB1);
-  emitOperand(gprEncoding(reg), address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpxchg8b(const Address &address,
-                                             bool Locked) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Locked)
-    emitUint8(0xF0);
-  emitRex(IceType_i32, address, RexRegIrrelevant);
-  emitUint8(0x0F);
-  emitUint8(0xC7);
-  emitOperand(1, address);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xadd(Type Ty, const Address &addr,
-                                        GPRRegister reg, bool Locked) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  if (Locked)
-    emitUint8(0xF0);
-  emitRex(Ty, addr, reg);
-  emitUint8(0x0F);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0xC0);
-  else
-    emitUint8(0xC1);
-  emitOperand(gprEncoding(reg), addr);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xchg(Type Ty, GPRRegister reg0,
-                                        GPRRegister reg1) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  // Use short form if either register is EAX.
-  if (reg0 == Traits::Encoded_Reg_Accumulator) {
-    emitRexB(Ty, reg1);
-    emitUint8(0x90 + gprEncoding(reg1));
-  } else if (reg1 == Traits::Encoded_Reg_Accumulator) {
-    emitRexB(Ty, reg0);
-    emitUint8(0x90 + gprEncoding(reg0));
-  } else {
-    emitRexRB(Ty, reg0, reg1);
-    if (isByteSizedArithType(Ty))
-      emitUint8(0x86);
-    else
-      emitUint8(0x87);
-    emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xchg(Type Ty, const Address &addr,
-                                        GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRex(Ty, addr, reg);
-  if (isByteSizedArithType(Ty))
-    emitUint8(0x86);
-  else
-    emitUint8(0x87);
-  emitOperand(gprEncoding(reg), addr);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_start() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x0F);
-  emitUint8(0x0B);
-
-  // mov $111, ebx
-  constexpr GPRRegister dst = Traits::GPRRegister::Encoded_Reg_ebx;
-  constexpr Type Ty = IceType_i32;
-  emitRexB(Ty, dst);
-  emitUint8(0xB8 + gprEncoding(dst));
-  emitImmediate(Ty, Immediate(111));
-
-  emitUint8(0x64);
-  emitUint8(0x67);
-  emitUint8(0x90);
-}
-
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_end() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-
-  // mov $222, ebx
-  constexpr GPRRegister dst = Traits::GPRRegister::Encoded_Reg_ebx;
-  constexpr Type Ty = IceType_i32;
-  emitRexB(Ty, dst);
-  emitUint8(0xB8 + gprEncoding(dst));
-  emitImmediate(Ty, Immediate(222));
-
-  emitUint8(0x64);
-  emitUint8(0x67);
-  emitUint8(0x90);
-
-  emitUint8(0x0F);
-  emitUint8(0x0B);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitSegmentOverride(uint8_t prefix) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(prefix);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::align(intptr_t alignment, intptr_t offset) {
-  assert(llvm::isPowerOf2_32(alignment));
-  intptr_t pos = offset + Buffer.getPosition();
-  intptr_t mod = pos & (alignment - 1);
-  if (mod == 0) {
-    return;
-  }
-  intptr_t bytes_needed = alignment - mod;
-  while (bytes_needed > MAX_NOP_SIZE) {
-    nop(MAX_NOP_SIZE);
-    bytes_needed -= MAX_NOP_SIZE;
-  }
-  if (bytes_needed) {
-    nop(bytes_needed);
-  }
-  assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bind(Label *L) {
-  const intptr_t Bound = Buffer.size();
-  assert(!L->isBound()); // Labels can only be bound once.
-  while (L->isLinked()) {
-    const intptr_t Position = L->getLinkPosition();
-    const intptr_t Next = Buffer.load<int32_t>(Position);
-    const intptr_t Offset = Bound - (Position + 4);
-    Buffer.store<int32_t>(Position, Offset);
-    L->Position = Next;
-  }
-  while (L->hasNear()) {
-    intptr_t Position = L->getNearPosition();
-    const intptr_t Offset = Bound - (Position + 1);
-    assert(Utils::IsInt(8, Offset));
-    Buffer.store<int8_t>(Position, Offset);
-  }
-  L->bindTo(Bound);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitOperand(int rm, const Operand &operand,
-                                               RelocOffsetT Addend) {
-  assert(rm >= 0 && rm < 8);
-  const intptr_t length = operand.length_;
-  assert(length > 0);
-  intptr_t displacement_start = 1;
-  // Emit the ModRM byte updated with the given RM value.
-  assert((operand.encoding_[0] & 0x38) == 0);
-  emitUint8(operand.encoding_[0] + (rm << 3));
-  // Whenever the addressing mode is not register indirect, using esp == 0x4
-  // as the register operation indicates an SIB byte follows.
-  if (((operand.encoding_[0] & 0xc0) != 0xc0) &&
-      ((operand.encoding_[0] & 0x07) == 0x04)) {
-    emitUint8(operand.encoding_[1]);
-    displacement_start = 2;
-  }
-
-  AssemblerFixup *Fixup = operand.fixup();
-  if (Fixup == nullptr) {
-    for (intptr_t i = displacement_start; i < length; i++) {
-      emitUint8(operand.encoding_[i]);
-    }
-    return;
-  }
-
-  // Emit the fixup, and a dummy 4-byte immediate. Note that the Disp32 in
-  // operand.encoding_[i, i+1, i+2, i+3] is part of the constant relocatable
-  // used to create the fixup, so there's no need to add it to the addend.
-  assert(length - displacement_start == 4);
-  if (fixupIsPCRel(Fixup->kind())) {
-    Fixup->set_addend(Fixup->get_addend() - Addend);
-  } else {
-    Fixup->set_addend(Fixup->get_addend());
-  }
-  emitFixup(Fixup);
-  emitInt32(0);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitImmediate(Type Ty,
-                                                 const Immediate &imm) {
-  auto *const Fixup = imm.fixup();
-  if (Ty == IceType_i16) {
-    assert(Fixup == nullptr);
-    emitInt16(imm.value());
-    return;
-  }
-
-  if (Fixup == nullptr) {
-    emitInt32(imm.value());
-    return;
-  }
-
-  Fixup->set_addend(Fixup->get_addend() + imm.value());
-  emitFixup(Fixup);
-  emitInt32(0);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitComplexI8(int rm, const Operand &operand,
-                                                 const Immediate &immediate) {
-  assert(rm >= 0 && rm < 8);
-  assert(immediate.is_int8());
-  if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
-    // Use short form if the destination is al.
-    emitUint8(0x04 + (rm << 3));
-    emitUint8(immediate.value() & 0xFF);
-  } else {
-    // Use sign-extended 8-bit immediate.
-    emitUint8(0x80);
-    static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-    emitOperand(rm, operand, OffsetFromNextInstruction);
-    emitUint8(immediate.value() & 0xFF);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitComplex(Type Ty, int rm,
-                                               const Operand &operand,
-                                               const Immediate &immediate) {
-  assert(rm >= 0 && rm < 8);
-  if (immediate.is_int8()) {
-    // Use sign-extended 8-bit immediate.
-    emitUint8(0x83);
-    static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-    emitOperand(rm, operand, OffsetFromNextInstruction);
-    emitUint8(immediate.value() & 0xFF);
-  } else if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
-    // Use short form if the destination is eax.
-    emitUint8(0x05 + (rm << 3));
-    emitImmediate(Ty, immediate);
-  } else {
-    emitUint8(0x81);
-    const uint8_t OffsetFromNextInstruction = Ty == IceType_i16 ? 2 : 4;
-    emitOperand(rm, operand, OffsetFromNextInstruction);
-    emitImmediate(Ty, immediate);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitLabel(Label *label,
-                                             intptr_t instruction_size) {
-  if (label->isBound()) {
-    intptr_t offset = label->getPosition() - Buffer.size();
-    assert(offset <= 0);
-    emitInt32(offset - instruction_size);
-  } else {
-    emitLabelLink(label);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitLabelLink(Label *Label) {
-  assert(!Label->isBound());
-  intptr_t Position = Buffer.size();
-  emitInt32(Label->Position);
-  Label->linkTo(*this, Position);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitNearLabelLink(Label *Label) {
-  assert(!Label->isBound());
-  intptr_t Position = Buffer.size();
-  emitUint8(0);
-  Label->nearLinkTo(*this, Position);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
-                                                    GPRRegister reg,
-                                                    const Immediate &imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // We don't assert that imm fits into 8 bits; instead, it gets masked below.
-  // Note that we don't mask it further (e.g. to 5 bits) because we want the
-  // same processor behavior regardless of whether it's an immediate (masked to
-  // 8 bits) or in register cl (essentially ecx masked to 8 bits).
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, reg);
-  if (imm.value() == 1) {
-    emitUint8(isByteSizedArithType(Ty) ? 0xD0 : 0xD1);
-    emitOperand(rm, Operand(reg));
-  } else {
-    emitUint8(isByteSizedArithType(Ty) ? 0xC0 : 0xC1);
-    static constexpr RelocOffsetT OffsetFromNextInstruction = 1;
-    emitOperand(rm, Operand(reg), OffsetFromNextInstruction);
-    emitUint8(imm.value() & 0xFF);
-  }
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
-                                                    const Operand &operand,
-                                                    GPRRegister shifter) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  assert(shifter == Traits::Encoded_Reg_Counter);
-  (void)shifter;
-  if (Ty == IceType_i16)
-    emitOperandSizeOverride();
-  emitRexB(Ty, operand.rm());
-  emitUint8(isByteSizedArithType(Ty) ? 0xD2 : 0xD3);
-  emitOperand(rm, operand);
-}
-
-} // namespace X8632
-} // end of namespace Ice
diff --git a/third_party/subzero/src/IceAssemblerX8664BaseImpl.h b/third_party/subzero/src/IceAssemblerX8664.cpp
similarity index 60%
rename from third_party/subzero/src/IceAssemblerX8664BaseImpl.h
rename to third_party/subzero/src/IceAssemblerX8664.cpp
index 1dcddec..f493a45 100644
--- a/third_party/subzero/src/IceAssemblerX8664BaseImpl.h
+++ b/third_party/subzero/src/IceAssemblerX8664.cpp
@@ -1,4 +1,4 @@
-//===- subzero/src/IceAssemblerX86BaseImpl.h - base x86 assembler -*- C++ -*-=//
+//===- subzero/src/IceAssemblerX8664.cpp - x86-64 assembler -------*- C++ -*-=//
 // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
@@ -15,12 +15,11 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// \brief Implements the AssemblerX86Base template class, which is the base
-/// Assembler class for X86 assemblers.
+/// \brief Implements the AssemblerX8664 class.
 //
 //===----------------------------------------------------------------------===//
 
-#include "IceAssemblerX8664Base.h"
+#include "IceAssemblerX8664.h"
 
 #include "IceCfg.h"
 #include "IceCfgNode.h"
@@ -29,8 +28,7 @@
 namespace Ice {
 namespace X8664 {
 
-template <typename TraitsType>
-AssemblerX86Base<TraitsType>::~AssemblerX86Base() {
+AssemblerX8664::~AssemblerX8664() {
   if (BuildDefs::asserts()) {
     for (const Label *Label : CfgNodeLabels) {
       Label->finalCheck();
@@ -41,8 +39,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::alignFunction() {
+void AssemblerX8664::alignFunction() {
   const SizeT Align = 1 << getBundleAlignLog2Bytes();
   SizeT BytesNeeded = Utils::OffsetToAlignment(Buffer.getPosition(), Align);
   constexpr SizeT HltSize = 1;
@@ -52,10 +49,8 @@
   }
 }
 
-template <typename TraitsType>
-typename AssemblerX86Base<TraitsType>::Label *
-AssemblerX86Base<TraitsType>::getOrCreateLabel(SizeT Number,
-                                               LabelVector &Labels) {
+typename AssemblerX8664::Label *
+AssemblerX8664::getOrCreateLabel(SizeT Number, LabelVector &Labels) {
   Label *L = nullptr;
   if (Number == Labels.size()) {
     L = new (this->allocate<Label>()) Label();
@@ -73,56 +68,48 @@
   return L;
 }
 
-template <typename TraitsType>
-Ice::Label *AssemblerX86Base<TraitsType>::getCfgNodeLabel(SizeT NodeNumber) {
+Ice::Label *AssemblerX8664::getCfgNodeLabel(SizeT NodeNumber) {
   assert(NodeNumber < CfgNodeLabels.size());
   return CfgNodeLabels[NodeNumber];
 }
 
-template <typename TraitsType>
-typename AssemblerX86Base<TraitsType>::Label *
-AssemblerX86Base<TraitsType>::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
+typename AssemblerX8664::Label *
+AssemblerX8664::getOrCreateCfgNodeLabel(SizeT NodeNumber) {
   return getOrCreateLabel(NodeNumber, CfgNodeLabels);
 }
 
-template <typename TraitsType>
-typename AssemblerX86Base<TraitsType>::Label *
-AssemblerX86Base<TraitsType>::getOrCreateLocalLabel(SizeT Number) {
+typename AssemblerX8664::Label *
+AssemblerX8664::getOrCreateLocalLabel(SizeT Number) {
   return getOrCreateLabel(Number, LocalLabels);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bindCfgNodeLabel(const CfgNode *Node) {
+void AssemblerX8664::bindCfgNodeLabel(const CfgNode *Node) {
   assert(!getPreliminary());
   Label *L = getOrCreateCfgNodeLabel(Node->getIndex());
   this->bind(L);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bindLocalLabel(SizeT Number) {
+void AssemblerX8664::bindLocalLabel(SizeT Number) {
   Label *L = getOrCreateLocalLabel(Number);
   if (!getPreliminary())
     this->bind(L);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(GPRRegister reg) {
+void AssemblerX8664::call(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeIrrelevant, reg);
   emitUint8(0xFF);
   emitRegisterOperand(2, gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(const Address &address) {
+void AssemblerX8664::call(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
   emitUint8(0xFF);
   emitOperand(2, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(const ConstantRelocatable *label) {
+void AssemblerX8664::call(const ConstantRelocatable *label) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   intptr_t call_start = Buffer.getPosition();
   emitUint8(0xE8);
@@ -134,8 +121,7 @@
   (void)call_start;
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::call(const Immediate &abs_address) {
+void AssemblerX8664::call(const Immediate &abs_address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   intptr_t call_start = Buffer.getPosition();
   emitUint8(0xE8);
@@ -147,22 +133,19 @@
   (void)call_start;
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pushl(GPRRegister reg) {
+void AssemblerX8664::pushl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeIrrelevant, reg);
   emitUint8(0x50 + gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pushl(const Immediate &Imm) {
+void AssemblerX8664::pushl(const Immediate &Imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x68);
   emitInt32(Imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pushl(const ConstantRelocatable *Label) {
+void AssemblerX8664::pushl(const ConstantRelocatable *Label) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x68);
   emitFixup(this->createFixup(Traits::FK_Abs, Label));
@@ -173,8 +156,7 @@
   emitInt32(0);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::popl(GPRRegister reg) {
+void AssemblerX8664::popl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // Any type that would not force a REX prefix to be emitted can be provided
   // here.
@@ -182,30 +164,14 @@
   emitUint8(0x58 + gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::popl(const Address &address) {
+void AssemblerX8664::popl(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
   emitUint8(0x8F);
   emitOperand(0, address);
 }
 
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::pushal() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x60);
-}
-
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::popal() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x61);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::setcc(BrCond condition, ByteRegister dst) {
+void AssemblerX8664::setcc(BrCond condition, ByteRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(IceType_i8, dst);
   emitUint8(0x0F);
@@ -213,9 +179,7 @@
   emitUint8(0xC0 + gprEncoding(dst));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::setcc(BrCond condition,
-                                         const Address &address) {
+void AssemblerX8664::setcc(BrCond condition, const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, address, RexRegIrrelevant);
   emitUint8(0x0F);
@@ -223,9 +187,7 @@
   emitOperand(0, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
-                                       const Immediate &imm) {
+void AssemblerX8664::mov(Type Ty, GPRRegister dst, const Immediate &imm) {
   assert(Ty != IceType_i64 && "i64 not supported yet.");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -242,9 +204,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::mov(Type Ty, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -257,9 +217,7 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, GPRRegister dst,
-                                       const Address &src) {
+void AssemblerX8664::mov(Type Ty, GPRRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -272,9 +230,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
-                                       GPRRegister src) {
+void AssemblerX8664::mov(Type Ty, const Address &dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -287,9 +243,7 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mov(Type Ty, const Address &dst,
-                                       const Immediate &imm) {
+void AssemblerX8664::mov(Type Ty, const Address &dst, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -307,10 +261,7 @@
   }
 }
 
-template <typename TraitsType>
-template <typename T>
-typename std::enable_if<T::Is64Bit, void>::type
-AssemblerX86Base<TraitsType>::movabs(const GPRRegister Dst, uint64_t Imm64) {
+void AssemblerX8664::movabs(const GPRRegister Dst, uint64_t Imm64) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   const bool NeedsRexW = (Imm64 & ~0xFFFFFFFFull) != 0;
   const Type RexType = NeedsRexW ? RexTypeForceRexW : RexTypeIrrelevant;
@@ -324,9 +275,7 @@
     emitInt32((Imm64 >> 32) & 0xFFFFFFFF);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
-                                         GPRRegister src) {
+void AssemblerX8664::movzx(Type SrcTy, GPRRegister dst, GPRRegister src) {
   if (Traits::Is64Bit && SrcTy == IceType_i32) {
     // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
     // operand to 64-bit.
@@ -343,9 +292,7 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movzx(Type SrcTy, GPRRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::movzx(Type SrcTy, GPRRegister dst, const Address &src) {
   if (Traits::Is64Bit && SrcTy == IceType_i32) {
     // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
     // operand to 64-bit.
@@ -362,9 +309,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
-                                         GPRRegister src) {
+void AssemblerX8664::movsx(Type SrcTy, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   bool ByteSized = isByteSizedType(SrcTy);
   emitRexRB(RexTypeForceRexW, dst, SrcTy, src);
@@ -378,9 +323,7 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movsx(Type SrcTy, GPRRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::movsx(Type SrcTy, GPRRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   bool ByteSized = isByteSizedType(SrcTy);
   emitRex(SrcTy, src, RexTypeForceRexW, dst);
@@ -394,9 +337,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::lea(Type Ty, GPRRegister dst,
-                                       const Address &src) {
+void AssemblerX8664::lea(Type Ty, GPRRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -407,9 +348,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
-                                        GPRRegister src) {
+void AssemblerX8664::cmov(Type Ty, BrCond cond, GPRRegister dst,
+                          GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -421,9 +361,8 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmov(Type Ty, BrCond cond, GPRRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::cmov(Type Ty, BrCond cond, GPRRegister dst,
+                          const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -435,15 +374,13 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::rep_movsb() {
+void AssemblerX8664::rep_movsb() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitUint8(0xA4);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::movss(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -452,9 +389,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movss(Type Ty, const Address &dst,
-                                         XmmRegister src) {
+void AssemblerX8664::movss(Type Ty, const Address &dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, dst, src);
@@ -463,9 +398,7 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::movss(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, src, dst);
@@ -474,9 +407,7 @@
   emitXmmRegisterOperand(src, dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
-                                        GPRRegister src) {
+void AssemblerX8664::movd(Type SrcTy, XmmRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(SrcTy, dst, src);
@@ -485,9 +416,7 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type SrcTy, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::movd(Type SrcTy, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(SrcTy, src, dst);
@@ -496,9 +425,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type DestTy, GPRRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::movd(Type DestTy, GPRRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(DestTy, src, dst);
@@ -507,9 +434,7 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movd(Type DestTy, const Address &dst,
-                                        XmmRegister src) {
+void AssemblerX8664::movd(Type DestTy, const Address &dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(DestTy, dst, src);
@@ -518,8 +443,7 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::movq(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -528,8 +452,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movq(const Address &dst, XmmRegister src) {
+void AssemblerX8664::movq(const Address &dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, dst, src);
@@ -538,8 +461,7 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movq(XmmRegister dst, const Address &src) {
+void AssemblerX8664::movq(XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -548,9 +470,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::addss(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -559,9 +479,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::addss(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -570,9 +488,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::subss(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -581,9 +497,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::subss(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -592,9 +506,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::mulss(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -603,9 +515,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::mulss(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -614,9 +524,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::divss(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -625,9 +533,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::divss(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -636,34 +542,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fld(Type Ty,
-                                       const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
-  emitOperand(0, src);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fstp(Type Ty,
-                                        const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(isFloat32Asserting32Or64(Ty) ? 0xD9 : 0xDD);
-  emitOperand(3, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fstp(typename T::X87STRegister st) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDD);
-  emitUint8(0xD8 + st);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movaps(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::movaps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -671,8 +550,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::movups(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -680,8 +558,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movups(XmmRegister dst, const Address &src) {
+void AssemblerX8664::movups(XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -689,8 +566,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movups(const Address &dst, XmmRegister src) {
+void AssemblerX8664::movups(const Address &dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -698,9 +574,7 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::padd(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -715,9 +589,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::padd(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -732,9 +604,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::padds(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -749,9 +619,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::padds(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -766,9 +634,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
+void AssemblerX8664::paddus(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -783,9 +649,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX8664::paddus(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -800,9 +664,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::pand(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -811,9 +673,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::pand(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -822,9 +682,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::pandn(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -833,9 +691,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pandn(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::pandn(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -844,9 +700,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::pmull(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -861,9 +715,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmull(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::pmull(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -878,9 +730,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
+void AssemblerX8664::pmulhw(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -891,9 +741,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhw(Type Ty, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX8664::pmulhw(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -904,9 +752,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
-                                           XmmRegister src) {
+void AssemblerX8664::pmulhuw(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -917,9 +763,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmulhuw(Type Ty, XmmRegister dst,
-                                           const Address &src) {
+void AssemblerX8664::pmulhuw(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -930,9 +774,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
-                                           XmmRegister src) {
+void AssemblerX8664::pmaddwd(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -943,9 +785,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmaddwd(Type Ty, XmmRegister dst,
-                                           const Address &src) {
+void AssemblerX8664::pmaddwd(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -956,9 +796,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
-                                           XmmRegister src) {
+void AssemblerX8664::pmuludq(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -967,9 +805,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmuludq(Type /* Ty */, XmmRegister dst,
-                                           const Address &src) {
+void AssemblerX8664::pmuludq(Type /* Ty */, XmmRegister dst,
+                             const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -978,9 +815,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
-                                       XmmRegister src) {
+void AssemblerX8664::por(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -989,9 +824,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::por(Type /* Ty */, XmmRegister dst,
-                                       const Address &src) {
+void AssemblerX8664::por(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1000,9 +833,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::psub(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1017,9 +848,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::psub(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1034,9 +863,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::psubs(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1051,9 +878,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::psubs(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1067,9 +892,8 @@
   }
   emitOperand(gprEncoding(dst), src);
 }
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
+
+void AssemblerX8664::psubus(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1084,9 +908,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX8664::psubus(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1101,9 +923,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::pxor(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1112,9 +932,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::pxor(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1123,9 +941,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::psll(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1139,9 +955,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::psll(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1155,9 +969,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psll(Type Ty, XmmRegister dst,
-                                        const Immediate &imm) {
+void AssemblerX8664::psll(Type Ty, XmmRegister dst, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_int8());
   emitUint8(0x66);
@@ -1173,9 +985,7 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::psra(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1189,9 +999,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::psra(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1205,9 +1013,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psra(Type Ty, XmmRegister dst,
-                                        const Immediate &imm) {
+void AssemblerX8664::psra(Type Ty, XmmRegister dst, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_int8());
   emitUint8(0x66);
@@ -1223,9 +1029,7 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::psrl(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1241,9 +1045,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::psrl(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1259,9 +1061,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::psrl(Type Ty, XmmRegister dst,
-                                        const Immediate &imm) {
+void AssemblerX8664::psrl(Type Ty, XmmRegister dst, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_int8());
   emitUint8(0x66);
@@ -1282,9 +1082,8 @@
 // {add,sub,mul,div}ps are given a Ty parameter for consistency with
 // {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc.,
 // we can use the Ty parameter to decide on adding a 0x66 prefix.
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
+
+void AssemblerX8664::addps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1292,9 +1091,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::addps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::addps(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -1302,9 +1099,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::subps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1312,9 +1107,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::subps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::subps(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -1322,9 +1115,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::divps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1332,9 +1123,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::divps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::divps(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -1342,9 +1131,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::mulps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1352,9 +1139,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mulps(Type /* Ty */, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::mulps(Type /* Ty */, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -1362,9 +1147,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::minps(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1374,9 +1157,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::minps(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1386,9 +1167,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::minss(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1397,9 +1176,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::minss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::minss(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1408,9 +1185,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::maxps(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1420,9 +1195,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::maxps(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1432,9 +1205,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::maxss(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1443,9 +1214,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::maxss(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::maxss(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1454,9 +1223,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
+void AssemblerX8664::andnps(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1466,9 +1233,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andnps(Type Ty, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX8664::andnps(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1478,9 +1243,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::andps(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1490,9 +1253,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::andps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::andps(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1502,9 +1263,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::orps(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1514,9 +1273,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::orps(Type Ty, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::orps(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -1526,9 +1283,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
-                                            XmmRegister src) {
+void AssemblerX8664::blendvps(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1538,9 +1293,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::blendvps(Type /* Ty */, XmmRegister dst,
-                                            const Address &src) {
+void AssemblerX8664::blendvps(Type /* Ty */, XmmRegister dst,
+                              const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1550,9 +1304,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
-                                            XmmRegister src) {
+void AssemblerX8664::pblendvb(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1562,9 +1314,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pblendvb(Type /* Ty */, XmmRegister dst,
-                                            const Address &src) {
+void AssemblerX8664::pblendvb(Type /* Ty */, XmmRegister dst,
+                              const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1574,10 +1325,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
-                                         XmmRegister src,
-                                         CmppsCond CmpCondition) {
+void AssemblerX8664::cmpps(Type Ty, XmmRegister dst, XmmRegister src,
+                           CmppsCond CmpCondition) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -1588,10 +1337,8 @@
   emitUint8(CmpCondition);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpps(Type Ty, XmmRegister dst,
-                                         const Address &src,
-                                         CmppsCond CmpCondition) {
+void AssemblerX8664::cmpps(Type Ty, XmmRegister dst, const Address &src,
+                           CmppsCond CmpCondition) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -1603,8 +1350,7 @@
   emitUint8(CmpCondition);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtps(XmmRegister dst) {
+void AssemblerX8664::sqrtps(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, dst);
   emitUint8(0x0F);
@@ -1612,8 +1358,7 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rsqrtps(XmmRegister dst) {
+void AssemblerX8664::rsqrtps(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, dst);
   emitUint8(0x0F);
@@ -1621,8 +1366,7 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::reciprocalps(XmmRegister dst) {
+void AssemblerX8664::reciprocalps(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, dst);
   emitUint8(0x0F);
@@ -1630,8 +1374,7 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movhlps(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::movhlps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1639,8 +1382,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movlhps(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::movlhps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1648,8 +1390,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpcklps(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::unpcklps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1657,8 +1398,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpckhps(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::unpckhps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1666,8 +1406,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpcklpd(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::unpcklpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1676,8 +1415,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::unpckhpd(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::unpckhpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1686,9 +1424,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::set1ps(XmmRegister dst, GPRRegister tmp1,
-                                          const Immediate &imm) {
+void AssemblerX8664::set1ps(XmmRegister dst, GPRRegister tmp1,
+                            const Immediate &imm) {
   // Load 32-bit immediate value into tmp1.
   mov(IceType_i32, tmp1, imm);
   // Move value from tmp1 into dst.
@@ -1697,9 +1434,7 @@
   shufps(RexTypeIrrelevant, dst, dst, Immediate(0x0));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
-                                          XmmRegister src) {
+void AssemblerX8664::pshufb(Type /* Ty */, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1709,9 +1444,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufb(Type /* Ty */, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX8664::pshufb(Type /* Ty */, XmmRegister dst,
+                            const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1721,10 +1455,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
-                                          XmmRegister src,
-                                          const Immediate &imm) {
+void AssemblerX8664::pshufd(Type /* Ty */, XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1735,10 +1467,8 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pshufd(Type /* Ty */, XmmRegister dst,
-                                          const Address &src,
-                                          const Immediate &imm) {
+void AssemblerX8664::pshufd(Type /* Ty */, XmmRegister dst, const Address &src,
+                            const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1750,9 +1480,7 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckl(Type Ty, XmmRegister Dst,
-                                           XmmRegister Src) {
+void AssemblerX8664::punpckl(Type Ty, XmmRegister Dst, XmmRegister Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, Dst, Src);
@@ -1769,9 +1497,7 @@
   emitXmmRegisterOperand(Dst, Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckl(Type Ty, XmmRegister Dst,
-                                           const Address &Src) {
+void AssemblerX8664::punpckl(Type Ty, XmmRegister Dst, const Address &Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, Src, Dst);
@@ -1788,9 +1514,7 @@
   emitOperand(gprEncoding(Dst), Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckh(Type Ty, XmmRegister Dst,
-                                           XmmRegister Src) {
+void AssemblerX8664::punpckh(Type Ty, XmmRegister Dst, XmmRegister Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, Dst, Src);
@@ -1807,9 +1531,7 @@
   emitXmmRegisterOperand(Dst, Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::punpckh(Type Ty, XmmRegister Dst,
-                                           const Address &Src) {
+void AssemblerX8664::punpckh(Type Ty, XmmRegister Dst, const Address &Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, Src, Dst);
@@ -1826,9 +1548,7 @@
   emitOperand(gprEncoding(Dst), Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
-                                          XmmRegister Src) {
+void AssemblerX8664::packss(Type Ty, XmmRegister Dst, XmmRegister Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, Dst, Src);
@@ -1843,9 +1563,7 @@
   emitXmmRegisterOperand(Dst, Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packss(Type Ty, XmmRegister Dst,
-                                          const Address &Src) {
+void AssemblerX8664::packss(Type Ty, XmmRegister Dst, const Address &Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, Src, Dst);
@@ -1860,9 +1578,7 @@
   emitOperand(gprEncoding(Dst), Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
-                                          XmmRegister Src) {
+void AssemblerX8664::packus(Type Ty, XmmRegister Dst, XmmRegister Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, Dst, Src);
@@ -1878,9 +1594,7 @@
   emitXmmRegisterOperand(Dst, Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::packus(Type Ty, XmmRegister Dst,
-                                          const Address &Src) {
+void AssemblerX8664::packus(Type Ty, XmmRegister Dst, const Address &Src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, Src, Dst);
@@ -1896,10 +1610,8 @@
   emitOperand(gprEncoding(Dst), Src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
-                                          XmmRegister src,
-                                          const Immediate &imm) {
+void AssemblerX8664::shufps(Type /* Ty */, XmmRegister dst, XmmRegister src,
+                            const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1909,10 +1621,8 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shufps(Type /* Ty */, XmmRegister dst,
-                                          const Address &src,
-                                          const Immediate &imm) {
+void AssemblerX8664::shufps(Type /* Ty */, XmmRegister dst, const Address &src,
+                            const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -1923,8 +1633,7 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtpd(XmmRegister dst) {
+void AssemblerX8664::sqrtpd(XmmRegister dst) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, dst);
@@ -1933,9 +1642,8 @@
   emitXmmRegisterOperand(dst, dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
-                                            XmmRegister src) {
+void AssemblerX8664::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                              XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(RexTypeIrrelevant, dst, src);
   emitUint8(0x0F);
@@ -1943,9 +1651,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
-                                            const Address &src) {
+void AssemblerX8664::cvtdq2ps(Type /* Ignore */, XmmRegister dst,
+                              const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(RexTypeIrrelevant, src, dst);
   emitUint8(0x0F);
@@ -1953,9 +1660,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
-                                             XmmRegister src) {
+void AssemblerX8664::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                               XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1964,9 +1670,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
-                                             const Address &src) {
+void AssemblerX8664::cvttps2dq(Type /* Ignore */, XmmRegister dst,
+                               const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF3);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1975,9 +1680,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
-                                            XmmRegister src) {
+void AssemblerX8664::cvtps2dq(Type /* Ignore */, XmmRegister dst,
+                              XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -1986,9 +1690,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
-                                            const Address &src) {
+void AssemblerX8664::cvtps2dq(Type /* Ignore */, XmmRegister dst,
+                              const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -1997,9 +1700,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
-                                            Type SrcTy, GPRRegister src) {
+void AssemblerX8664::cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy,
+                              GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
   emitRexRB(SrcTy, dst, src);
@@ -2008,9 +1710,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
-                                            Type SrcTy, const Address &src) {
+void AssemblerX8664::cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy,
+                              const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
   emitRex(SrcTy, src, dst);
@@ -2019,9 +1720,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
-                                                  XmmRegister src) {
+void AssemblerX8664::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                    XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // ss2sd or sd2ss
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
@@ -2031,9 +1731,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtfloat2float(Type SrcTy, XmmRegister dst,
-                                                  const Address &src) {
+void AssemblerX8664::cvtfloat2float(Type SrcTy, XmmRegister dst,
+                                    const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -2042,9 +1741,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
-                                             Type SrcTy, XmmRegister src) {
+void AssemblerX8664::cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy,
+                               XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitRexRB(DestTy, dst, src);
@@ -2053,9 +1751,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
-                                             Type SrcTy, const Address &src) {
+void AssemblerX8664::cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy,
+                               const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitRex(DestTy, src, dst);
@@ -2064,9 +1761,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
-                                            Type SrcTy, XmmRegister src) {
+void AssemblerX8664::cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy,
+                              XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitRexRB(DestTy, dst, src);
@@ -2075,9 +1771,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
-                                            Type SrcTy, const Address &src) {
+void AssemblerX8664::cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy,
+                              const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
   emitRex(DestTy, src, dst);
@@ -2086,9 +1781,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
-                                           XmmRegister b) {
+void AssemblerX8664::ucomiss(Type Ty, XmmRegister a, XmmRegister b) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -2098,9 +1791,7 @@
   emitXmmRegisterOperand(a, b);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
-                                           const Address &b) {
+void AssemblerX8664::ucomiss(Type Ty, XmmRegister a, const Address &b) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_f64)
     emitUint8(0x66);
@@ -2110,9 +1801,7 @@
   emitOperand(gprEncoding(a), b);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::movmsk(Type Ty, GPRRegister dst,
-                                          XmmRegister src) {
+void AssemblerX8664::movmsk(Type Ty, GPRRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_v16i8) {
     emitUint8(0x66);
@@ -2133,9 +1822,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
-                                        const Address &src) {
+void AssemblerX8664::sqrt(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (isScalarFloatingType(Ty))
     emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
@@ -2145,9 +1832,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
-                                        XmmRegister src) {
+void AssemblerX8664::sqrt(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (isScalarFloatingType(Ty))
     emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
@@ -2157,9 +1842,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
-                                         const Address &src) {
+void AssemblerX8664::xorps(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -2169,9 +1852,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xorps(Type Ty, XmmRegister dst,
-                                         XmmRegister src) {
+void AssemblerX8664::xorps(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (!isFloat32Asserting32Or64(Ty))
     emitUint8(0x66);
@@ -2181,10 +1862,8 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
-                                            XmmRegister src,
-                                            const Immediate &imm) {
+void AssemblerX8664::insertps(Type Ty, XmmRegister dst, XmmRegister src,
+                              const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   assert(isVectorFloatingType(Ty));
@@ -2198,10 +1877,8 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::insertps(Type Ty, XmmRegister dst,
-                                            const Address &src,
-                                            const Immediate &imm) {
+void AssemblerX8664::insertps(Type Ty, XmmRegister dst, const Address &src,
+                              const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   assert(isVectorFloatingType(Ty));
@@ -2216,10 +1893,8 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
-                                         GPRRegister src,
-                                         const Immediate &imm) {
+void AssemblerX8664::pinsr(Type Ty, XmmRegister dst, GPRRegister src,
+                           const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   emitUint8(0x66);
@@ -2235,10 +1910,8 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pinsr(Type Ty, XmmRegister dst,
-                                         const Address &src,
-                                         const Immediate &imm) {
+void AssemblerX8664::pinsr(Type Ty, XmmRegister dst, const Address &src,
+                           const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   emitUint8(0x66);
@@ -2255,10 +1928,8 @@
   emitUint8(imm.value());
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pextr(Type Ty, GPRRegister dst,
-                                         XmmRegister src,
-                                         const Immediate &imm) {
+void AssemblerX8664::pextr(Type Ty, GPRRegister dst, XmmRegister src,
+                           const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(imm.is_uint8());
   if (Ty == IceType_i16) {
@@ -2281,8 +1952,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pmovsxdq(XmmRegister dst, XmmRegister src) {
+void AssemblerX8664::pmovsxdq(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -2292,9 +1962,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
+void AssemblerX8664::pcmpeq(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -2309,9 +1977,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpeq(Type Ty, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX8664::pcmpeq(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -2326,9 +1992,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
-                                          XmmRegister src) {
+void AssemblerX8664::pcmpgt(Type Ty, XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -2343,9 +2007,7 @@
   emitXmmRegisterOperand(dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
-                                          const Address &src) {
+void AssemblerX8664::pcmpgt(Type Ty, XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -2360,10 +2022,8 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
-                                         XmmRegister src,
-                                         const Immediate &mode) {
+void AssemblerX8664::round(Type Ty, XmmRegister dst, XmmRegister src,
+                           const Immediate &mode) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRexRB(RexTypeIrrelevant, dst, src);
@@ -2387,10 +2047,8 @@
   emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
-                                         const Address &src,
-                                         const Immediate &mode) {
+void AssemblerX8664::round(Type Ty, XmmRegister dst, const Address &src,
+                           const Immediate &mode) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x66);
   emitRex(RexTypeIrrelevant, src, dst);
@@ -2414,66 +2072,8 @@
   emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
 }
 
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fnstcw(const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitOperand(7, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fldcw(const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitOperand(5, src);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fistpl(const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDF);
-  emitOperand(7, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fistps(const typename T::Address &dst) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDB);
-  emitOperand(3, dst);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::fildl(const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDF);
-  emitOperand(5, src);
-}
-
-template <typename TraitsType>
-template <typename T, typename>
-void AssemblerX86Base<TraitsType>::filds(const typename T::Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xDB);
-  emitOperand(0, src);
-}
-
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::fincstp() {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0xD9);
-  emitUint8(0xF7);
-}
-
-template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
-                                             const Immediate &imm) {
+void AssemblerX8664::arith_int(Type Ty, GPRRegister reg, const Immediate &imm) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2486,10 +2086,8 @@
   }
 }
 
-template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg0,
-                                             GPRRegister reg1) {
+void AssemblerX8664::arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2502,10 +2100,9 @@
   emitRegisterOperand(gprEncoding(reg0), gprEncoding(reg1));
 }
 
-template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, GPRRegister reg,
-                                             const Address &address) {
+void AssemblerX8664::arith_int(Type Ty, GPRRegister reg,
+                               const Address &address) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2518,10 +2115,9 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
-                                             GPRRegister reg) {
+void AssemblerX8664::arith_int(Type Ty, const Address &address,
+                               GPRRegister reg) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2534,10 +2130,9 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <typename TraitsType>
 template <uint32_t Tag>
-void AssemblerX86Base<TraitsType>::arith_int(Type Ty, const Address &address,
-                                             const Immediate &imm) {
+void AssemblerX8664::arith_int(Type Ty, const Address &address,
+                               const Immediate &imm) {
   static_assert(Tag < 8, "Tag must be between 0..7");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
@@ -2550,39 +2145,28 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::cmp(Type Ty, GPRRegister reg, const Immediate &imm) {
   arith_int<7>(Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg0,
-                                       GPRRegister reg1) {
+void AssemblerX8664::cmp(Type Ty, GPRRegister reg0, GPRRegister reg1) {
   arith_int<7>(Ty, reg0, reg1);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, GPRRegister reg,
-                                       const Address &address) {
+void AssemblerX8664::cmp(Type Ty, GPRRegister reg, const Address &address) {
   arith_int<7>(Ty, reg, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
-                                       GPRRegister reg) {
+void AssemblerX8664::cmp(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<7>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmp(Type Ty, const Address &address,
-                                       const Immediate &imm) {
+void AssemblerX8664::cmp(Type Ty, const Address &address,
+                         const Immediate &imm) {
   arith_int<7>(Ty, address, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg1,
-                                        GPRRegister reg2) {
+void AssemblerX8664::test(Type Ty, GPRRegister reg1, GPRRegister reg2) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2594,9 +2178,7 @@
   emitRegisterOperand(gprEncoding(reg1), gprEncoding(reg2));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
-                                        GPRRegister reg) {
+void AssemblerX8664::test(Type Ty, const Address &addr, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2608,9 +2190,8 @@
   emitOperand(gprEncoding(reg), addr);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, GPRRegister reg,
-                                        const Immediate &immediate) {
+void AssemblerX8664::test(Type Ty, GPRRegister reg,
+                          const Immediate &immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // For registers that have a byte variant (EAX, EBX, ECX, and EDX) we only
   // test the byte register to keep the encoding short. This is legal even if
@@ -2643,9 +2224,8 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::test(Type Ty, const Address &addr,
-                                        const Immediate &immediate) {
+void AssemblerX8664::test(Type Ty, const Address &addr,
+                          const Immediate &immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // If the immediate is short, we only test the byte addr to keep the encoding
   // short.
@@ -2667,244 +2247,176 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::And(Type Ty, GPRRegister dst, GPRRegister src) {
   arith_int<4>(Ty, dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
-                                       const Address &address) {
+void AssemblerX8664::And(Type Ty, GPRRegister dst, const Address &address) {
   arith_int<4>(Ty, dst, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, GPRRegister dst,
-                                       const Immediate &imm) {
+void AssemblerX8664::And(Type Ty, GPRRegister dst, const Immediate &imm) {
   arith_int<4>(Ty, dst, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
-                                       GPRRegister reg) {
+void AssemblerX8664::And(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<4>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::And(Type Ty, const Address &address,
-                                       const Immediate &imm) {
+void AssemblerX8664::And(Type Ty, const Address &address,
+                         const Immediate &imm) {
   arith_int<4>(Ty, address, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
-                                      GPRRegister src) {
+void AssemblerX8664::Or(Type Ty, GPRRegister dst, GPRRegister src) {
   arith_int<1>(Ty, dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
-                                      const Address &address) {
+void AssemblerX8664::Or(Type Ty, GPRRegister dst, const Address &address) {
   arith_int<1>(Ty, dst, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, GPRRegister dst,
-                                      const Immediate &imm) {
+void AssemblerX8664::Or(Type Ty, GPRRegister dst, const Immediate &imm) {
   arith_int<1>(Ty, dst, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
-                                      GPRRegister reg) {
+void AssemblerX8664::Or(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<1>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Or(Type Ty, const Address &address,
-                                      const Immediate &imm) {
+void AssemblerX8664::Or(Type Ty, const Address &address, const Immediate &imm) {
   arith_int<1>(Ty, address, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::Xor(Type Ty, GPRRegister dst, GPRRegister src) {
   arith_int<6>(Ty, dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
-                                       const Address &address) {
+void AssemblerX8664::Xor(Type Ty, GPRRegister dst, const Address &address) {
   arith_int<6>(Ty, dst, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, GPRRegister dst,
-                                       const Immediate &imm) {
+void AssemblerX8664::Xor(Type Ty, GPRRegister dst, const Immediate &imm) {
   arith_int<6>(Ty, dst, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
-                                       GPRRegister reg) {
+void AssemblerX8664::Xor(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<6>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::Xor(Type Ty, const Address &address,
-                                       const Immediate &imm) {
+void AssemblerX8664::Xor(Type Ty, const Address &address,
+                         const Immediate &imm) {
   arith_int<6>(Ty, address, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::add(Type Ty, GPRRegister dst, GPRRegister src) {
   arith_int<0>(Ty, dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
-                                       const Address &address) {
+void AssemblerX8664::add(Type Ty, GPRRegister reg, const Address &address) {
   arith_int<0>(Ty, reg, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::add(Type Ty, GPRRegister reg, const Immediate &imm) {
   arith_int<0>(Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
-                                       GPRRegister reg) {
+void AssemblerX8664::add(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<0>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::add(Type Ty, const Address &address,
-                                       const Immediate &imm) {
+void AssemblerX8664::add(Type Ty, const Address &address,
+                         const Immediate &imm) {
   arith_int<0>(Ty, address, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::adc(Type Ty, GPRRegister dst, GPRRegister src) {
   arith_int<2>(Ty, dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister dst,
-                                       const Address &address) {
+void AssemblerX8664::adc(Type Ty, GPRRegister dst, const Address &address) {
   arith_int<2>(Ty, dst, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::adc(Type Ty, GPRRegister reg, const Immediate &imm) {
   arith_int<2>(Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
-                                       GPRRegister reg) {
+void AssemblerX8664::adc(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<2>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::adc(Type Ty, const Address &address,
-                                       const Immediate &imm) {
+void AssemblerX8664::adc(Type Ty, const Address &address,
+                         const Immediate &imm) {
   arith_int<2>(Ty, address, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::sub(Type Ty, GPRRegister dst, GPRRegister src) {
   arith_int<5>(Ty, dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
-                                       const Address &address) {
+void AssemblerX8664::sub(Type Ty, GPRRegister reg, const Address &address) {
   arith_int<5>(Ty, reg, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::sub(Type Ty, GPRRegister reg, const Immediate &imm) {
   arith_int<5>(Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
-                                       GPRRegister reg) {
+void AssemblerX8664::sub(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<5>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sub(Type Ty, const Address &address,
-                                       const Immediate &imm) {
+void AssemblerX8664::sub(Type Ty, const Address &address,
+                         const Immediate &imm) {
   arith_int<5>(Ty, address, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::sbb(Type Ty, GPRRegister dst, GPRRegister src) {
   arith_int<3>(Ty, dst, src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister dst,
-                                       const Address &address) {
+void AssemblerX8664::sbb(Type Ty, GPRRegister dst, const Address &address) {
   arith_int<3>(Ty, dst, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::sbb(Type Ty, GPRRegister reg, const Immediate &imm) {
   arith_int<3>(Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
-                                       GPRRegister reg) {
+void AssemblerX8664::sbb(Type Ty, const Address &address, GPRRegister reg) {
   arith_int<3>(Ty, address, reg);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sbb(Type Ty, const Address &address,
-                                       const Immediate &imm) {
+void AssemblerX8664::sbb(Type Ty, const Address &address,
+                         const Immediate &imm) {
   arith_int<3>(Ty, address, imm);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::cbw() {
+void AssemblerX8664::cbw() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitOperandSizeOverride();
   emitUint8(0x98);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::cwd() {
+void AssemblerX8664::cwd() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitOperandSizeOverride();
   emitUint8(0x99);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::cdq() {
+void AssemblerX8664::cdq() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x99);
 }
 
-template <typename TraitsType>
-template <typename T>
-typename std::enable_if<T::Is64Bit, void>::type
-AssemblerX86Base<TraitsType>::cqo() {
+void AssemblerX8664::cqo() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeForceRexW, RexRegIrrelevant);
   emitUint8(0x99);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::div(Type Ty, GPRRegister reg) {
+void AssemblerX8664::div(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2916,8 +2428,7 @@
   emitRegisterOperand(6, gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::div(Type Ty, const Address &addr) {
+void AssemblerX8664::div(Type Ty, const Address &addr) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2929,8 +2440,7 @@
   emitOperand(6, addr);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::idiv(Type Ty, GPRRegister reg) {
+void AssemblerX8664::idiv(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2942,8 +2452,7 @@
   emitRegisterOperand(7, gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::idiv(Type Ty, const Address &addr) {
+void AssemblerX8664::idiv(Type Ty, const Address &addr) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -2955,9 +2464,7 @@
   emitOperand(7, addr);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
-                                        GPRRegister src) {
+void AssemblerX8664::imul(Type Ty, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -2969,9 +2476,7 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
-                                        const Address &address) {
+void AssemblerX8664::imul(Type Ty, GPRRegister reg, const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -2983,9 +2488,7 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg,
-                                        const Immediate &imm) {
+void AssemblerX8664::imul(Type Ty, GPRRegister reg, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 || Ty == IceType_i64);
   if (Ty == IceType_i16)
@@ -3002,8 +2505,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister reg) {
+void AssemblerX8664::imul(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3015,8 +2517,7 @@
   emitRegisterOperand(5, gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, const Address &address) {
+void AssemblerX8664::imul(Type Ty, const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3028,9 +2529,8 @@
   emitOperand(5, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
-                                        GPRRegister src, const Immediate &imm) {
+void AssemblerX8664::imul(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -3047,10 +2547,8 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::imul(Type Ty, GPRRegister dst,
-                                        const Address &address,
-                                        const Immediate &imm) {
+void AssemblerX8664::imul(Type Ty, GPRRegister dst, const Address &address,
+                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -3069,8 +2567,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mul(Type Ty, GPRRegister reg) {
+void AssemblerX8664::mul(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3082,8 +2579,7 @@
   emitRegisterOperand(4, gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::mul(Type Ty, const Address &address) {
+void AssemblerX8664::mul(Type Ty, const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3095,111 +2591,69 @@
   emitOperand(4, address);
 }
 
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::incl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x40 + reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::incl(const Address &address) {
+void AssemblerX8664::incl(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(IceType_i32, address, RexRegIrrelevant);
   emitUint8(0xFF);
   emitOperand(0, address);
 }
 
-template <typename TraitsType>
-template <typename, typename>
-void AssemblerX86Base<TraitsType>::decl(GPRRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  emitUint8(0x48 + reg);
-}
-
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::decl(const Address &address) {
+void AssemblerX8664::decl(const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRex(IceType_i32, address, RexRegIrrelevant);
   emitUint8(0xFF);
   emitOperand(1, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::rol(Type Ty, GPRRegister reg, const Immediate &imm) {
   emitGenericShift(0, Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rol(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
+void AssemblerX8664::rol(Type Ty, GPRRegister operand, GPRRegister shifter) {
   emitGenericShift(0, Ty, Operand(operand), shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::rol(Type Ty, const Address &operand,
-                                       GPRRegister shifter) {
+void AssemblerX8664::rol(Type Ty, const Address &operand, GPRRegister shifter) {
   emitGenericShift(0, Ty, operand, shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::shl(Type Ty, GPRRegister reg, const Immediate &imm) {
   emitGenericShift(4, Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shl(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
+void AssemblerX8664::shl(Type Ty, GPRRegister operand, GPRRegister shifter) {
   emitGenericShift(4, Ty, Operand(operand), shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shl(Type Ty, const Address &operand,
-                                       GPRRegister shifter) {
+void AssemblerX8664::shl(Type Ty, const Address &operand, GPRRegister shifter) {
   emitGenericShift(4, Ty, operand, shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::shr(Type Ty, GPRRegister reg, const Immediate &imm) {
   emitGenericShift(5, Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shr(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
+void AssemblerX8664::shr(Type Ty, GPRRegister operand, GPRRegister shifter) {
   emitGenericShift(5, Ty, Operand(operand), shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shr(Type Ty, const Address &operand,
-                                       GPRRegister shifter) {
+void AssemblerX8664::shr(Type Ty, const Address &operand, GPRRegister shifter) {
   emitGenericShift(5, Ty, operand, shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister reg,
-                                       const Immediate &imm) {
+void AssemblerX8664::sar(Type Ty, GPRRegister reg, const Immediate &imm) {
   emitGenericShift(7, Ty, reg, imm);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sar(Type Ty, GPRRegister operand,
-                                       GPRRegister shifter) {
+void AssemblerX8664::sar(Type Ty, GPRRegister operand, GPRRegister shifter) {
   emitGenericShift(7, Ty, Operand(operand), shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sar(Type Ty, const Address &address,
-                                       GPRRegister shifter) {
+void AssemblerX8664::sar(Type Ty, const Address &address, GPRRegister shifter) {
   emitGenericShift(7, Ty, address, shifter);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
-                                        GPRRegister src) {
+void AssemblerX8664::shld(Type Ty, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -3210,9 +2664,8 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shld(Type Ty, GPRRegister dst,
-                                        GPRRegister src, const Immediate &imm) {
+void AssemblerX8664::shld(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   assert(imm.is_int8());
@@ -3225,9 +2678,7 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shld(Type Ty, const Address &operand,
-                                        GPRRegister src) {
+void AssemblerX8664::shld(Type Ty, const Address &operand, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -3238,9 +2689,7 @@
   emitOperand(gprEncoding(src), operand);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
-                                        GPRRegister src) {
+void AssemblerX8664::shrd(Type Ty, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -3251,9 +2700,8 @@
   emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shrd(Type Ty, GPRRegister dst,
-                                        GPRRegister src, const Immediate &imm) {
+void AssemblerX8664::shrd(Type Ty, GPRRegister dst, GPRRegister src,
+                          const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   assert(imm.is_int8());
@@ -3266,9 +2714,7 @@
   emitUint8(imm.value() & 0xFF);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::shrd(Type Ty, const Address &dst,
-                                        GPRRegister src) {
+void AssemblerX8664::shrd(Type Ty, const Address &dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32);
   if (Ty == IceType_i16)
@@ -3279,8 +2725,7 @@
   emitOperand(gprEncoding(src), dst);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::neg(Type Ty, GPRRegister reg) {
+void AssemblerX8664::neg(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3292,8 +2737,7 @@
   emitRegisterOperand(3, gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::neg(Type Ty, const Address &addr) {
+void AssemblerX8664::neg(Type Ty, const Address &addr) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3305,16 +2749,14 @@
   emitOperand(3, addr);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::notl(GPRRegister reg) {
+void AssemblerX8664::notl(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(IceType_i32, reg);
   emitUint8(0xF7);
   emitUint8(0xD0 | gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bswap(Type Ty, GPRRegister reg) {
+void AssemblerX8664::bswap(Type Ty, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
   emitRexB(Ty, reg);
@@ -3322,9 +2764,7 @@
   emitUint8(0xC8 | gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::bsf(Type Ty, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3336,9 +2776,7 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsf(Type Ty, GPRRegister dst,
-                                       const Address &src) {
+void AssemblerX8664::bsf(Type Ty, GPRRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3350,9 +2788,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
-                                       GPRRegister src) {
+void AssemblerX8664::bsr(Type Ty, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3364,9 +2800,7 @@
   emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bsr(Type Ty, GPRRegister dst,
-                                       const Address &src) {
+void AssemblerX8664::bsr(Type Ty, GPRRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(Ty == IceType_i16 || Ty == IceType_i32 ||
          (Traits::Is64Bit && Ty == IceType_i64));
@@ -3378,8 +2812,7 @@
   emitOperand(gprEncoding(dst), src);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bt(GPRRegister base, GPRRegister offset) {
+void AssemblerX8664::bt(GPRRegister base, GPRRegister offset) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexRB(IceType_i32, offset, base);
   emitUint8(0x0F);
@@ -3387,13 +2820,12 @@
   emitRegisterOperand(gprEncoding(offset), gprEncoding(base));
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::ret() {
+void AssemblerX8664::ret() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xC3);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::ret(const Immediate &imm) {
+void AssemblerX8664::ret(const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xC2);
   assert(imm.is_uint16());
@@ -3401,8 +2833,7 @@
   emitUint8((imm.value() >> 8) & 0xFF);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::nop(int size) {
+void AssemblerX8664::nop(int size) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // There are nops up to size 15, but for now just provide up to size 8.
   assert(0 < size && size <= MAX_NOP_SIZE);
@@ -3464,25 +2895,23 @@
   }
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::int3() {
+void AssemblerX8664::int3() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xCC);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::hlt() {
+void AssemblerX8664::hlt() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF4);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::ud2() {
+void AssemblerX8664::ud2() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
   emitUint8(0x0B);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::j(BrCond condition, Label *label,
-                                     bool near) {
+void AssemblerX8664::j(BrCond condition, Label *label, bool near) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (label->isBound()) {
     static const int kShortSize = 2;
@@ -3507,9 +2936,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::j(BrCond condition,
-                                     const ConstantRelocatable *label) {
+void AssemblerX8664::j(BrCond condition, const ConstantRelocatable *label) {
   llvm::report_fatal_error("Untested - please verify and then reenable.");
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
@@ -3520,16 +2947,14 @@
   emitInt32(0);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(GPRRegister reg) {
+void AssemblerX8664::jmp(GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitRexB(RexTypeIrrelevant, reg);
   emitUint8(0xFF);
   emitRegisterOperand(4, gprEncoding(reg));
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(Label *label, bool near) {
+void AssemblerX8664::jmp(Label *label, bool near) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (label->isBound()) {
     static const int kShortSize = 2;
@@ -3552,8 +2977,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(const ConstantRelocatable *label) {
+void AssemblerX8664::jmp(const ConstantRelocatable *label) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xE9);
   auto *Fixup = this->createFixup(Traits::FK_PcRel, label);
@@ -3562,8 +2986,7 @@
   emitInt32(0);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::jmp(const Immediate &abs_address) {
+void AssemblerX8664::jmp(const Immediate &abs_address) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xE9);
   AssemblerFixup *Fixup =
@@ -3573,21 +2996,20 @@
   emitInt32(0);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::mfence() {
+void AssemblerX8664::mfence() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
   emitUint8(0xAE);
   emitUint8(0xF0);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::lock() {
+void AssemblerX8664::lock() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0xF0);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpxchg(Type Ty, const Address &address,
-                                           GPRRegister reg, bool Locked) {
+void AssemblerX8664::cmpxchg(Type Ty, const Address &address, GPRRegister reg,
+                             bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3602,9 +3024,7 @@
   emitOperand(gprEncoding(reg), address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::cmpxchg8b(const Address &address,
-                                             bool Locked) {
+void AssemblerX8664::cmpxchg8b(const Address &address, bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Locked)
     emitUint8(0xF0);
@@ -3614,9 +3034,8 @@
   emitOperand(1, address);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xadd(Type Ty, const Address &addr,
-                                        GPRRegister reg, bool Locked) {
+void AssemblerX8664::xadd(Type Ty, const Address &addr, GPRRegister reg,
+                          bool Locked) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3631,9 +3050,7 @@
   emitOperand(gprEncoding(reg), addr);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xchg(Type Ty, GPRRegister reg0,
-                                        GPRRegister reg1) {
+void AssemblerX8664::xchg(Type Ty, GPRRegister reg0, GPRRegister reg1) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3654,9 +3071,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::xchg(Type Ty, const Address &addr,
-                                        GPRRegister reg) {
+void AssemblerX8664::xchg(Type Ty, const Address &addr, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)
     emitOperandSizeOverride();
@@ -3668,7 +3083,7 @@
   emitOperand(gprEncoding(reg), addr);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_start() {
+void AssemblerX8664::iaca_start() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(0x0F);
   emitUint8(0x0B);
@@ -3685,7 +3100,7 @@
   emitUint8(0x90);
 }
 
-template <typename TraitsType> void AssemblerX86Base<TraitsType>::iaca_end() {
+void AssemblerX8664::iaca_end() {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
 
   // mov $222, ebx
@@ -3703,14 +3118,12 @@
   emitUint8(0x0B);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitSegmentOverride(uint8_t prefix) {
+void AssemblerX8664::emitSegmentOverride(uint8_t prefix) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   emitUint8(prefix);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::align(intptr_t alignment, intptr_t offset) {
+void AssemblerX8664::align(intptr_t alignment, intptr_t offset) {
   assert(llvm::isPowerOf2_32(alignment));
   intptr_t pos = offset + Buffer.getPosition();
   intptr_t mod = pos & (alignment - 1);
@@ -3728,8 +3141,7 @@
   assert(((offset + Buffer.getPosition()) & (alignment - 1)) == 0);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::bind(Label *L) {
+void AssemblerX8664::bind(Label *L) {
   const intptr_t Bound = Buffer.size();
   assert(!L->isBound()); // Labels can only be bound once.
   while (L->isLinked()) {
@@ -3748,9 +3160,8 @@
   L->bindTo(Bound);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitOperand(int rm, const Operand &operand,
-                                               RelocOffsetT Addend) {
+void AssemblerX8664::emitOperand(int rm, const Operand &operand,
+                                 RelocOffsetT Addend) {
   assert(rm >= 0 && rm < 8);
   const intptr_t length = operand.length_;
   assert(length > 0);
@@ -3787,9 +3198,7 @@
   emitInt32(0);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitImmediate(Type Ty,
-                                                 const Immediate &imm) {
+void AssemblerX8664::emitImmediate(Type Ty, const Immediate &imm) {
   auto *const Fixup = imm.fixup();
   if (Ty == IceType_i16) {
     assert(Fixup == nullptr);
@@ -3807,9 +3216,8 @@
   emitInt32(0);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitComplexI8(int rm, const Operand &operand,
-                                                 const Immediate &immediate) {
+void AssemblerX8664::emitComplexI8(int rm, const Operand &operand,
+                                   const Immediate &immediate) {
   assert(rm >= 0 && rm < 8);
   assert(immediate.is_int8());
   if (operand.IsRegister(Traits::Encoded_Reg_Accumulator)) {
@@ -3825,10 +3233,8 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitComplex(Type Ty, int rm,
-                                               const Operand &operand,
-                                               const Immediate &immediate) {
+void AssemblerX8664::emitComplex(Type Ty, int rm, const Operand &operand,
+                                 const Immediate &immediate) {
   assert(rm >= 0 && rm < 8);
   if (immediate.is_int8()) {
     // Use sign-extended 8-bit immediate.
@@ -3848,9 +3254,7 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitLabel(Label *label,
-                                             intptr_t instruction_size) {
+void AssemblerX8664::emitLabel(Label *label, intptr_t instruction_size) {
   if (label->isBound()) {
     intptr_t offset = label->getPosition() - Buffer.size();
     assert(offset <= 0);
@@ -3860,26 +3264,22 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitLabelLink(Label *Label) {
+void AssemblerX8664::emitLabelLink(Label *Label) {
   assert(!Label->isBound());
   intptr_t Position = Buffer.size();
   emitInt32(Label->Position);
   Label->linkTo(*this, Position);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitNearLabelLink(Label *Label) {
+void AssemblerX8664::emitNearLabelLink(Label *Label) {
   assert(!Label->isBound());
   intptr_t Position = Buffer.size();
   emitUint8(0);
   Label->nearLinkTo(*this, Position);
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
-                                                    GPRRegister reg,
-                                                    const Immediate &imm) {
+void AssemblerX8664::emitGenericShift(int rm, Type Ty, GPRRegister reg,
+                                      const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   // We don't assert that imm fits into 8 bits; instead, it gets masked below.
   // Note that we don't mask it further (e.g. to 5 bits) because we want the
@@ -3899,10 +3299,8 @@
   }
 }
 
-template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::emitGenericShift(int rm, Type Ty,
-                                                    const Operand &operand,
-                                                    GPRRegister shifter) {
+void AssemblerX8664::emitGenericShift(int rm, Type Ty, const Operand &operand,
+                                      GPRRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   assert(shifter == Traits::Encoded_Reg_Counter);
   (void)shifter;
diff --git a/third_party/subzero/src/IceAssemblerX8664.h b/third_party/subzero/src/IceAssemblerX8664.h
index 3a49840..ad12a72 100644
--- a/third_party/subzero/src/IceAssemblerX8664.h
+++ b/third_party/subzero/src/IceAssemblerX8664.h
@@ -16,20 +16,834 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief Instantiates the Assembler for X86-64.
+/// \brief Declares the Assembler class for X86-64.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEASSEMBLERX8664_H
 #define SUBZERO_SRC_ICEASSEMBLERX8664_H
 
-#include "IceAssemblerX8664Base.h"
+#include "IceAssembler.h"
+#include "IceDefs.h"
+#include "IceOperand.h"
+#include "IceTypes.h"
+#include "IceUtils.h"
+
 #include "IceTargetLoweringX8664Traits.h"
 
 namespace Ice {
 namespace X8664 {
 
-using AssemblerX8664 = AssemblerX86Base<X8664::Traits>;
+class AssemblerX8664 : public ::Ice::Assembler {
+  AssemblerX8664(const AssemblerX8664 &) = delete;
+  AssemblerX8664 &operator=(const AssemblerX8664 &) = delete;
+
+protected:
+  explicit AssemblerX8664() : Assembler(Traits::AsmKind) {}
+
+public:
+  using Traits = TargetX8664Traits; // Concrete type: names are not dependent.
+  using Address = Traits::Address;
+  using ByteRegister = Traits::ByteRegister;
+  using BrCond = Traits::Cond::BrCond;
+  using CmppsCond = Traits::Cond::CmppsCond;
+  using GPRRegister = Traits::GPRRegister;
+  using Operand = Traits::Operand;
+  using XmmRegister = Traits::XmmRegister;
+
+  static constexpr int MAX_NOP_SIZE = 8;
+
+  static bool classof(const Assembler *Asm) {
+    return Asm->getKind() == Traits::AsmKind; // same tag the ctor passes up
+  }
+
+  class Immediate { // Wraps either a plain int32 value or a fixup.
+    Immediate(const Immediate &) = delete;
+    Immediate &operator=(const Immediate &) = delete;
+
+  public:
+    explicit Immediate(int32_t value) : value_(value) {} // fixup_ stays null
+
+    explicit Immediate(AssemblerFixup *fixup) : fixup_(fixup) {} // value_ == 0
+
+    int32_t value() const { return value_; }
+    AssemblerFixup *fixup() const { return fixup_; }
+
+    bool is_int8() const {
+      // We currently only allow 32-bit fixups, and they usually have value = 0,
+      // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
+      return fixup_ == nullptr && Utils::IsInt(8, value_);
+    }
+    bool is_uint8() const { // Same fixup rule as is_int8().
+      return fixup_ == nullptr && Utils::IsUint(8, value_);
+    }
+    bool is_uint16() const { // Same fixup rule as is_int8().
+      return fixup_ == nullptr && Utils::IsUint(16, value_);
+    }
+
+  private:
+    const int32_t value_ = 0;
+    AssemblerFixup *fixup_ = nullptr;
+  };
+
+  /// X86 allows near and far jumps; this label also tracks pending near ones.
+  class Label final : public Ice::Label {
+    Label(const Label &) = delete;
+    Label &operator=(const Label &) = delete;
+
+  public:
+    Label() = default;
+    ~Label() = default;
+
+    void finalCheck() const override {
+      Ice::Label::finalCheck();
+      assert(!hasNear()); // no near link may remain unresolved
+    }
+
+    /// Returns the position of an earlier branch instruction which assumes that
+    /// this label is "near", and bumps iterator to the next near position.
+    intptr_t getNearPosition() {
+      assert(hasNear());
+      intptr_t Pos = UnresolvedNearPositions.back(); // most recent link first
+      UnresolvedNearPositions.pop_back();
+      return Pos;
+    }
+
+    bool hasNear() const { return !UnresolvedNearPositions.empty(); }
+    bool isUnused() const override {
+      return Ice::Label::isUnused() && !hasNear();
+    }
+
+  private:
+    friend class AssemblerX8664;
+
+    void nearLinkTo(const Assembler &Asm, intptr_t position) {
+      if (Asm.getPreliminary())
+        return; // nothing is recorded while Asm.getPreliminary() is set
+      assert(!isBound());
+      UnresolvedNearPositions.push_back(position);
+    }
+
+    llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
+  };
+
+public:
+  ~AssemblerX8664() override;
+
+  static const bool kNearJump = true;
+  static const bool kFarJump = false;
+
+  void alignFunction() override;
+
+  SizeT getBundleAlignLog2Bytes() const override { return 5; }
+
+  const char *getAlignDirective() const override { return ".p2align"; }
+
+  llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
+    static const uint8_t Padding[] = {0xF4}; // presumably hlt; confirm in SDM
+    return llvm::ArrayRef<uint8_t>(Padding, 1);
+  }
+
+  void padWithNop(intptr_t Padding) override {
+    // Emit maximal nops while more than MAX_NOP_SIZE bytes remain.
+    for (; Padding > MAX_NOP_SIZE; Padding -= MAX_NOP_SIZE)
+      nop(MAX_NOP_SIZE);
+    // Whatever is left (if nonzero) goes out as one final nop.
+    if (Padding != 0)
+      nop(Padding);
+  }
+
+  Ice::Label *getCfgNodeLabel(SizeT NodeNumber) override;
+  void bindCfgNodeLabel(const CfgNode *Node) override;
+  Label *getOrCreateCfgNodeLabel(SizeT Number);
+  Label *getOrCreateLocalLabel(SizeT Number);
+  void bindLocalLabel(SizeT Number);
+
+  bool fixupIsPCRel(FixupKind Kind) const override {
+    // Currently assuming this is the only PC-rel relocation type used.
+    // TODO(jpp): Traits.PcRelTypes.count(Kind) != 0
+    return Kind == Traits::FK_PcRel;
+  }
+
+  // Operations to emit GPR instructions (and dispatch on operand type).
+  using TypedEmitGPR = void (AssemblerX8664::*)(Type, GPRRegister);
+  using TypedEmitAddr = void (AssemblerX8664::*)(Type, const Address &);
+  struct GPREmitterOneOp {
+    TypedEmitGPR Reg;   // (Type, GPRRegister) form
+    TypedEmitAddr Addr; // (Type, const Address &) form
+  };
+
+  using TypedEmitGPRGPR = void (AssemblerX8664::*)(Type, GPRRegister,
+                                                   GPRRegister);
+  using TypedEmitGPRAddr = void (AssemblerX8664::*)(Type, GPRRegister,
+                                                    const Address &);
+  using TypedEmitGPRImm = void (AssemblerX8664::*)(Type, GPRRegister,
+                                                   const Immediate &);
+  struct GPREmitterRegOp {
+    TypedEmitGPRGPR GPRGPR;   // (Type, GPRRegister, GPRRegister) form
+    TypedEmitGPRAddr GPRAddr; // (Type, GPRRegister, const Address &) form
+    TypedEmitGPRImm GPRImm;   // (Type, GPRRegister, const Immediate &) form
+  };
+
+  struct GPREmitterShiftOp {
+    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are
+    // not. In practice, we always normalize the Dest to a Register first.
+    TypedEmitGPRGPR GPRGPR; // (Type, GPRRegister, GPRRegister) form
+    TypedEmitGPRImm GPRImm; // (Type, GPRRegister, const Immediate &) form
+  };
+
+  using TypedEmitGPRGPRImm = void (AssemblerX8664::*)(Type, GPRRegister,
+                                                      GPRRegister,
+                                                      const Immediate &);
+  struct GPREmitterShiftD {
+    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
+    // always normalize Dest to a Register first.
+    TypedEmitGPRGPR GPRGPR;       // (Type, GPRRegister, GPRRegister) form
+    TypedEmitGPRGPRImm GPRGPRImm; // same, plus a trailing const Immediate &
+  };
+
+  using TypedEmitAddrGPR = void (AssemblerX8664::*)(Type, const Address &,
+                                                    GPRRegister);
+  using TypedEmitAddrImm = void (AssemblerX8664::*)(Type, const Address &,
+                                                    const Immediate &);
+  struct GPREmitterAddrOp {
+    TypedEmitAddrGPR AddrGPR; // (Type, const Address &, GPRRegister) form
+    TypedEmitAddrImm AddrImm; // (Type, const Address &, const Immediate &) form
+  };
+
+  // Operations to emit XMM instructions (and dispatch on operand type).
+  using TypedEmitXmmXmm = void (AssemblerX8664::*)(Type, XmmRegister,
+                                                   XmmRegister);
+  using TypedEmitXmmAddr = void (AssemblerX8664::*)(Type, XmmRegister,
+                                                    const Address &);
+  struct XmmEmitterRegOp {
+    TypedEmitXmmXmm XmmXmm;   // (Type, XmmRegister, XmmRegister) form
+    TypedEmitXmmAddr XmmAddr; // (Type, XmmRegister, const Address &) form
+  };
+
+  using EmitXmmXmm = void (AssemblerX8664::*)(XmmRegister, XmmRegister);
+  using EmitXmmAddr = void (AssemblerX8664::*)(XmmRegister, const Address &);
+  using EmitAddrXmm = void (AssemblerX8664::*)(const Address &, XmmRegister);
+  struct XmmEmitterMovOps {
+    EmitXmmXmm XmmXmm;   // (XmmRegister, XmmRegister) form
+    EmitXmmAddr XmmAddr; // (XmmRegister, const Address &) form
+    EmitAddrXmm AddrXmm; // (const Address &, XmmRegister) form
+  };
+
+  using TypedEmitXmmImm = void (AssemblerX8664::*)(Type, XmmRegister,
+                                                   const Immediate &);
+
+  struct XmmEmitterShiftOp {
+    TypedEmitXmmXmm XmmXmm;   // (Type, XmmRegister, XmmRegister) form
+    TypedEmitXmmAddr XmmAddr; // (Type, XmmRegister, const Address &) form
+    TypedEmitXmmImm XmmImm;   // (Type, XmmRegister, const Immediate &) form
+  };
+
+  // Cross Xmm/GPR cast instructions.
+  template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
+    using TypedEmitRegs = void (AssemblerX8664::*)(Type, DReg_t, Type, SReg_t);
+    using TypedEmitAddr = void (AssemblerX8664::*)(Type, DReg_t, Type,
+                                                   const Address &);
+
+    TypedEmitRegs RegReg;  // (Type, DReg_t, Type, SReg_t) form
+    TypedEmitAddr RegAddr; // (Type, DReg_t, Type, const Address &) form
+  };
+
+  // Three operand (potentially) cross Xmm/GPR instructions. The last operand
+  // must be an immediate.
+  template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
+    using TypedEmitRegRegImm = void (AssemblerX8664::*)(Type, DReg_t, SReg_t,
+                                                        const Immediate &);
+    using TypedEmitRegAddrImm = void (AssemblerX8664::*)(Type, DReg_t,
+                                                         const Address &,
+                                                         const Immediate &);
+
+    TypedEmitRegRegImm RegRegImm;   // second operand is an SReg_t
+    TypedEmitRegAddrImm RegAddrImm; // second operand is an Address
+  };
+
+  /*
+   * Emit Machine Instructions.
+   */
+  void call(GPRRegister reg);
+  void call(const Address &address);
+  void call(const ConstantRelocatable *label); // not testable.
+  void call(const Immediate &abs_address);
+
+  static const intptr_t kCallExternalLabelSize = 5;
+
+  void pushl(GPRRegister reg);
+  void pushl(const Immediate &Imm);
+  void pushl(const ConstantRelocatable *Label);
+
+  void popl(GPRRegister reg);
+  void popl(const Address &address);
+
+  void setcc(BrCond condition, ByteRegister dst);
+  void setcc(BrCond condition, const Address &address);
+
+  void mov(Type Ty, GPRRegister dst, const Immediate &src);
+  void mov(Type Ty, GPRRegister dst, GPRRegister src);
+  void mov(Type Ty, GPRRegister dst, const Address &src);
+  void mov(Type Ty, const Address &dst, GPRRegister src);
+  void mov(Type Ty, const Address &dst, const Immediate &imm);
+
+  void movabs(const GPRRegister Dst, uint64_t Imm64);
+
+  void movzx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movzx(Type Ty, GPRRegister dst, const Address &src);
+  void movsx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movsx(Type Ty, GPRRegister dst, const Address &src);
+
+  void lea(Type Ty, GPRRegister dst, const Address &src);
+
+  void cmov(Type Ty, BrCond cond, GPRRegister dst, GPRRegister src);
+  void cmov(Type Ty, BrCond cond, GPRRegister dst, const Address &src);
+
+  void rep_movsb();
+
+  void movss(Type Ty, XmmRegister dst, const Address &src);
+  void movss(Type Ty, const Address &dst, XmmRegister src);
+  void movss(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void movd(Type SrcTy, XmmRegister dst, GPRRegister src);
+  void movd(Type SrcTy, XmmRegister dst, const Address &src);
+  void movd(Type DestTy, GPRRegister dst, XmmRegister src);
+  void movd(Type DestTy, const Address &dst, XmmRegister src);
+
+  void movq(XmmRegister dst, XmmRegister src);
+  void movq(const Address &dst, XmmRegister src);
+  void movq(XmmRegister dst, const Address &src);
+
+  void addss(Type Ty, XmmRegister dst, XmmRegister src);
+  void addss(Type Ty, XmmRegister dst, const Address &src);
+  void subss(Type Ty, XmmRegister dst, XmmRegister src);
+  void subss(Type Ty, XmmRegister dst, const Address &src);
+  void mulss(Type Ty, XmmRegister dst, XmmRegister src);
+  void mulss(Type Ty, XmmRegister dst, const Address &src);
+  void divss(Type Ty, XmmRegister dst, XmmRegister src);
+  void divss(Type Ty, XmmRegister dst, const Address &src);
+
+  void movaps(XmmRegister dst, XmmRegister src);
+
+  void movups(XmmRegister dst, XmmRegister src);
+  void movups(XmmRegister dst, const Address &src);
+  void movups(const Address &dst, XmmRegister src);
+
+  void padd(Type Ty, XmmRegister dst, XmmRegister src);
+  void padd(Type Ty, XmmRegister dst, const Address &src);
+  void padds(Type Ty, XmmRegister dst, XmmRegister src);
+  void padds(Type Ty, XmmRegister dst, const Address &src);
+  void paddus(Type Ty, XmmRegister dst, XmmRegister src);
+  void paddus(Type Ty, XmmRegister dst, const Address &src);
+  void pand(Type Ty, XmmRegister dst, XmmRegister src);
+  void pand(Type Ty, XmmRegister dst, const Address &src);
+  void pandn(Type Ty, XmmRegister dst, XmmRegister src);
+  void pandn(Type Ty, XmmRegister dst, const Address &src);
+  void pmull(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmull(Type Ty, XmmRegister dst, const Address &src);
+  void pmulhw(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmulhw(Type Ty, XmmRegister dst, const Address &src);
+  void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmulhuw(Type Ty, XmmRegister dst, const Address &src);
+  void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmaddwd(Type Ty, XmmRegister dst, const Address &src);
+  void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
+  void pmuludq(Type Ty, XmmRegister dst, const Address &src);
+  void por(Type Ty, XmmRegister dst, XmmRegister src);
+  void por(Type Ty, XmmRegister dst, const Address &src);
+  void psub(Type Ty, XmmRegister dst, XmmRegister src);
+  void psub(Type Ty, XmmRegister dst, const Address &src);
+  void psubs(Type Ty, XmmRegister dst, XmmRegister src);
+  void psubs(Type Ty, XmmRegister dst, const Address &src);
+  void psubus(Type Ty, XmmRegister dst, XmmRegister src);
+  void psubus(Type Ty, XmmRegister dst, const Address &src);
+  void pxor(Type Ty, XmmRegister dst, XmmRegister src);
+  void pxor(Type Ty, XmmRegister dst, const Address &src);
+
+  void psll(Type Ty, XmmRegister dst, XmmRegister src);
+  void psll(Type Ty, XmmRegister dst, const Address &src);
+  void psll(Type Ty, XmmRegister dst, const Immediate &src);
+
+  void psra(Type Ty, XmmRegister dst, XmmRegister src);
+  void psra(Type Ty, XmmRegister dst, const Address &src);
+  void psra(Type Ty, XmmRegister dst, const Immediate &src);
+  void psrl(Type Ty, XmmRegister dst, XmmRegister src);
+  void psrl(Type Ty, XmmRegister dst, const Address &src);
+  void psrl(Type Ty, XmmRegister dst, const Immediate &src);
+
+  void addps(Type Ty, XmmRegister dst, XmmRegister src);
+  void addps(Type Ty, XmmRegister dst, const Address &src);
+  void subps(Type Ty, XmmRegister dst, XmmRegister src);
+  void subps(Type Ty, XmmRegister dst, const Address &src);
+  void divps(Type Ty, XmmRegister dst, XmmRegister src);
+  void divps(Type Ty, XmmRegister dst, const Address &src);
+  void mulps(Type Ty, XmmRegister dst, XmmRegister src);
+  void mulps(Type Ty, XmmRegister dst, const Address &src);
+  void minps(Type Ty, XmmRegister dst, const Address &src);
+  void minps(Type Ty, XmmRegister dst, XmmRegister src);
+  void minss(Type Ty, XmmRegister dst, const Address &src);
+  void minss(Type Ty, XmmRegister dst, XmmRegister src);
+  void maxps(Type Ty, XmmRegister dst, const Address &src);
+  void maxps(Type Ty, XmmRegister dst, XmmRegister src);
+  void maxss(Type Ty, XmmRegister dst, const Address &src);
+  void maxss(Type Ty, XmmRegister dst, XmmRegister src);
+  void andnps(Type Ty, XmmRegister dst, const Address &src);
+  void andnps(Type Ty, XmmRegister dst, XmmRegister src);
+  void andps(Type Ty, XmmRegister dst, const Address &src);
+  void andps(Type Ty, XmmRegister dst, XmmRegister src);
+  void orps(Type Ty, XmmRegister dst, const Address &src);
+  void orps(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
+  void blendvps(Type Ty, XmmRegister dst, const Address &src);
+  void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
+  void pblendvb(Type Ty, XmmRegister dst, const Address &src);
+
+  void cmpps(Type Ty, XmmRegister dst, XmmRegister src, CmppsCond CmpCondition);
+  void cmpps(Type Ty, XmmRegister dst, const Address &src,
+             CmppsCond CmpCondition);
+
+  void sqrtps(XmmRegister dst);
+  void rsqrtps(XmmRegister dst);
+  void reciprocalps(XmmRegister dst);
+
+  void movhlps(XmmRegister dst, XmmRegister src);
+  void movlhps(XmmRegister dst, XmmRegister src);
+  void unpcklps(XmmRegister dst, XmmRegister src);
+  void unpckhps(XmmRegister dst, XmmRegister src);
+  void unpcklpd(XmmRegister dst, XmmRegister src);
+  void unpckhpd(XmmRegister dst, XmmRegister src);
+
+  void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
+
+  void sqrtpd(XmmRegister dst);
+
+  void pshufb(Type Ty, XmmRegister dst, XmmRegister src);
+  void pshufb(Type Ty, XmmRegister dst, const Address &src);
+  void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
+  void pshufd(Type Ty, XmmRegister dst, const Address &src,
+              const Immediate &mask);
+  void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void punpckl(Type Ty, XmmRegister Dst, const Address &Src);
+  void punpckh(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void punpckh(Type Ty, XmmRegister Dst, const Address &Src);
+  void packss(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void packss(Type Ty, XmmRegister Dst, const Address &Src);
+  void packus(Type Ty, XmmRegister Dst, XmmRegister Src);
+  void packus(Type Ty, XmmRegister Dst, const Address &Src);
+  void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
+  void shufps(Type Ty, XmmRegister dst, const Address &src,
+              const Immediate &mask);
+
+  void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
+  void cvtdq2ps(Type, XmmRegister dst, const Address &src);
+
+  void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
+  void cvttps2dq(Type, XmmRegister dst, const Address &src);
+
+  void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
+  void cvtps2dq(Type, XmmRegister dst, const Address &src);
+
+  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
+  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
+
+  void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
+  void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
+
+  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
+  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
+
+  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
+  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
+
+  void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
+  void ucomiss(Type Ty, XmmRegister a, const Address &b);
+
+  void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
+
+  void sqrt(Type Ty, XmmRegister dst, const Address &src);
+  void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void xorps(Type Ty, XmmRegister dst, const Address &src);
+  void xorps(Type Ty, XmmRegister dst, XmmRegister src);
+
+  void insertps(Type Ty, XmmRegister dst, XmmRegister src,
+                const Immediate &imm);
+  void insertps(Type Ty, XmmRegister dst, const Address &src,
+                const Immediate &imm);
+
+  void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
+  void pinsr(Type Ty, XmmRegister dst, const Address &src,
+             const Immediate &imm);
+
+  void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
+
+  void pmovsxdq(XmmRegister dst, XmmRegister src);
+
+  void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
+  void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
+  void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
+  void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
+
+  enum RoundingMode {
+    kRoundToNearest = 0x0,
+    kRoundDown = 0x1,
+    kRoundUp = 0x2,
+    kRoundToZero = 0x3
+  };
+  void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
+  void round(Type Ty, XmmRegister dst, const Address &src,
+             const Immediate &mode);
+
+  void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void cmp(Type Ty, GPRRegister reg, const Address &address);
+  void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
+  void cmp(Type Ty, const Address &address, GPRRegister reg);
+  void cmp(Type Ty, const Address &address, const Immediate &imm);
+
+  void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void test(Type Ty, GPRRegister reg, const Immediate &imm);
+  void test(Type Ty, const Address &address, GPRRegister reg);
+  void test(Type Ty, const Address &address, const Immediate &imm);
+
+  void And(Type Ty, GPRRegister dst, GPRRegister src);
+  void And(Type Ty, GPRRegister dst, const Address &address);
+  void And(Type Ty, GPRRegister dst, const Immediate &imm);
+  void And(Type Ty, const Address &address, GPRRegister reg);
+  void And(Type Ty, const Address &address, const Immediate &imm);
+
+  void Or(Type Ty, GPRRegister dst, GPRRegister src);
+  void Or(Type Ty, GPRRegister dst, const Address &address);
+  void Or(Type Ty, GPRRegister dst, const Immediate &imm);
+  void Or(Type Ty, const Address &address, GPRRegister reg);
+  void Or(Type Ty, const Address &address, const Immediate &imm);
+
+  void Xor(Type Ty, GPRRegister dst, GPRRegister src);
+  void Xor(Type Ty, GPRRegister dst, const Address &address);
+  void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
+  void Xor(Type Ty, const Address &address, GPRRegister reg);
+  void Xor(Type Ty, const Address &address, const Immediate &imm);
+
+  void add(Type Ty, GPRRegister dst, GPRRegister src);
+  void add(Type Ty, GPRRegister reg, const Address &address);
+  void add(Type Ty, GPRRegister reg, const Immediate &imm);
+  void add(Type Ty, const Address &address, GPRRegister reg);
+  void add(Type Ty, const Address &address, const Immediate &imm);
+
+  void adc(Type Ty, GPRRegister dst, GPRRegister src);
+  void adc(Type Ty, GPRRegister dst, const Address &address);
+  void adc(Type Ty, GPRRegister reg, const Immediate &imm);
+  void adc(Type Ty, const Address &address, GPRRegister reg);
+  void adc(Type Ty, const Address &address, const Immediate &imm);
+
+  void sub(Type Ty, GPRRegister dst, GPRRegister src);
+  void sub(Type Ty, GPRRegister reg, const Address &address);
+  void sub(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sub(Type Ty, const Address &address, GPRRegister reg);
+  void sub(Type Ty, const Address &address, const Immediate &imm);
+
+  void sbb(Type Ty, GPRRegister dst, GPRRegister src);
+  void sbb(Type Ty, GPRRegister reg, const Address &address);
+  void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sbb(Type Ty, const Address &address, GPRRegister reg);
+  void sbb(Type Ty, const Address &address, const Immediate &imm);
+
+  void cbw();
+  void cwd();
+  void cdq();
+  void cqo();
+
+  void div(Type Ty, GPRRegister reg);
+  void div(Type Ty, const Address &address);
+
+  void idiv(Type Ty, GPRRegister reg);
+  void idiv(Type Ty, const Address &address);
+
+  void imul(Type Ty, GPRRegister dst, GPRRegister src);
+  void imul(Type Ty, GPRRegister reg, const Immediate &imm);
+  void imul(Type Ty, GPRRegister reg, const Address &address);
+
+  void imul(Type Ty, GPRRegister reg);
+  void imul(Type Ty, const Address &address);
+
+  void imul(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void imul(Type Ty, GPRRegister dst, const Address &address,
+            const Immediate &imm);
+
+  void mul(Type Ty, GPRRegister reg);
+  void mul(Type Ty, const Address &address);
+
+  void incl(GPRRegister reg);
+  void incl(const Address &address);
+
+  void decl(GPRRegister reg);
+  void decl(const Address &address);
+
+  void rol(Type Ty, GPRRegister reg, const Immediate &imm);
+  void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void rol(Type Ty, const Address &operand, GPRRegister shifter);
+
+  void shl(Type Ty, GPRRegister reg, const Immediate &imm);
+  void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void shl(Type Ty, const Address &operand, GPRRegister shifter);
+
+  void shr(Type Ty, GPRRegister reg, const Immediate &imm);
+  void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void shr(Type Ty, const Address &operand, GPRRegister shifter);
+
+  void sar(Type Ty, GPRRegister reg, const Immediate &imm);
+  void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
+  void sar(Type Ty, const Address &address, GPRRegister shifter);
+
+  void shld(Type Ty, GPRRegister dst, GPRRegister src);
+  void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void shld(Type Ty, const Address &operand, GPRRegister src);
+  void shrd(Type Ty, GPRRegister dst, GPRRegister src);
+  void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
+  void shrd(Type Ty, const Address &dst, GPRRegister src);
+
+  void neg(Type Ty, GPRRegister reg);
+  void neg(Type Ty, const Address &addr);
+  void notl(GPRRegister reg);
+
+  void bsf(Type Ty, GPRRegister dst, GPRRegister src);
+  void bsf(Type Ty, GPRRegister dst, const Address &src);
+  void bsr(Type Ty, GPRRegister dst, GPRRegister src);
+  void bsr(Type Ty, GPRRegister dst, const Address &src);
+
+  void bswap(Type Ty, GPRRegister reg);
+
+  void bt(GPRRegister base, GPRRegister offset);
+
+  void ret();
+  void ret(const Immediate &imm);
+
+  // 'size' indicates size in bytes and must be in the range 1..8.
+  void nop(int size = 1);
+  void int3();
+  void hlt();
+  void ud2();
+
+  // j(Label) is fully tested.
+  void j(BrCond condition, Label *label, bool near = kFarJump);
+  void j(BrCond condition, const ConstantRelocatable *label); // not testable.
+
+  void jmp(GPRRegister reg);
+  void jmp(Label *label, bool near = kFarJump);
+  void jmp(const ConstantRelocatable *label); // not testable.
+  void jmp(const Immediate &abs_address);
+
+  void mfence();
+
+  void lock();
+  void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
+  void cmpxchg8b(const Address &address, bool Locked);
+  void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
+  void xchg(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void xchg(Type Ty, const Address &address, GPRRegister reg);
+
+  /// \name Intel Architecture Code Analyzer markers.
+  /// @{
+  void iaca_start();
+  void iaca_end();
+  /// @}
+
+  void emitSegmentOverride(uint8_t prefix);
+
+  intptr_t preferredLoopAlignment() { return 16; } // constant; presumably bytes
+  void align(intptr_t alignment, intptr_t offset);
+  void bind(Label *label);
+
+  intptr_t CodeSize() const { return Buffer.size(); } // current Buffer size
+
+protected:
+  inline void emitUint8(uint8_t value);
+
+private:
+  ENABLE_MAKE_UNIQUE;
+
+  static constexpr Type RexTypeIrrelevant = IceType_i32;
+  static constexpr Type RexTypeForceRexW = IceType_i64;
+  static constexpr GPRRegister RexRegIrrelevant =
+      Traits::GPRRegister::Encoded_Reg_eax;
+
+  inline void emitInt16(int16_t value);
+  inline void emitInt32(int32_t value);
+  inline void emitRegisterOperand(int reg, int rm); // reg: bits 5:3, rm: 2:0
+  template <typename RegType, typename RmType>
+  inline void emitXmmRegisterOperand(RegType reg, RmType rm);
+  inline void emitOperandSizeOverride();
+
+  void emitOperand(int rm, const Operand &operand, RelocOffsetT Addend = 0);
+  void emitImmediate(Type ty, const Immediate &imm);
+  void emitComplexI8(int rm, const Operand &operand,
+                     const Immediate &immediate);
+  void emitComplex(Type Ty, int rm, const Operand &operand,
+                   const Immediate &immediate);
+  void emitLabel(Label *label, intptr_t instruction_size);
+  void emitLabelLink(Label *label);
+  void emitNearLabelLink(Label *label);
+
+  void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
+  void emitGenericShift(int rm, Type Ty, const Operand &operand,
+                        GPRRegister shifter);
+
+  using LabelVector = std::vector<Label *>;
+  // A vector of pool-allocated x86 labels for CFG nodes.
+  LabelVector CfgNodeLabels;
+  // A vector of pool-allocated x86 labels for Local labels.
+  LabelVector LocalLabels;
+
+  Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
+
+  // The arith_int() methods factor out the commonality between the encodings
+  // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
+  // parameter is statically asserted to be less than 8.
+  template <uint32_t Tag>
+  void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, GPRRegister reg, const Address &address);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, const Address &address, GPRRegister reg);
+
+  template <uint32_t Tag>
+  void arith_int(Type Ty, const Address &address, const Immediate &imm);
+
+  // gprEncoding returns the encoding of Reg used for operand emission. On
+  // x86-64 bit 3 (0x08) is masked out, as it is encoded in the REX.[RXB] bits.
+  // No other bits are touched because we don't want to mask errors.
+  template <typename RegType, typename T = Traits>
+  typename std::enable_if<T::Is64Bit, typename T::GPRRegister>::type
+  gprEncoding(const RegType Reg) {
+    return static_cast<GPRRegister>(static_cast<uint8_t>(Reg) & ~0x08);
+  }
+
+  template <typename RegType, typename T = Traits>
+  typename std::enable_if<!T::Is64Bit, typename T::GPRRegister>::type
+  gprEncoding(const RegType Reg) {
+    return static_cast<typename T::GPRRegister>(Reg); // non-64-bit: no masking
+  }
+
+  template <typename RegType>
+  bool is8BitRegisterRequiringRex(const Type Ty, const RegType Reg) {
+    using BareRegType = typename std::decay<RegType>::type;
+    static constexpr bool IsGPR =
+        std::is_same<BareRegType, ByteRegister>::value ||
+        std::is_same<BareRegType, GPRRegister>::value;
+    // By this point registers are encoded, so the "new" low byte registers
+    // introduced in x86-64 cannot be told apart from the legacy [abcd]h
+    // registers. ah may still be seen (div), so its encoding is exempted.
+    //
+    // Encoded_Reg_ah is copied into a plain uint32_t because RegType is an
+    // enum that is not necessarily the same type as
+    // Traits::RegisterSet::Encoded_Reg_ah.
+    constexpr uint32_t AhEncoding = Traits::RegisterSet::Encoded_Reg_ah;
+    // Candidates have bit 2 set and bit 3 clear in their encoding.
+    const bool RegBitsMatch = (Reg & 0x04) != 0 && (Reg & 0x08) == 0;
+    return IsGPR && RegBitsMatch && isByteSizedType(Ty) && Reg != AhEncoding;
+  }
+
+  // assembleAndEmitRex determines which (if any) rex prefix the current
+  // instruction needs and emits it. Reg and Rm are typed independently, as
+  // they may differ (e.g., in mov[sz]x instructions.) If Addr is not nullptr,
+  // Rm is ignored: Rex.X and Rex.B come from Addr, and TyRm gives Addr's size.
+  // When no rex bit is set, RexBase alone is still emitted if an operand is an
+  // 8-bit register requiring it (Reg, or Rm when there is no Addr).
+  template <typename RegType, typename RmType, typename T = Traits>
+  typename std::enable_if<T::Is64Bit, void>::type
+  assembleAndEmitRex(const Type TyReg, const RegType Reg, const Type TyRm,
+                     const RmType Rm,
+                     const typename T::Address *Addr = nullptr) {
+    using RexBits = typename T::Operand::RexBits;
+    const bool HasAddr = Addr != nullptr;
+    const bool Is64 = TyReg == IceType_i64 || TyRm == IceType_i64;
+    const uint8_t W = Is64 ? T::Operand::RexW : T::Operand::RexNone;
+    const uint8_t R = (Reg & 0x08) ? T::Operand::RexR : T::Operand::RexNone;
+    const uint8_t X =
+        HasAddr ? static_cast<RexBits>(Addr->rexX()) : T::Operand::RexNone;
+    const uint8_t B = HasAddr ? static_cast<RexBits>(Addr->rexB())
+                      : (Rm & 0x08) ? T::Operand::RexB
+                                    : T::Operand::RexNone;
+    const uint8_t Prefix = W | R | X | B;
+    if (Prefix != T::Operand::RexNone) {
+      emitUint8(Prefix);
+      return;
+    }
+    if (is8BitRegisterRequiringRex(TyReg, Reg) ||
+        (!HasAddr && is8BitRegisterRequiringRex(TyRm, Rm)))
+      emitUint8(T::Operand::RexBase);
+  }
+
+  template <typename RegType, typename RmType, typename T = Traits>
+  typename std::enable_if<!T::Is64Bit, void>::type
+  assembleAndEmitRex(const Type, const RegType, const Type, const RmType,
+                     const typename T::Address * = nullptr) {} // emits nothing
+
+  // emitRexRB is used for emitting a Rex prefix for instructions with two
+  // explicit register operands in their mod-rm byte.
+  template <typename RegType, typename RmType>
+  void emitRexRB(const Type Ty, const RegType Reg, const RmType Rm) {
+    assembleAndEmitRex(Ty, Reg, Ty, Rm);
+  }
+
+  template <typename RegType, typename RmType>
+  void emitRexRB(const Type TyReg, const RegType Reg, const Type TyRm,
+                 const RmType Rm) {
+    assembleAndEmitRex(TyReg, Reg, TyRm, Rm); // Addr stays nullptr
+  }
+
+  // emitRexB is used for emitting a Rex prefix if one is needed when encoding
+  // the single register operand of an x86 instruction (e.g., push Reg.) That
+  // operand is passed as Rm; RexRegIrrelevant fills the Reg position.
+  template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) {
+    emitRexRB(Ty, RexRegIrrelevant, Ty, Rm);
+  }
+
+  // emitRex is used for emitting a Rex prefix for an address and a GPR. The
+  // address may contain zero, one, or two registers.
+  template <typename RegType>
+  void emitRex(const Type Ty, const Address &Addr, const RegType Reg) {
+    assembleAndEmitRex(Ty, Reg, Ty, RexRegIrrelevant, &Addr); // Rm is ignored
+  }
+
+  template <typename RegType>
+  void emitRex(const Type AddrTy, const Address &Addr, const Type TyReg,
+               const RegType Reg) { // Addr and Reg carry separate types
+    assembleAndEmitRex(TyReg, Reg, AddrTy, RexRegIrrelevant, &Addr);
+  }
+};
+
+inline void AssemblerX8664::emitUint8(uint8_t value) {
+  Buffer.emit<uint8_t>(value); // passed to Buffer as-is
+}
+
+inline void AssemblerX8664::emitInt16(int16_t value) {
+  Buffer.emit<int16_t>(value); // passed to Buffer as a signed 16-bit value
+}
+
+inline void AssemblerX8664::emitInt32(int32_t value) {
+  Buffer.emit<int32_t>(value); // passed to Buffer as a signed 32-bit value
+}
+
+inline void AssemblerX8664::emitRegisterOperand(int reg, int rm) {
+  assert(reg >= 0 && reg < 8); // reg must fit in 3 bits
+  assert(rm >= 0 && rm < 8); // likewise for rm
+  Buffer.emit<uint8_t>(0xC0 + (reg << 3) + rm); // 0xC0, reg in 5:3, rm in 2:0
+}
+
+template <typename RegType, typename RmType>
+inline void AssemblerX8664::emitXmmRegisterOperand(RegType reg, RmType rm) {
+  emitRegisterOperand(gprEncoding(reg), gprEncoding(rm)); // reg first, then rm
+}
+
+inline void AssemblerX8664::emitOperandSizeOverride() { emitUint8(0x66); }
+
 using Label = AssemblerX8664::Label;
 using Immediate = AssemblerX8664::Immediate;
 
diff --git a/third_party/subzero/src/IceAssemblerX8664Base.h b/third_party/subzero/src/IceAssemblerX8664Base.h
deleted file mode 100644
index 8262615..0000000
--- a/third_party/subzero/src/IceAssemblerX8664Base.h
+++ /dev/null
@@ -1,930 +0,0 @@
-//===- subzero/src/IceAssemblerX8664Base.h - base x86 assembler -*- C++
-//-*---===//
-//
-// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-//
-// Modified by the Subzero authors.
-//
-//===----------------------------------------------------------------------===//
-//
-//                        The Subzero Code Generator
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Defines the AssemblerX8664 template class for x86, the base of all
-/// X8664 assemblers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SUBZERO_SRC_ICEASSEMBLERX8664BASE_H
-#define SUBZERO_SRC_ICEASSEMBLERX8664BASE_H
-
-#include "IceAssembler.h"
-#include "IceDefs.h"
-#include "IceOperand.h"
-#include "IceTypes.h"
-#include "IceUtils.h"
-
-namespace Ice {
-namespace X8664 {
-
-template <typename TraitsType>
-class AssemblerX86Base : public ::Ice::Assembler {
-  AssemblerX86Base(const AssemblerX86Base &) = delete;
-  AssemblerX86Base &operator=(const AssemblerX86Base &) = delete;
-
-protected:
-  explicit AssemblerX86Base() : Assembler(Traits::AsmKind) {}
-
-public:
-  using Traits = TraitsType;
-  using Address = typename Traits::Address;
-  using ByteRegister = typename Traits::ByteRegister;
-  using BrCond = typename Traits::Cond::BrCond;
-  using CmppsCond = typename Traits::Cond::CmppsCond;
-  using GPRRegister = typename Traits::GPRRegister;
-  using Operand = typename Traits::Operand;
-  using XmmRegister = typename Traits::XmmRegister;
-
-  static constexpr int MAX_NOP_SIZE = 8;
-
-  static bool classof(const Assembler *Asm) {
-    return Asm->getKind() == Traits::AsmKind;
-  }
-
-  class Immediate {
-    Immediate(const Immediate &) = delete;
-    Immediate &operator=(const Immediate &) = delete;
-
-  public:
-    explicit Immediate(int32_t value) : value_(value) {}
-
-    explicit Immediate(AssemblerFixup *fixup) : fixup_(fixup) {}
-
-    int32_t value() const { return value_; }
-    AssemblerFixup *fixup() const { return fixup_; }
-
-    bool is_int8() const {
-      // We currently only allow 32-bit fixups, and they usually have value = 0,
-      // so if fixup_ != nullptr, it shouldn't be classified as int8/16.
-      return fixup_ == nullptr && Utils::IsInt(8, value_);
-    }
-    bool is_uint8() const {
-      return fixup_ == nullptr && Utils::IsUint(8, value_);
-    }
-    bool is_uint16() const {
-      return fixup_ == nullptr && Utils::IsUint(16, value_);
-    }
-
-  private:
-    const int32_t value_ = 0;
-    AssemblerFixup *fixup_ = nullptr;
-  };
-
-  /// X86 allows near and far jumps.
-  class Label final : public Ice::Label {
-    Label(const Label &) = delete;
-    Label &operator=(const Label &) = delete;
-
-  public:
-    Label() = default;
-    ~Label() = default;
-
-    void finalCheck() const override {
-      Ice::Label::finalCheck();
-      assert(!hasNear());
-    }
-
-    /// Returns the position of an earlier branch instruction which assumes that
-    /// this label is "near", and bumps iterator to the next near position.
-    intptr_t getNearPosition() {
-      assert(hasNear());
-      intptr_t Pos = UnresolvedNearPositions.back();
-      UnresolvedNearPositions.pop_back();
-      return Pos;
-    }
-
-    bool hasNear() const { return !UnresolvedNearPositions.empty(); }
-    bool isUnused() const override {
-      return Ice::Label::isUnused() && !hasNear();
-    }
-
-  private:
-    friend class AssemblerX86Base<TraitsType>;
-
-    void nearLinkTo(const Assembler &Asm, intptr_t position) {
-      if (Asm.getPreliminary())
-        return;
-      assert(!isBound());
-      UnresolvedNearPositions.push_back(position);
-    }
-
-    llvm::SmallVector<intptr_t, 20> UnresolvedNearPositions;
-  };
-
-public:
-  ~AssemblerX86Base() override;
-
-  static const bool kNearJump = true;
-  static const bool kFarJump = false;
-
-  void alignFunction() override;
-
-  SizeT getBundleAlignLog2Bytes() const override { return 5; }
-
-  const char *getAlignDirective() const override { return ".p2align"; }
-
-  llvm::ArrayRef<uint8_t> getNonExecBundlePadding() const override {
-    static const uint8_t Padding[] = {0xF4};
-    return llvm::ArrayRef<uint8_t>(Padding, 1);
-  }
-
-  void padWithNop(intptr_t Padding) override {
-    while (Padding > MAX_NOP_SIZE) {
-      nop(MAX_NOP_SIZE);
-      Padding -= MAX_NOP_SIZE;
-    }
-    if (Padding)
-      nop(Padding);
-  }
-
-  Ice::Label *getCfgNodeLabel(SizeT NodeNumber) override;
-  void bindCfgNodeLabel(const CfgNode *Node) override;
-  Label *getOrCreateCfgNodeLabel(SizeT Number);
-  Label *getOrCreateLocalLabel(SizeT Number);
-  void bindLocalLabel(SizeT Number);
-
-  bool fixupIsPCRel(FixupKind Kind) const override {
-    // Currently assuming this is the only PC-rel relocation type used.
-    // TODO(jpp): Traits.PcRelTypes.count(Kind) != 0
-    return Kind == Traits::FK_PcRel;
-  }
-
-  // Operations to emit GPR instructions (and dispatch on operand type).
-  using TypedEmitGPR = void (AssemblerX86Base::*)(Type, GPRRegister);
-  using TypedEmitAddr = void (AssemblerX86Base::*)(Type, const Address &);
-  struct GPREmitterOneOp {
-    TypedEmitGPR Reg;
-    TypedEmitAddr Addr;
-  };
-
-  using TypedEmitGPRGPR = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                     GPRRegister);
-  using TypedEmitGPRAddr = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                      const Address &);
-  using TypedEmitGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                     const Immediate &);
-  struct GPREmitterRegOp {
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRAddr GPRAddr;
-    TypedEmitGPRImm GPRImm;
-  };
-
-  struct GPREmitterShiftOp {
-    // Technically, Addr/GPR and Addr/Imm are also allowed, but */Addr are
-    // not. In practice, we always normalize the Dest to a Register first.
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRImm GPRImm;
-  };
-
-  using TypedEmitGPRGPRImm = void (AssemblerX86Base::*)(Type, GPRRegister,
-                                                        GPRRegister,
-                                                        const Immediate &);
-  struct GPREmitterShiftD {
-    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
-    // always normalize Dest to a Register first.
-    TypedEmitGPRGPR GPRGPR;
-    TypedEmitGPRGPRImm GPRGPRImm;
-  };
-
-  using TypedEmitAddrGPR = void (AssemblerX86Base::*)(Type, const Address &,
-                                                      GPRRegister);
-  using TypedEmitAddrImm = void (AssemblerX86Base::*)(Type, const Address &,
-                                                      const Immediate &);
-  struct GPREmitterAddrOp {
-    TypedEmitAddrGPR AddrGPR;
-    TypedEmitAddrImm AddrImm;
-  };
-
-  // Operations to emit XMM instructions (and dispatch on operand type).
-  using TypedEmitXmmXmm = void (AssemblerX86Base::*)(Type, XmmRegister,
-                                                     XmmRegister);
-  using TypedEmitXmmAddr = void (AssemblerX86Base::*)(Type, XmmRegister,
-                                                      const Address &);
-  struct XmmEmitterRegOp {
-    TypedEmitXmmXmm XmmXmm;
-    TypedEmitXmmAddr XmmAddr;
-  };
-
-  using EmitXmmXmm = void (AssemblerX86Base::*)(XmmRegister, XmmRegister);
-  using EmitXmmAddr = void (AssemblerX86Base::*)(XmmRegister, const Address &);
-  using EmitAddrXmm = void (AssemblerX86Base::*)(const Address &, XmmRegister);
-  struct XmmEmitterMovOps {
-    EmitXmmXmm XmmXmm;
-    EmitXmmAddr XmmAddr;
-    EmitAddrXmm AddrXmm;
-  };
-
-  using TypedEmitXmmImm = void (AssemblerX86Base::*)(Type, XmmRegister,
-                                                     const Immediate &);
-
-  struct XmmEmitterShiftOp {
-    TypedEmitXmmXmm XmmXmm;
-    TypedEmitXmmAddr XmmAddr;
-    TypedEmitXmmImm XmmImm;
-  };
-
-  // Cross Xmm/GPR cast instructions.
-  template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
-    using TypedEmitRegs = void (AssemblerX86Base::*)(Type, DReg_t, Type,
-                                                     SReg_t);
-    using TypedEmitAddr = void (AssemblerX86Base::*)(Type, DReg_t, Type,
-                                                     const Address &);
-
-    TypedEmitRegs RegReg;
-    TypedEmitAddr RegAddr;
-  };
-
-  // Three operand (potentially) cross Xmm/GPR instructions. The last operand
-  // must be an immediate.
-  template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
-    using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t,
-                                                          const Immediate &);
-    using TypedEmitRegAddrImm = void (AssemblerX86Base::*)(Type, DReg_t,
-                                                           const Address &,
-                                                           const Immediate &);
-
-    TypedEmitRegRegImm RegRegImm;
-    TypedEmitRegAddrImm RegAddrImm;
-  };
-
-  /*
-   * Emit Machine Instructions.
-   */
-  void call(GPRRegister reg);
-  void call(const Address &address);
-  void call(const ConstantRelocatable *label); // not testable.
-  void call(const Immediate &abs_address);
-
-  static const intptr_t kCallExternalLabelSize = 5;
-
-  void pushl(GPRRegister reg);
-  void pushl(const Immediate &Imm);
-  void pushl(const ConstantRelocatable *Label);
-
-  void popl(GPRRegister reg);
-  void popl(const Address &address);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::HasPusha>::type>
-  void pushal();
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::HasPopa>::type>
-  void popal();
-
-  void setcc(BrCond condition, ByteRegister dst);
-  void setcc(BrCond condition, const Address &address);
-
-  void mov(Type Ty, GPRRegister dst, const Immediate &src);
-  void mov(Type Ty, GPRRegister dst, GPRRegister src);
-  void mov(Type Ty, GPRRegister dst, const Address &src);
-  void mov(Type Ty, const Address &dst, GPRRegister src);
-  void mov(Type Ty, const Address &dst, const Immediate &imm);
-
-  template <typename T = Traits>
-  typename std::enable_if<T::Is64Bit, void>::type movabs(const GPRRegister Dst,
-                                                         uint64_t Imm64);
-  template <typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, void>::type movabs(const GPRRegister,
-                                                          uint64_t) {
-    llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
-  }
-
-  void movzx(Type Ty, GPRRegister dst, GPRRegister src);
-  void movzx(Type Ty, GPRRegister dst, const Address &src);
-  void movsx(Type Ty, GPRRegister dst, GPRRegister src);
-  void movsx(Type Ty, GPRRegister dst, const Address &src);
-
-  void lea(Type Ty, GPRRegister dst, const Address &src);
-
-  void cmov(Type Ty, BrCond cond, GPRRegister dst, GPRRegister src);
-  void cmov(Type Ty, BrCond cond, GPRRegister dst, const Address &src);
-
-  void rep_movsb();
-
-  void movss(Type Ty, XmmRegister dst, const Address &src);
-  void movss(Type Ty, const Address &dst, XmmRegister src);
-  void movss(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void movd(Type SrcTy, XmmRegister dst, GPRRegister src);
-  void movd(Type SrcTy, XmmRegister dst, const Address &src);
-  void movd(Type DestTy, GPRRegister dst, XmmRegister src);
-  void movd(Type DestTy, const Address &dst, XmmRegister src);
-
-  void movq(XmmRegister dst, XmmRegister src);
-  void movq(const Address &dst, XmmRegister src);
-  void movq(XmmRegister dst, const Address &src);
-
-  void addss(Type Ty, XmmRegister dst, XmmRegister src);
-  void addss(Type Ty, XmmRegister dst, const Address &src);
-  void subss(Type Ty, XmmRegister dst, XmmRegister src);
-  void subss(Type Ty, XmmRegister dst, const Address &src);
-  void mulss(Type Ty, XmmRegister dst, XmmRegister src);
-  void mulss(Type Ty, XmmRegister dst, const Address &src);
-  void divss(Type Ty, XmmRegister dst, XmmRegister src);
-  void divss(Type Ty, XmmRegister dst, const Address &src);
-
-  void movaps(XmmRegister dst, XmmRegister src);
-
-  void movups(XmmRegister dst, XmmRegister src);
-  void movups(XmmRegister dst, const Address &src);
-  void movups(const Address &dst, XmmRegister src);
-
-  void padd(Type Ty, XmmRegister dst, XmmRegister src);
-  void padd(Type Ty, XmmRegister dst, const Address &src);
-  void padds(Type Ty, XmmRegister dst, XmmRegister src);
-  void padds(Type Ty, XmmRegister dst, const Address &src);
-  void paddus(Type Ty, XmmRegister dst, XmmRegister src);
-  void paddus(Type Ty, XmmRegister dst, const Address &src);
-  void pand(Type Ty, XmmRegister dst, XmmRegister src);
-  void pand(Type Ty, XmmRegister dst, const Address &src);
-  void pandn(Type Ty, XmmRegister dst, XmmRegister src);
-  void pandn(Type Ty, XmmRegister dst, const Address &src);
-  void pmull(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmull(Type Ty, XmmRegister dst, const Address &src);
-  void pmulhw(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmulhw(Type Ty, XmmRegister dst, const Address &src);
-  void pmulhuw(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmulhuw(Type Ty, XmmRegister dst, const Address &src);
-  void pmaddwd(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmaddwd(Type Ty, XmmRegister dst, const Address &src);
-  void pmuludq(Type Ty, XmmRegister dst, XmmRegister src);
-  void pmuludq(Type Ty, XmmRegister dst, const Address &src);
-  void por(Type Ty, XmmRegister dst, XmmRegister src);
-  void por(Type Ty, XmmRegister dst, const Address &src);
-  void psub(Type Ty, XmmRegister dst, XmmRegister src);
-  void psub(Type Ty, XmmRegister dst, const Address &src);
-  void psubs(Type Ty, XmmRegister dst, XmmRegister src);
-  void psubs(Type Ty, XmmRegister dst, const Address &src);
-  void psubus(Type Ty, XmmRegister dst, XmmRegister src);
-  void psubus(Type Ty, XmmRegister dst, const Address &src);
-  void pxor(Type Ty, XmmRegister dst, XmmRegister src);
-  void pxor(Type Ty, XmmRegister dst, const Address &src);
-
-  void psll(Type Ty, XmmRegister dst, XmmRegister src);
-  void psll(Type Ty, XmmRegister dst, const Address &src);
-  void psll(Type Ty, XmmRegister dst, const Immediate &src);
-
-  void psra(Type Ty, XmmRegister dst, XmmRegister src);
-  void psra(Type Ty, XmmRegister dst, const Address &src);
-  void psra(Type Ty, XmmRegister dst, const Immediate &src);
-  void psrl(Type Ty, XmmRegister dst, XmmRegister src);
-  void psrl(Type Ty, XmmRegister dst, const Address &src);
-  void psrl(Type Ty, XmmRegister dst, const Immediate &src);
-
-  void addps(Type Ty, XmmRegister dst, XmmRegister src);
-  void addps(Type Ty, XmmRegister dst, const Address &src);
-  void subps(Type Ty, XmmRegister dst, XmmRegister src);
-  void subps(Type Ty, XmmRegister dst, const Address &src);
-  void divps(Type Ty, XmmRegister dst, XmmRegister src);
-  void divps(Type Ty, XmmRegister dst, const Address &src);
-  void mulps(Type Ty, XmmRegister dst, XmmRegister src);
-  void mulps(Type Ty, XmmRegister dst, const Address &src);
-  void minps(Type Ty, XmmRegister dst, const Address &src);
-  void minps(Type Ty, XmmRegister dst, XmmRegister src);
-  void minss(Type Ty, XmmRegister dst, const Address &src);
-  void minss(Type Ty, XmmRegister dst, XmmRegister src);
-  void maxps(Type Ty, XmmRegister dst, const Address &src);
-  void maxps(Type Ty, XmmRegister dst, XmmRegister src);
-  void maxss(Type Ty, XmmRegister dst, const Address &src);
-  void maxss(Type Ty, XmmRegister dst, XmmRegister src);
-  void andnps(Type Ty, XmmRegister dst, const Address &src);
-  void andnps(Type Ty, XmmRegister dst, XmmRegister src);
-  void andps(Type Ty, XmmRegister dst, const Address &src);
-  void andps(Type Ty, XmmRegister dst, XmmRegister src);
-  void orps(Type Ty, XmmRegister dst, const Address &src);
-  void orps(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void blendvps(Type Ty, XmmRegister dst, XmmRegister src);
-  void blendvps(Type Ty, XmmRegister dst, const Address &src);
-  void pblendvb(Type Ty, XmmRegister dst, XmmRegister src);
-  void pblendvb(Type Ty, XmmRegister dst, const Address &src);
-
-  void cmpps(Type Ty, XmmRegister dst, XmmRegister src, CmppsCond CmpCondition);
-  void cmpps(Type Ty, XmmRegister dst, const Address &src,
-             CmppsCond CmpCondition);
-
-  void sqrtps(XmmRegister dst);
-  void rsqrtps(XmmRegister dst);
-  void reciprocalps(XmmRegister dst);
-
-  void movhlps(XmmRegister dst, XmmRegister src);
-  void movlhps(XmmRegister dst, XmmRegister src);
-  void unpcklps(XmmRegister dst, XmmRegister src);
-  void unpckhps(XmmRegister dst, XmmRegister src);
-  void unpcklpd(XmmRegister dst, XmmRegister src);
-  void unpckhpd(XmmRegister dst, XmmRegister src);
-
-  void set1ps(XmmRegister dst, GPRRegister tmp, const Immediate &imm);
-
-  void sqrtpd(XmmRegister dst);
-
-  void pshufb(Type Ty, XmmRegister dst, XmmRegister src);
-  void pshufb(Type Ty, XmmRegister dst, const Address &src);
-  void pshufd(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
-  void pshufd(Type Ty, XmmRegister dst, const Address &src,
-              const Immediate &mask);
-  void punpckl(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void punpckl(Type Ty, XmmRegister Dst, const Address &Src);
-  void punpckh(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void punpckh(Type Ty, XmmRegister Dst, const Address &Src);
-  void packss(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void packss(Type Ty, XmmRegister Dst, const Address &Src);
-  void packus(Type Ty, XmmRegister Dst, XmmRegister Src);
-  void packus(Type Ty, XmmRegister Dst, const Address &Src);
-  void shufps(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mask);
-  void shufps(Type Ty, XmmRegister dst, const Address &src,
-              const Immediate &mask);
-
-  void cvtdq2ps(Type, XmmRegister dst, XmmRegister src);
-  void cvtdq2ps(Type, XmmRegister dst, const Address &src);
-
-  void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
-  void cvttps2dq(Type, XmmRegister dst, const Address &src);
-
-  void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
-  void cvtps2dq(Type, XmmRegister dst, const Address &src);
-
-  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
-  void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
-
-  void cvtfloat2float(Type SrcTy, XmmRegister dst, XmmRegister src);
-  void cvtfloat2float(Type SrcTy, XmmRegister dst, const Address &src);
-
-  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
-  void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
-
-  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
-  void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
-
-  void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
-  void ucomiss(Type Ty, XmmRegister a, const Address &b);
-
-  void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
-
-  void sqrt(Type Ty, XmmRegister dst, const Address &src);
-  void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void xorps(Type Ty, XmmRegister dst, const Address &src);
-  void xorps(Type Ty, XmmRegister dst, XmmRegister src);
-
-  void insertps(Type Ty, XmmRegister dst, XmmRegister src,
-                const Immediate &imm);
-  void insertps(Type Ty, XmmRegister dst, const Address &src,
-                const Immediate &imm);
-
-  void pinsr(Type Ty, XmmRegister dst, GPRRegister src, const Immediate &imm);
-  void pinsr(Type Ty, XmmRegister dst, const Address &src,
-             const Immediate &imm);
-
-  void pextr(Type Ty, GPRRegister dst, XmmRegister src, const Immediate &imm);
-
-  void pmovsxdq(XmmRegister dst, XmmRegister src);
-
-  void pcmpeq(Type Ty, XmmRegister dst, XmmRegister src);
-  void pcmpeq(Type Ty, XmmRegister dst, const Address &src);
-  void pcmpgt(Type Ty, XmmRegister dst, XmmRegister src);
-  void pcmpgt(Type Ty, XmmRegister dst, const Address &src);
-
-  enum RoundingMode {
-    kRoundToNearest = 0x0,
-    kRoundDown = 0x1,
-    kRoundUp = 0x2,
-    kRoundToZero = 0x3
-  };
-  void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
-  void round(Type Ty, XmmRegister dst, const Address &src,
-             const Immediate &mode);
-
-  //----------------------------------------------------------------------------
-  //
-  // Begin: X87 instructions. Only available when Traits::UsesX87.
-  //
-  //----------------------------------------------------------------------------
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fld(Type Ty, const typename T::Address &src);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fstp(Type Ty, const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fstp(typename T::X87STRegister st);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fnstcw(const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fldcw(const typename T::Address &src);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fistpl(const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fistps(const typename T::Address &dst);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fildl(const typename T::Address &src);
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void filds(const typename T::Address &src);
-
-  template <typename T = Traits,
-            typename = typename std::enable_if<T::UsesX87>::type>
-  void fincstp();
-  //----------------------------------------------------------------------------
-  //
-  // End: X87 instructions.
-  //
-  //----------------------------------------------------------------------------
-
-  void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void cmp(Type Ty, GPRRegister reg, const Address &address);
-  void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
-  void cmp(Type Ty, const Address &address, GPRRegister reg);
-  void cmp(Type Ty, const Address &address, const Immediate &imm);
-
-  void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void test(Type Ty, GPRRegister reg, const Immediate &imm);
-  void test(Type Ty, const Address &address, GPRRegister reg);
-  void test(Type Ty, const Address &address, const Immediate &imm);
-
-  void And(Type Ty, GPRRegister dst, GPRRegister src);
-  void And(Type Ty, GPRRegister dst, const Address &address);
-  void And(Type Ty, GPRRegister dst, const Immediate &imm);
-  void And(Type Ty, const Address &address, GPRRegister reg);
-  void And(Type Ty, const Address &address, const Immediate &imm);
-
-  void Or(Type Ty, GPRRegister dst, GPRRegister src);
-  void Or(Type Ty, GPRRegister dst, const Address &address);
-  void Or(Type Ty, GPRRegister dst, const Immediate &imm);
-  void Or(Type Ty, const Address &address, GPRRegister reg);
-  void Or(Type Ty, const Address &address, const Immediate &imm);
-
-  void Xor(Type Ty, GPRRegister dst, GPRRegister src);
-  void Xor(Type Ty, GPRRegister dst, const Address &address);
-  void Xor(Type Ty, GPRRegister dst, const Immediate &imm);
-  void Xor(Type Ty, const Address &address, GPRRegister reg);
-  void Xor(Type Ty, const Address &address, const Immediate &imm);
-
-  void add(Type Ty, GPRRegister dst, GPRRegister src);
-  void add(Type Ty, GPRRegister reg, const Address &address);
-  void add(Type Ty, GPRRegister reg, const Immediate &imm);
-  void add(Type Ty, const Address &address, GPRRegister reg);
-  void add(Type Ty, const Address &address, const Immediate &imm);
-
-  void adc(Type Ty, GPRRegister dst, GPRRegister src);
-  void adc(Type Ty, GPRRegister dst, const Address &address);
-  void adc(Type Ty, GPRRegister reg, const Immediate &imm);
-  void adc(Type Ty, const Address &address, GPRRegister reg);
-  void adc(Type Ty, const Address &address, const Immediate &imm);
-
-  void sub(Type Ty, GPRRegister dst, GPRRegister src);
-  void sub(Type Ty, GPRRegister reg, const Address &address);
-  void sub(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sub(Type Ty, const Address &address, GPRRegister reg);
-  void sub(Type Ty, const Address &address, const Immediate &imm);
-
-  void sbb(Type Ty, GPRRegister dst, GPRRegister src);
-  void sbb(Type Ty, GPRRegister reg, const Address &address);
-  void sbb(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sbb(Type Ty, const Address &address, GPRRegister reg);
-  void sbb(Type Ty, const Address &address, const Immediate &imm);
-
-  void cbw();
-  void cwd();
-  void cdq();
-  template <typename T = Traits>
-  typename std::enable_if<T::Is64Bit, void>::type cqo();
-  template <typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, void>::type cqo() {
-    llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
-  }
-
-  void div(Type Ty, GPRRegister reg);
-  void div(Type Ty, const Address &address);
-
-  void idiv(Type Ty, GPRRegister reg);
-  void idiv(Type Ty, const Address &address);
-
-  void imul(Type Ty, GPRRegister dst, GPRRegister src);
-  void imul(Type Ty, GPRRegister reg, const Immediate &imm);
-  void imul(Type Ty, GPRRegister reg, const Address &address);
-
-  void imul(Type Ty, GPRRegister reg);
-  void imul(Type Ty, const Address &address);
-
-  void imul(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void imul(Type Ty, GPRRegister dst, const Address &address,
-            const Immediate &imm);
-
-  void mul(Type Ty, GPRRegister reg);
-  void mul(Type Ty, const Address &address);
-
-  template <class T = Traits,
-            typename = typename std::enable_if<!T::Is64Bit>::type>
-  void incl(GPRRegister reg);
-  void incl(const Address &address);
-
-  template <class T = Traits,
-            typename = typename std::enable_if<!T::Is64Bit>::type>
-  void decl(GPRRegister reg);
-  void decl(const Address &address);
-
-  void rol(Type Ty, GPRRegister reg, const Immediate &imm);
-  void rol(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void rol(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void shl(Type Ty, GPRRegister reg, const Immediate &imm);
-  void shl(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void shl(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void shr(Type Ty, GPRRegister reg, const Immediate &imm);
-  void shr(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void shr(Type Ty, const Address &operand, GPRRegister shifter);
-
-  void sar(Type Ty, GPRRegister reg, const Immediate &imm);
-  void sar(Type Ty, GPRRegister operand, GPRRegister shifter);
-  void sar(Type Ty, const Address &address, GPRRegister shifter);
-
-  void shld(Type Ty, GPRRegister dst, GPRRegister src);
-  void shld(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void shld(Type Ty, const Address &operand, GPRRegister src);
-  void shrd(Type Ty, GPRRegister dst, GPRRegister src);
-  void shrd(Type Ty, GPRRegister dst, GPRRegister src, const Immediate &imm);
-  void shrd(Type Ty, const Address &dst, GPRRegister src);
-
-  void neg(Type Ty, GPRRegister reg);
-  void neg(Type Ty, const Address &addr);
-  void notl(GPRRegister reg);
-
-  void bsf(Type Ty, GPRRegister dst, GPRRegister src);
-  void bsf(Type Ty, GPRRegister dst, const Address &src);
-  void bsr(Type Ty, GPRRegister dst, GPRRegister src);
-  void bsr(Type Ty, GPRRegister dst, const Address &src);
-
-  void bswap(Type Ty, GPRRegister reg);
-
-  void bt(GPRRegister base, GPRRegister offset);
-
-  void ret();
-  void ret(const Immediate &imm);
-
-  // 'size' indicates size in bytes and must be in the range 1..8.
-  void nop(int size = 1);
-  void int3();
-  void hlt();
-  void ud2();
-
-  // j(Label) is fully tested.
-  void j(BrCond condition, Label *label, bool near = kFarJump);
-  void j(BrCond condition, const ConstantRelocatable *label); // not testable.
-
-  void jmp(GPRRegister reg);
-  void jmp(Label *label, bool near = kFarJump);
-  void jmp(const ConstantRelocatable *label); // not testable.
-  void jmp(const Immediate &abs_address);
-
-  void mfence();
-
-  void lock();
-  void cmpxchg(Type Ty, const Address &address, GPRRegister reg, bool Locked);
-  void cmpxchg8b(const Address &address, bool Locked);
-  void xadd(Type Ty, const Address &address, GPRRegister reg, bool Locked);
-  void xchg(Type Ty, GPRRegister reg0, GPRRegister reg1);
-  void xchg(Type Ty, const Address &address, GPRRegister reg);
-
-  /// \name Intel Architecture Code Analyzer markers.
-  /// @{
-  void iaca_start();
-  void iaca_end();
-  /// @}
-
-  void emitSegmentOverride(uint8_t prefix);
-
-  intptr_t preferredLoopAlignment() { return 16; }
-  void align(intptr_t alignment, intptr_t offset);
-  void bind(Label *label);
-
-  intptr_t CodeSize() const { return Buffer.size(); }
-
-protected:
-  inline void emitUint8(uint8_t value);
-
-private:
-  ENABLE_MAKE_UNIQUE;
-
-  static constexpr Type RexTypeIrrelevant = IceType_i32;
-  static constexpr Type RexTypeForceRexW = IceType_i64;
-  static constexpr GPRRegister RexRegIrrelevant =
-      Traits::GPRRegister::Encoded_Reg_eax;
-
-  inline void emitInt16(int16_t value);
-  inline void emitInt32(int32_t value);
-  inline void emitRegisterOperand(int rm, int reg);
-  template <typename RegType, typename RmType>
-  inline void emitXmmRegisterOperand(RegType reg, RmType rm);
-  inline void emitOperandSizeOverride();
-
-  void emitOperand(int rm, const Operand &operand, RelocOffsetT Addend = 0);
-  void emitImmediate(Type ty, const Immediate &imm);
-  void emitComplexI8(int rm, const Operand &operand,
-                     const Immediate &immediate);
-  void emitComplex(Type Ty, int rm, const Operand &operand,
-                   const Immediate &immediate);
-  void emitLabel(Label *label, intptr_t instruction_size);
-  void emitLabelLink(Label *label);
-  void emitNearLabelLink(Label *label);
-
-  void emitGenericShift(int rm, Type Ty, GPRRegister reg, const Immediate &imm);
-  void emitGenericShift(int rm, Type Ty, const Operand &operand,
-                        GPRRegister shifter);
-
-  using LabelVector = std::vector<Label *>;
-  // A vector of pool-allocated x86 labels for CFG nodes.
-  LabelVector CfgNodeLabels;
-  // A vector of pool-allocated x86 labels for Local labels.
-  LabelVector LocalLabels;
-
-  Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
-
-  // The arith_int() methods factor out the commonality between the encodings
-  // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
-  // parameter is statically asserted to be less than 8.
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg, const Immediate &imm);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg0, GPRRegister reg1);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, GPRRegister reg, const Address &address);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, const Address &address, GPRRegister reg);
-
-  template <uint32_t Tag>
-  void arith_int(Type Ty, const Address &address, const Immediate &imm);
-
-  // gprEncoding returns Reg encoding for operand emission. For x86-64 we mask
-  // out the 4th bit as it is encoded in the REX.[RXB] bits. No other bits are
-  // touched because we don't want to mask errors.
-  template <typename RegType, typename T = Traits>
-  typename std::enable_if<T::Is64Bit, typename T::GPRRegister>::type
-  gprEncoding(const RegType Reg) {
-    return static_cast<GPRRegister>(static_cast<uint8_t>(Reg) & ~0x08);
-  }
-
-  template <typename RegType, typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, typename T::GPRRegister>::type
-  gprEncoding(const RegType Reg) {
-    return static_cast<typename T::GPRRegister>(Reg);
-  }
-
-  template <typename RegType>
-  bool is8BitRegisterRequiringRex(const Type Ty, const RegType Reg) {
-    static constexpr bool IsGPR =
-        std::is_same<typename std::decay<RegType>::type, ByteRegister>::value ||
-        std::is_same<typename std::decay<RegType>::type, GPRRegister>::value;
-
-    // At this point in the assembler, we have encoded regs, so it is not
-    // possible to distinguish between the "new" low byte registers introduced
-    // in x86-64 and the legacy [abcd]h registers. Because x86, we may still
-    // see ah (div) in the assembler, so we allow it here.
-    //
-    // The "local" uint32_t Encoded_Reg_ah is needed because RegType is an
-    // enum that is not necessarily the same type of
-    // Traits::RegisterSet::Encoded_Reg_ah.
-    constexpr uint32_t Encoded_Reg_ah = Traits::RegisterSet::Encoded_Reg_ah;
-    return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
-           isByteSizedType(Ty) && (Reg != Encoded_Reg_ah);
-  }
-
-  // assembleAndEmitRex is used for determining which (if any) rex prefix
-  // should be emitted for the current instruction. It allows different types
-  // for Reg and Rm because they could be of different types (e.g., in
-  // mov[sz]x instructions.) If Addr is not nullptr, then Rm is ignored, and
-  // Rex.B is determined by Addr instead. TyRm is still used to determine
-  // Addr's size.
-  template <typename RegType, typename RmType, typename T = Traits>
-  typename std::enable_if<T::Is64Bit, void>::type
-  assembleAndEmitRex(const Type TyReg, const RegType Reg, const Type TyRm,
-                     const RmType Rm,
-                     const typename T::Address *Addr = nullptr) {
-    const uint8_t W = (TyReg == IceType_i64 || TyRm == IceType_i64)
-                          ? T::Operand::RexW
-                          : T::Operand::RexNone;
-    const uint8_t R = (Reg & 0x08) ? T::Operand::RexR : T::Operand::RexNone;
-    const uint8_t X = (Addr != nullptr)
-                          ? (typename T::Operand::RexBits)Addr->rexX()
-                          : T::Operand::RexNone;
-    const uint8_t B = (Addr != nullptr)
-                          ? (typename T::Operand::RexBits)Addr->rexB()
-                      : (Rm & 0x08) ? T::Operand::RexB
-                                    : T::Operand::RexNone;
-    const uint8_t Prefix = W | R | X | B;
-    if (Prefix != T::Operand::RexNone) {
-      emitUint8(Prefix);
-    } else if (is8BitRegisterRequiringRex(TyReg, Reg) ||
-               (Addr == nullptr && is8BitRegisterRequiringRex(TyRm, Rm))) {
-      emitUint8(T::Operand::RexBase);
-    }
-  }
-
-  template <typename RegType, typename RmType, typename T = Traits>
-  typename std::enable_if<!T::Is64Bit, void>::type
-  assembleAndEmitRex(const Type, const RegType, const Type, const RmType,
-                     const typename T::Address * = nullptr) {}
-
-  // emitRexRB is used for emitting a Rex prefix instructions with two
-  // explicit register operands in its mod-rm byte.
-  template <typename RegType, typename RmType>
-  void emitRexRB(const Type Ty, const RegType Reg, const RmType Rm) {
-    assembleAndEmitRex(Ty, Reg, Ty, Rm);
-  }
-
-  template <typename RegType, typename RmType>
-  void emitRexRB(const Type TyReg, const RegType Reg, const Type TyRm,
-                 const RmType Rm) {
-    assembleAndEmitRex(TyReg, Reg, TyRm, Rm);
-  }
-
-  // emitRexB is used for emitting a Rex prefix if one is needed on encoding
-  // the Reg field in an x86 instruction. It is invoked by the template when
-  // Reg is the single register operand in the instruction (e.g., push Reg.)
-  template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) {
-    emitRexRB(Ty, RexRegIrrelevant, Ty, Rm);
-  }
-
-  // emitRex is used for emitting a Rex prefix for an address and a GPR. The
-  // address may contain zero, one, or two registers.
-  template <typename RegType>
-  void emitRex(const Type Ty, const Address &Addr, const RegType Reg) {
-    assembleAndEmitRex(Ty, Reg, Ty, RexRegIrrelevant, &Addr);
-  }
-
-  template <typename RegType>
-  void emitRex(const Type AddrTy, const Address &Addr, const Type TyReg,
-               const RegType Reg) {
-    assembleAndEmitRex(TyReg, Reg, AddrTy, RexRegIrrelevant, &Addr);
-  }
-};
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitUint8(uint8_t value) {
-  Buffer.emit<uint8_t>(value);
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitInt16(int16_t value) {
-  Buffer.emit<int16_t>(value);
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitInt32(int32_t value) {
-  Buffer.emit<int32_t>(value);
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitRegisterOperand(int reg, int rm) {
-  assert(reg >= 0 && reg < 8);
-  assert(rm >= 0 && rm < 8);
-  Buffer.emit<uint8_t>(0xC0 + (reg << 3) + rm);
-}
-
-template <typename TraitsType>
-template <typename RegType, typename RmType>
-inline void AssemblerX86Base<TraitsType>::emitXmmRegisterOperand(RegType reg,
-                                                                 RmType rm) {
-  emitRegisterOperand(gprEncoding(reg), gprEncoding(rm));
-}
-
-template <typename TraitsType>
-inline void AssemblerX86Base<TraitsType>::emitOperandSizeOverride() {
-  emitUint8(0x66);
-}
-
-} // end of namespace X8664
-} // end of namespace Ice
-
-#include "IceAssemblerX8664BaseImpl.h"
-
-#endif // SUBZERO_SRC_ICEASSEMBLERX8664BASE_H
diff --git a/third_party/subzero/src/IceInstX8632Base.h b/third_party/subzero/src/IceInstX8632Base.h
index 1f6f84d..29c7c92 100644
--- a/third_party/subzero/src/IceInstX8632Base.h
+++ b/third_party/subzero/src/IceInstX8632Base.h
@@ -20,6 +20,7 @@
 #ifndef SUBZERO_SRC_ICEINSTX8632BASE_H
 #define SUBZERO_SRC_ICEINSTX8632BASE_H
 
+#include "IceAssemblerX8632.h"
 #include "IceDefs.h"
 #include "IceInst.h"
 #include "IceOperand.h"
@@ -29,7 +30,7 @@
 
 template <typename TraitsType> struct InstImpl {
   using Traits = TraitsType;
-  using Assembler = typename Traits::Assembler;
+  using Assembler = AssemblerX8632;
   using AssemblerLabel = typename Assembler::Label;
   using AssemblerImmediate = typename Assembler::Immediate;
   using TargetLowering = typename Traits::TargetLowering;
@@ -48,18 +49,18 @@
 
   template <typename SReg_t, typename DReg_t>
   using CastEmitterRegOp =
-      typename Traits::Assembler::template CastEmitterRegOp<SReg_t, DReg_t>;
+      typename Assembler::template CastEmitterRegOp<SReg_t, DReg_t>;
   template <typename SReg_t, typename DReg_t>
   using ThreeOpImmEmitter =
-      typename Traits::Assembler::template ThreeOpImmEmitter<SReg_t, DReg_t>;
-  using GPREmitterAddrOp = typename Traits::Assembler::GPREmitterAddrOp;
-  using GPREmitterRegOp = typename Traits::Assembler::GPREmitterRegOp;
-  using GPREmitterShiftD = typename Traits::Assembler::GPREmitterShiftD;
-  using GPREmitterShiftOp = typename Traits::Assembler::GPREmitterShiftOp;
-  using GPREmitterOneOp = typename Traits::Assembler::GPREmitterOneOp;
-  using XmmEmitterRegOp = typename Traits::Assembler::XmmEmitterRegOp;
-  using XmmEmitterShiftOp = typename Traits::Assembler::XmmEmitterShiftOp;
-  using XmmEmitterMovOps = typename Traits::Assembler::XmmEmitterMovOps;
+      typename Assembler::template ThreeOpImmEmitter<SReg_t, DReg_t>;
+  using GPREmitterAddrOp = typename Assembler::GPREmitterAddrOp;
+  using GPREmitterRegOp = typename Assembler::GPREmitterRegOp;
+  using GPREmitterShiftD = typename Assembler::GPREmitterShiftD;
+  using GPREmitterShiftOp = typename Assembler::GPREmitterShiftOp;
+  using GPREmitterOneOp = typename Assembler::GPREmitterOneOp;
+  using XmmEmitterRegOp = typename Assembler::XmmEmitterRegOp;
+  using XmmEmitterShiftOp = typename Assembler::XmmEmitterShiftOp;
+  using XmmEmitterMovOps = typename Assembler::XmmEmitterMovOps;
 
   class InstX86Base : public InstTarget {
     InstX86Base() = delete;
@@ -3272,14 +3273,8 @@
   using StoreQ = typename InstImpl<TraitsType>::InstX86StoreQ;
   using StoreD = typename InstImpl<TraitsType>::InstX86StoreD;
   using Nop = typename InstImpl<TraitsType>::InstX86Nop;
-  template <typename T = typename InstImpl<TraitsType>::Traits>
-  using Fld =
-      typename std::enable_if<T::UsesX87,
-                              typename InstImpl<TraitsType>::InstX86Fld>::type;
-  template <typename T = typename InstImpl<TraitsType>::Traits>
-  using Fstp =
-      typename std::enable_if<T::UsesX87,
-                              typename InstImpl<TraitsType>::InstX86Fstp>::type;
+  using Fld = typename InstImpl<TraitsType>::InstX86Fld;
+  using Fstp = typename InstImpl<TraitsType>::InstX86Fstp;
   using Pop = typename InstImpl<TraitsType>::InstX86Pop;
   using Push = typename InstImpl<TraitsType>::InstX86Push;
   using Ret = typename InstImpl<TraitsType>::InstX86Ret;
diff --git a/third_party/subzero/src/IceInstX8632BaseImpl.h b/third_party/subzero/src/IceInstX8632BaseImpl.h
index a77a6e0..c14354f 100644
--- a/third_party/subzero/src/IceInstX8632BaseImpl.h
+++ b/third_party/subzero/src/IceInstX8632BaseImpl.h
@@ -18,7 +18,7 @@
 
 #include "IceInstX8632Base.h"
 
-#include "IceAssemblerX8632Base.h"
+#include "IceAssemblerX8632.h"
 #include "IceCfg.h"
 #include "IceCfgNode.h"
 #include "IceDefs.h"
@@ -2270,13 +2270,6 @@
       assert(isScalarIntegerType(DestTy));
       // Widen DestTy for truncation (see above note). We should only do this
       // when both Src and Dest are integer types.
-      if (Traits::Is64Bit && DestTy == IceType_i64) {
-        if (const auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src)) {
-          Func->getAssembler<Assembler>()->movabs(
-              Traits::getEncodedGPR(Dest->getRegNum()), C64->getValue());
-          return;
-        }
-      }
       if (isScalarIntegerType(SrcTy)) {
         SrcTy = DestTy;
       }
diff --git a/third_party/subzero/src/IceInstX8664Base.h b/third_party/subzero/src/IceInstX8664Base.h
index e2d1560..2d5cbe1 100644
--- a/third_party/subzero/src/IceInstX8664Base.h
+++ b/third_party/subzero/src/IceInstX8664Base.h
@@ -20,6 +20,7 @@
 #ifndef SUBZERO_SRC_ICEINSTX8664BASE_H
 #define SUBZERO_SRC_ICEINSTX8664BASE_H
 
+#include "IceAssemblerX8664.h"
 #include "IceDefs.h"
 #include "IceInst.h"
 #include "IceOperand.h"
@@ -29,7 +30,7 @@
 
 template <typename TraitsType> struct InstImpl {
   using Traits = TraitsType;
-  using Assembler = typename Traits::Assembler;
+  using Assembler = AssemblerX8664;
   using AssemblerLabel = typename Assembler::Label;
   using AssemblerImmediate = typename Assembler::Immediate;
   using TargetLowering = typename Traits::TargetLowering;
@@ -2842,48 +2843,6 @@
     NopVariant Variant;
   };
 
-  /// Fld - load a value onto the x87 FP stack.
-  class InstX86Fld final : public InstX86Base {
-    InstX86Fld() = delete;
-    InstX86Fld(const InstX86Fld &) = delete;
-    InstX86Fld &operator=(const InstX86Fld &) = delete;
-
-  public:
-    static InstX86Fld *create(Cfg *Func, Operand *Src) {
-      return new (Func->allocate<InstX86Fld>()) InstX86Fld(Func, Src);
-    }
-    void emit(const Cfg *Func) const override;
-    void emitIAS(const Cfg *Func) const override;
-    void dump(const Cfg *Func) const override;
-    static bool classof(const Inst *Instr) {
-      return InstX86Base::isClassof(Instr, InstX86Base::Fld);
-    }
-
-  private:
-    InstX86Fld(Cfg *Func, Operand *Src);
-  };
-
-  /// Fstp - store x87 st(0) into memory and pop st(0).
-  class InstX86Fstp final : public InstX86Base {
-    InstX86Fstp() = delete;
-    InstX86Fstp(const InstX86Fstp &) = delete;
-    InstX86Fstp &operator=(const InstX86Fstp &) = delete;
-
-  public:
-    static InstX86Fstp *create(Cfg *Func, Variable *Dest) {
-      return new (Func->allocate<InstX86Fstp>()) InstX86Fstp(Func, Dest);
-    }
-    void emit(const Cfg *Func) const override;
-    void emitIAS(const Cfg *Func) const override;
-    void dump(const Cfg *Func) const override;
-    static bool classof(const Inst *Instr) {
-      return InstX86Base::isClassof(Instr, InstX86Base::Fstp);
-    }
-
-  private:
-    InstX86Fstp(Cfg *Func, Variable *Dest);
-  };
-
   class InstX86Pop final : public InstX86Base {
     InstX86Pop() = delete;
     InstX86Pop(const InstX86Pop &) = delete;
@@ -3272,14 +3231,6 @@
   using StoreQ = typename InstImpl<TraitsType>::InstX86StoreQ;
   using StoreD = typename InstImpl<TraitsType>::InstX86StoreD;
   using Nop = typename InstImpl<TraitsType>::InstX86Nop;
-  template <typename T = typename InstImpl<TraitsType>::Traits>
-  using Fld =
-      typename std::enable_if<T::UsesX87,
-                              typename InstImpl<TraitsType>::InstX86Fld>::type;
-  template <typename T = typename InstImpl<TraitsType>::Traits>
-  using Fstp =
-      typename std::enable_if<T::UsesX87,
-                              typename InstImpl<TraitsType>::InstX86Fstp>::type;
   using Pop = typename InstImpl<TraitsType>::InstX86Pop;
   using Push = typename InstImpl<TraitsType>::InstX86Push;
   using Ret = typename InstImpl<TraitsType>::InstX86Ret;
diff --git a/third_party/subzero/src/IceInstX8664BaseImpl.h b/third_party/subzero/src/IceInstX8664BaseImpl.h
index 0dcd88b..4870be5 100644
--- a/third_party/subzero/src/IceInstX8664BaseImpl.h
+++ b/third_party/subzero/src/IceInstX8664BaseImpl.h
@@ -18,7 +18,7 @@
 
 #include "IceInstX8664Base.h"
 
-#include "IceAssemblerX8664Base.h"
+#include "IceAssemblerX8664.h"
 #include "IceCfg.h"
 #include "IceCfgNode.h"
 #include "IceDefs.h"
@@ -320,16 +320,6 @@
     : InstX86Base(Func, InstX86Base::Nop, 0, nullptr), Variant(Variant) {}
 
 template <typename TraitsType>
-InstImpl<TraitsType>::InstX86Fld::InstX86Fld(Cfg *Func, Operand *Src)
-    : InstX86Base(Func, InstX86Base::Fld, 1, nullptr) {
-  this->addSource(Src);
-}
-
-template <typename TraitsType>
-InstImpl<TraitsType>::InstX86Fstp::InstX86Fstp(Cfg *Func, Variable *Dest)
-    : InstX86Base(Func, InstX86Base::Fstp, 0, Dest) {}
-
-template <typename TraitsType>
 InstImpl<TraitsType>::InstX86Pop::InstX86Pop(Cfg *Func, Variable *Dest)
     : InstX86Base(Func, InstX86Base::Pop, 0, Dest) {
   // A pop instruction affects the stack pointer and so it should not be
@@ -2569,151 +2559,6 @@
 }
 
 template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Fld::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(this->getSrcSize() == 1);
-  Type Ty = this->getSrc(0)->getType();
-  const auto *Var = llvm::dyn_cast<Variable>(this->getSrc(0));
-  if (Var && Var->hasReg()) {
-    // This is a physical xmm register, so we need to spill it to a temporary
-    // stack slot.  Function prolog emission guarantees that there is sufficient
-    // space to do this.
-    Str << "\t"
-           "mov"
-        << Traits::TypeAttributes[Ty].SdSsString << "\t";
-    Var->emit(Func);
-    Str << ", (%esp)\n"
-           "\t"
-           "fld"
-        << this->getFldString(Ty)
-        << "\t"
-           "(%esp)";
-    return;
-  }
-  Str << "\t"
-         "fld"
-      << this->getFldString(Ty) << "\t";
-  this->getSrc(0)->emit(Func);
-}
-
-template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Fld::emitIAS(const Cfg *Func) const {
-  Assembler *Asm = Func->getAssembler<Assembler>();
-  assert(this->getSrcSize() == 1);
-  const Operand *Src = this->getSrc(0);
-  auto *Target = InstX86Base::getTarget(Func);
-  Type Ty = Src->getType();
-  if (const auto *Var = llvm::dyn_cast<Variable>(Src)) {
-    if (Var->hasReg()) {
-      // This is a physical xmm register, so we need to spill it to a temporary
-      // stack slot.  Function prolog emission guarantees that there is
-      // sufficient space to do this.
-      Address StackSlot =
-          Address(RegisterSet::Encoded_Reg_esp, 0, AssemblerFixup::NoFixup);
-      Asm->movss(Ty, StackSlot, Traits::getEncodedXmm(Var->getRegNum()));
-      Asm->fld(Ty, StackSlot);
-    } else {
-      Address StackAddr(Target->stackVarToAsmOperand(Var));
-      Asm->fld(Ty, StackAddr);
-    }
-  } else if (const auto *Mem = llvm::dyn_cast<X86OperandMem>(Src)) {
-    assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
-    Asm->fld(Ty, Mem->toAsmAddress(Asm, Target));
-  } else if (const auto *Imm = llvm::dyn_cast<Constant>(Src)) {
-    Asm->fld(Ty, Traits::Address::ofConstPool(Asm, Imm));
-  } else {
-    llvm_unreachable("Unexpected operand type");
-  }
-}
-
-template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Fld::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "fld." << this->getSrc(0)->getType() << " ";
-  this->dumpSources(Func);
-}
-
-template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Fstp::emit(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(this->getSrcSize() == 0);
-  // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
-  // "partially" delete the fstp if the Dest is unused. Even if Dest is unused,
-  // the fstp should be kept for the SideEffects of popping the stack.
-  if (!this->getDest()) {
-    Str << "\t"
-           "fstp\t"
-           "st(0)";
-    return;
-  }
-  Type Ty = this->getDest()->getType();
-  if (!this->getDest()->hasReg()) {
-    Str << "\t"
-           "fstp"
-        << this->getFldString(Ty) << "\t";
-    this->getDest()->emit(Func);
-    return;
-  }
-  // Dest is a physical (xmm) register, so st(0) needs to go through memory.
-  // Hack this by using caller-reserved memory at the top of stack, spilling
-  // st(0) there, and loading it into the xmm register.
-  Str << "\t"
-         "fstp"
-      << this->getFldString(Ty)
-      << "\t"
-         "(%esp)\n";
-  Str << "\t"
-         "mov"
-      << Traits::TypeAttributes[Ty].SdSsString
-      << "\t"
-         "(%esp), ";
-  this->getDest()->emit(Func);
-}
-
-template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Fstp::emitIAS(const Cfg *Func) const {
-  Assembler *Asm = Func->getAssembler<Assembler>();
-  assert(this->getSrcSize() == 0);
-  const Variable *Dest = this->getDest();
-  // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
-  // "partially" delete the fstp if the Dest is unused. Even if Dest is unused,
-  // the fstp should be kept for the SideEffects of popping the stack.
-  if (!Dest) {
-    Asm->fstp(RegisterSet::getEncodedSTReg(0));
-    return;
-  }
-  auto *Target = InstX86Base::getTarget(Func);
-  Type Ty = Dest->getType();
-  if (!Dest->hasReg()) {
-    Address StackAddr(Target->stackVarToAsmOperand(Dest));
-    Asm->fstp(Ty, StackAddr);
-  } else {
-    // Dest is a physical (xmm) register, so st(0) needs to go through memory.
-    // Hack this by using caller-reserved memory at the top of stack, spilling
-    // st(0) there, and loading it into the xmm register.
-    Address StackSlot =
-        Address(RegisterSet::Encoded_Reg_esp, 0, AssemblerFixup::NoFixup);
-    Asm->fstp(Ty, StackSlot);
-    Asm->movss(Ty, Traits::getEncodedXmm(Dest->getRegNum()), StackSlot);
-  }
-}
-
-template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Fstp::dump(const Cfg *Func) const {
-  if (!BuildDefs::dump())
-    return;
-  Ostream &Str = Func->getContext()->getStrDump();
-  this->dumpDest(Func);
-  Str << " = fstp." << this->getDest()->getType() << ", st(0)";
-}
-
-template <typename TraitsType>
 void InstImpl<TraitsType>::InstX86Pextr::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
diff --git a/third_party/subzero/src/IceTargetLoweringX8632Base.h b/third_party/subzero/src/IceTargetLoweringX8632Base.h
index e619712..9395590 100644
--- a/third_party/subzero/src/IceTargetLoweringX8632Base.h
+++ b/third_party/subzero/src/IceTargetLoweringX8632Base.h
@@ -528,25 +528,11 @@
   void _divss(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Divss>(Dest, Src0);
   }
-  template <typename T = Traits>
-  typename std::enable_if<T::UsesX87, void>::type _fld(Operand *Src0) {
-    Context.insert<typename Traits::Insts::template Fld<>>(Src0);
+  void _fld(Operand *Src0) {
+    Context.insert<typename Traits::Insts::Fld>(Src0);
   }
-  // TODO(jpp): when implementing the X8664 calling convention, make sure x8664
-  // does not invoke this method, and remove it.
-  template <typename T = Traits>
-  typename std::enable_if<!T::UsesX87, void>::type _fld(Operand *) {
-    llvm::report_fatal_error("fld is not available in x86-64");
-  }
-  template <typename T = Traits>
-  typename std::enable_if<T::UsesX87, void>::type _fstp(Variable *Dest) {
-    Context.insert<typename Traits::Insts::template Fstp<>>(Dest);
-  }
-  // TODO(jpp): when implementing the X8664 calling convention, make sure x8664
-  // does not invoke this method, and remove it.
-  template <typename T = Traits>
-  typename std::enable_if<!T::UsesX87, void>::type _fstp(Variable *) {
-    llvm::report_fatal_error("fstp is not available in x86-64");
+  void _fstp(Variable *Dest) {
+    Context.insert<typename Traits::Insts::Fstp>(Dest);
   }
   void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
     Context.insert<typename Traits::Insts::Idiv>(Dest, Src0, Src1);
diff --git a/third_party/subzero/src/IceTargetLoweringX8632BaseImpl.h b/third_party/subzero/src/IceTargetLoweringX8632BaseImpl.h
index ae776e1..0ecf1e5 100644
--- a/third_party/subzero/src/IceTargetLoweringX8632BaseImpl.h
+++ b/third_party/subzero/src/IceTargetLoweringX8632BaseImpl.h
@@ -1063,14 +1063,12 @@
   // This is done by a movp[sd] and an fld[sd].  Ensure there is enough scratch
   // space on the stack for this.
   const Type ReturnType = Func->getReturnType();
-  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-    if (isScalarFloatingType(ReturnType)) {
-      // Avoid misaligned double-precision load/store.
-      RequiredStackAlignment = std::max<size_t>(
-          RequiredStackAlignment, Traits::X86_STACK_ALIGNMENT_BYTES);
-      SpillAreaSizeBytes =
-          std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
-    }
+  if (isScalarFloatingType(ReturnType)) {
+    // Avoid misaligned double-precision load/store.
+    RequiredStackAlignment = std::max<size_t>(
+        RequiredStackAlignment, Traits::X86_STACK_ALIGNMENT_BYTES);
+    SpillAreaSizeBytes =
+        std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
   }
 
   RequiredStackAlignment =
@@ -1155,14 +1153,7 @@
         ++NumXmmArgs;
         continue;
       }
-    } else if (isScalarFloatingType(Arg->getType())) {
-      if (Traits::X86_PASS_SCALAR_FP_IN_XMM &&
-          Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, NumXmmArgs))
-              .hasValue()) {
-        ++NumXmmArgs;
-        continue;
-      }
-    } else {
+    } else if (!isScalarFloatingType(Arg->getType())) {
       assert(isScalarIntegerType(Arg->getType()));
       if (Traits::getRegisterForGprArgNum(Traits::WordType,
                                           Traits::getArgIndex(i, NumGPRArgs))
@@ -1511,17 +1502,7 @@
       ++NumXmmArgs;
       RegisterArg = Func->makeVariable(Ty);
     } else if (isScalarFloatingType(Ty)) {
-      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-        continue;
-      }
-      RegNum =
-          Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, NumXmmArgs));
-      if (RegNum.hasNoValue()) {
-        XmmSlotsRemain = false;
-        continue;
-      }
-      ++NumXmmArgs;
-      RegisterArg = Func->makeVariable(Ty);
+      continue;
     } else if (isScalarIntegerType(Ty)) {
       RegNum = Traits::getRegisterForGprArgNum(
           Ty, Traits::getArgIndex(i, NumGprArgs));
@@ -2600,12 +2581,6 @@
             .hasValue()) {
       XmmArgs.push_back(Arg);
       XmmArgIndices.push_back(i);
-    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
-               Traits::getRegisterForXmmArgNum(
-                   Traits::getArgIndex(i, XmmArgs.size()))
-                   .hasValue()) {
-      XmmArgs.push_back(Arg);
-      XmmArgIndices.push_back(i);
     } else if (isScalarIntegerType(Ty) &&
                Traits::getRegisterForGprArgNum(
                    Ty, Traits::getArgIndex(i, GprArgs.size()))
@@ -2629,12 +2604,10 @@
   // Ensure there is enough space for the fstp/movs for floating returns.
   Variable *Dest = Instr->getDest();
   const Type DestTy = Dest ? Dest->getType() : IceType_void;
-  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-    if (isScalarFloatingType(DestTy)) {
-      ParameterAreaSizeBytes =
-          std::max(static_cast<size_t>(ParameterAreaSizeBytes),
-                   typeWidthInBytesOnStack(DestTy));
-    }
+  if (isScalarFloatingType(DestTy)) {
+    ParameterAreaSizeBytes =
+        std::max(static_cast<size_t>(ParameterAreaSizeBytes),
+                 typeWidthInBytesOnStack(DestTy));
   }
   // Adjust the parameter area so that the stack is aligned. It is assumed that
   // the stack is already aligned at the start of the calling sequence.
@@ -2701,12 +2674,9 @@
       break;
     case IceType_f32:
     case IceType_f64:
-      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-        // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
-        // the fstp instruction.
-        break;
-      }
-    // Fallthrough intended.
+      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
+      // the fstp instruction.
+      break;
     case IceType_v4i1:
     case IceType_v8i1:
     case IceType_v16i1:
@@ -2729,8 +2699,7 @@
   // Mark the call as killing all the caller-save registers.
   Context.insert<InstFakeKill>(NewCall);
   // Handle x86-32 floating point returns.
-  if (Dest != nullptr && isScalarFloatingType(DestTy) &&
-      !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+  if (Dest != nullptr && isScalarFloatingType(DestTy)) {
     // Special treatment for an FP function which returns its result in st(0).
     // If Dest ends up being a physical xmm register, the fstp emit code will
     // route st(0) through the space reserved in the function argument area
@@ -2755,13 +2724,7 @@
     Tmp = makeReg(DestTy);
     _movp(Tmp, ReturnReg);
     _movp(Dest, Tmp);
-  } else if (isScalarFloatingType(DestTy)) {
-    if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-      assert(ReturnReg && "FP type requires a return register");
-      _mov(Tmp, ReturnReg);
-      _mov(Dest, Tmp);
-    }
-  } else {
+  } else if (!isScalarFloatingType(DestTy)) {
     assert(isScalarIntegerType(DestTy));
     assert(ReturnReg && "Integer type requires a return register");
     if (DestTy == IceType_i64 && !Traits::Is64Bit) {
@@ -7465,11 +7428,6 @@
         Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, XmmArgCount))
             .hasValue()) {
       ++XmmArgCount;
-    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
-               Traits::getRegisterForXmmArgNum(
-                   Traits::getArgIndex(i, XmmArgCount))
-                   .hasValue()) {
-      ++XmmArgCount;
     } else if (isScalarIntegerType(Ty) &&
                Traits::getRegisterForGprArgNum(
                    Ty, Traits::getArgIndex(i, GprArgCount))
diff --git a/third_party/subzero/src/IceTargetLoweringX8632Traits.h b/third_party/subzero/src/IceTargetLoweringX8632Traits.h
index 5b6702b..c312b92 100644
--- a/third_party/subzero/src/IceTargetLoweringX8632Traits.h
+++ b/third_party/subzero/src/IceTargetLoweringX8632Traits.h
@@ -15,7 +15,7 @@
 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H
 #define SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H
 
-#include "IceAssembler.h"
+#include "IceAssemblerX8632.h"
 #include "IceConditionCodesX8632.h"
 #include "IceDefs.h"
 #include "IceInst.h"
@@ -33,9 +33,9 @@
 namespace X8632 {
 using namespace ::Ice::X86;
 
+class AssemblerX8632;
 template <class Machine> struct Insts;
 template <class Machine> class TargetX86Base;
-template <class Machine> class AssemblerX86Base;
 
 class TargetX8632;
 
@@ -52,9 +52,6 @@
       ::Ice::Assembler::Asm_X8632;
 
   static constexpr bool Is64Bit = false;
-  static constexpr bool HasPopa = true;
-  static constexpr bool HasPusha = true;
-  static constexpr bool UsesX87 = true;
   static constexpr ::Ice::RegX8632::GPRRegister Last8BitGPR =
       ::Ice::RegX8632::GPRRegister::Encoded_Reg_ebx;
 
@@ -176,7 +173,7 @@
              && ((encoding_[0] & 0x07) == reg); // Register codes match.
     }
 
-    friend class AssemblerX86Base<TargetX8632Traits>;
+    friend class AssemblerX8632;
   };
 
   class Address : public Operand {
@@ -612,8 +609,6 @@
   static constexpr uint32_t X86_MAX_XMM_ARGS = 4;
   /// The maximum number of arguments to pass in GPR registers
   static constexpr uint32_t X86_MAX_GPR_ARGS = 0;
-  /// Whether scalar floating point arguments are passed in XMM registers
-  static constexpr bool X86_PASS_SCALAR_FP_IN_XMM = false;
   /// Get the register for a given argument slot in the XMM registers.
   static RegNumT getRegisterForXmmArgNum(uint32_t ArgNum) {
     // TODO(sehr): Change to use the CCArg technique used in ARM32.
@@ -744,7 +739,7 @@
 
   using TargetLowering = ::Ice::X8632::TargetX86Base<Traits>;
   using ConcreteTarget = ::Ice::X8632::TargetX8632;
-  using Assembler = ::Ice::X8632::AssemblerX86Base<Traits>;
+  using Assembler = ::Ice::X8632::AssemblerX8632;
 
   /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
   /// and VariableSplit.
diff --git a/third_party/subzero/src/IceTargetLoweringX8664Base.h b/third_party/subzero/src/IceTargetLoweringX8664Base.h
index d033b3f..c0bce54 100644
--- a/third_party/subzero/src/IceTargetLoweringX8664Base.h
+++ b/third_party/subzero/src/IceTargetLoweringX8664Base.h
@@ -528,26 +528,6 @@
   void _divss(Variable *Dest, Operand *Src0) {
     Context.insert<typename Traits::Insts::Divss>(Dest, Src0);
   }
-  template <typename T = Traits>
-  typename std::enable_if<T::UsesX87, void>::type _fld(Operand *Src0) {
-    Context.insert<typename Traits::Insts::template Fld<>>(Src0);
-  }
-  // TODO(jpp): when implementing the X8664 calling convention, make sure x8664
-  // does not invoke this method, and remove it.
-  template <typename T = Traits>
-  typename std::enable_if<!T::UsesX87, void>::type _fld(Operand *) {
-    llvm::report_fatal_error("fld is not available in x86-64");
-  }
-  template <typename T = Traits>
-  typename std::enable_if<T::UsesX87, void>::type _fstp(Variable *Dest) {
-    Context.insert<typename Traits::Insts::template Fstp<>>(Dest);
-  }
-  // TODO(jpp): when implementing the X8664 calling convention, make sure x8664
-  // does not invoke this method, and remove it.
-  template <typename T = Traits>
-  typename std::enable_if<!T::UsesX87, void>::type _fstp(Variable *) {
-    llvm::report_fatal_error("fstp is not available in x86-64");
-  }
   void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
     Context.insert<typename Traits::Insts::Idiv>(Dest, Src0, Src1);
   }
diff --git a/third_party/subzero/src/IceTargetLoweringX8664BaseImpl.h b/third_party/subzero/src/IceTargetLoweringX8664BaseImpl.h
index ca22a84..ffc42bb 100644
--- a/third_party/subzero/src/IceTargetLoweringX8664BaseImpl.h
+++ b/third_party/subzero/src/IceTargetLoweringX8664BaseImpl.h
@@ -1058,21 +1058,6 @@
   uint32_t GlobalsAndSubsequentPaddingSize =
       GlobalsSize + LocalsSlotsPaddingBytes;
 
-  // Functions returning scalar floating point types may need to convert values
-  // from an in-register xmm value to the top of the x87 floating point stack.
-  // This is done by a movp[sd] and an fld[sd].  Ensure there is enough scratch
-  // space on the stack for this.
-  const Type ReturnType = Func->getReturnType();
-  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-    if (isScalarFloatingType(ReturnType)) {
-      // Avoid misaligned double-precision load/store.
-      RequiredStackAlignment = std::max<size_t>(
-          RequiredStackAlignment, Traits::X86_STACK_ALIGNMENT_BYTES);
-      SpillAreaSizeBytes =
-          std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
-    }
-  }
-
   RequiredStackAlignment =
       std::max<size_t>(RequiredStackAlignment, SpillAreaAlignmentBytes);
 
@@ -1156,8 +1141,7 @@
         continue;
       }
     } else if (isScalarFloatingType(Arg->getType())) {
-      if (Traits::X86_PASS_SCALAR_FP_IN_XMM &&
-          Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, NumXmmArgs))
+      if (Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, NumXmmArgs))
               .hasValue()) {
         ++NumXmmArgs;
         continue;
@@ -1511,9 +1495,6 @@
       ++NumXmmArgs;
       RegisterArg = Func->makeVariable(Ty);
     } else if (isScalarFloatingType(Ty)) {
-      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-        continue;
-      }
       RegNum =
           Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, NumXmmArgs));
       if (RegNum.hasNoValue()) {
@@ -2600,7 +2581,7 @@
             .hasValue()) {
       XmmArgs.push_back(Arg);
       XmmArgIndices.push_back(i);
-    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
+    } else if (isScalarFloatingType(Ty) &&
                Traits::getRegisterForXmmArgNum(
                    Traits::getArgIndex(i, XmmArgs.size()))
                    .hasValue()) {
@@ -2629,13 +2610,6 @@
-  // Ensure there is enough space for the fstp/movs for floating returns.
+  // Fetch the call's destination and its type (void if there is none).
   Variable *Dest = Instr->getDest();
   const Type DestTy = Dest ? Dest->getType() : IceType_void;
-  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-    if (isScalarFloatingType(DestTy)) {
-      ParameterAreaSizeBytes =
-          std::max(static_cast<size_t>(ParameterAreaSizeBytes),
-                   typeWidthInBytesOnStack(DestTy));
-    }
-  }
   // Adjust the parameter area so that the stack is aligned. It is assumed that
   // the stack is already aligned at the start of the calling sequence.
   ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
@@ -2701,11 +2675,6 @@
       break;
     case IceType_f32:
     case IceType_f64:
-      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-        // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
-        // the fstp instruction.
-        break;
-      }
     // Fallthrough intended.
     case IceType_v4i1:
     case IceType_v8i1:
@@ -2728,18 +2697,6 @@
     Context.insert<InstFakeDef>(ReturnRegHi);
   // Mark the call as killing all the caller-save registers.
   Context.insert<InstFakeKill>(NewCall);
-  // Handle x86-32 floating point returns.
-  if (Dest != nullptr && isScalarFloatingType(DestTy) &&
-      !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-    // Special treatment for an FP function which returns its result in st(0).
-    // If Dest ends up being a physical xmm register, the fstp emit code will
-    // route st(0) through the space reserved in the function argument area
-    // we allocated.
-    _fstp(Dest);
-    // Create a fake use of Dest in case it actually isn't used, because st(0)
-    // still needs to be popped.
-    Context.insert<InstFakeUse>(Dest);
-  }
   // Generate a FakeUse to keep the call live if necessary.
   if (Instr->hasSideEffects() && ReturnReg) {
     Context.insert<InstFakeUse>(ReturnReg);
@@ -2756,11 +2713,9 @@
     _movp(Tmp, ReturnReg);
     _movp(Dest, Tmp);
   } else if (isScalarFloatingType(DestTy)) {
-    if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
-      assert(ReturnReg && "FP type requires a return register");
-      _mov(Tmp, ReturnReg);
-      _mov(Dest, Tmp);
-    }
+    assert(ReturnReg && "FP type requires a return register");
+    _mov(Tmp, ReturnReg);
+    _mov(Dest, Tmp);
   } else {
     assert(isScalarIntegerType(DestTy));
     assert(ReturnReg && "Integer type requires a return register");
@@ -7465,7 +7420,7 @@
         Traits::getRegisterForXmmArgNum(Traits::getArgIndex(i, XmmArgCount))
             .hasValue()) {
       ++XmmArgCount;
-    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
+    } else if (isScalarFloatingType(Ty) &&
                Traits::getRegisterForXmmArgNum(
                    Traits::getArgIndex(i, XmmArgCount))
                    .hasValue()) {
diff --git a/third_party/subzero/src/IceTargetLoweringX8664Traits.h b/third_party/subzero/src/IceTargetLoweringX8664Traits.h
index d2bf600..e1f4a7e 100644
--- a/third_party/subzero/src/IceTargetLoweringX8664Traits.h
+++ b/third_party/subzero/src/IceTargetLoweringX8664Traits.h
@@ -34,7 +34,7 @@
 namespace X8664 {
 using namespace ::Ice::X86;
 
-template <class TraitsType> class AssemblerX86Base;
+class AssemblerX8664;
 template <class TraitsType> struct Insts;
 template <class TraitsType> class TargetX86Base;
 
@@ -53,9 +53,6 @@
       ::Ice::Assembler::Asm_X8664;
 
   static constexpr bool Is64Bit = true;
-  static constexpr bool HasPopa = false;
-  static constexpr bool HasPusha = false;
-  static constexpr bool UsesX87 = false;
   static constexpr ::Ice::RegX8664::GPRRegister Last8BitGPR =
       ::Ice::RegX8664::GPRRegister::Encoded_Reg_r15d;
 
@@ -183,7 +180,7 @@
              && (rm() == reg); // Register codes match.
     }
 
-    friend class AssemblerX86Base<TargetX8664Traits>;
+    friend class AssemblerX8664;
   };
 
   class Address : public Operand {
@@ -711,8 +708,6 @@
   }
 #endif
 
-  /// Whether scalar floating point arguments are passed in XMM registers
-  static constexpr bool X86_PASS_SCALAR_FP_IN_XMM = true;
   /// Get the register for a given argument slot in the XMM registers.
   static RegNumT getRegisterForXmmArgNum(uint32_t ArgNum) {
     // TODO(sehr): Change to use the CCArg technique used in ARM32.
@@ -830,7 +825,7 @@
 
   using TargetLowering = ::Ice::X8664::TargetX86Base<Traits>;
   using ConcreteTarget = ::Ice::X8664::TargetX8664;
-  using Assembler = ::Ice::X8664::AssemblerX86Base<Traits>;
+  using Assembler = ::Ice::X8664::AssemblerX8664;
 
   /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
   /// and VariableSplit.