emitIAS for icmp, test, movss-reg, movq, movups, storep, storeq; tighten some of the Xmm ops

The "test" instruction is used in very limited situations. I've made a best effort
to fill in the possible forms (address for the first operand), but those forms are
not tested, so I put the *untested* parts behind an llvm_unreachable. Otherwise it's
very similar to icmp, so if those forms start to be used and tested, the
llvm_unreachable guards can be taken out and the code shared with icmp.

Tighten some of the XMM dispatch/emitters. Most of those XMM instructions
can only encode the variant where dest is a register. Rather than waste
a slot for a NULL method pointer, just make the struct type have two variants
instead of three.

Fill out a couple of XMM instructions which *do* allow mem-ops as dest
(mov instructions).

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/624263002
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 8930a17..18b4b44 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -267,13 +267,15 @@
   addSource(Mem);
 }
 
-InstX8632StoreP::InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem)
+InstX8632StoreP::InstX8632StoreP(Cfg *Func, Variable *Value,
+                                 OperandX8632Mem *Mem)
     : InstX8632(Func, InstX8632::StoreP, 2, NULL) {
   addSource(Value);
   addSource(Mem);
 }
 
-InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem)
+InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Variable *Value,
+                                 OperandX8632Mem *Mem)
     : InstX8632(Func, InstX8632::StoreQ, 2, NULL) {
   addSource(Value);
   addSource(Mem);
@@ -536,6 +538,27 @@
   emitIASBytes(Str, Asm, StartPosition);
 }
 
+void emitIASAddrOpTyGPR(const Cfg *Func, Type Ty, const x86::Address &Addr,
+                        const Operand *Src,
+                        const x86::AssemblerX86::GPREmitterAddrOp &Emitter) {
+  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+  intptr_t StartPosition = Asm->GetPosition();
+  // Src can only be Reg or Immediate.
+  if (const Variable *SrcVar = llvm::dyn_cast<Variable>(Src)) {
+    assert(SrcVar->hasReg());
+    RegX8632::GPRRegister SrcReg =
+        RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum());
+    (Asm->*(Emitter.AddrGPR))(Ty, Addr, SrcReg);
+  } else if (const ConstantInteger32 *Imm =
+                 llvm::dyn_cast<ConstantInteger32>(Src)) {
+    (Asm->*(Emitter.AddrImm))(Ty, Addr, x86::Immediate(Imm->getValue()));
+  } else {
+    llvm_unreachable("Unexpected operand type");
+  }
+  Ostream &Str = Func->getContext()->getStrEmit();
+  emitIASBytes(Str, Asm, StartPosition);
+}
+
 void emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
                      const Operand *Src,
                      const x86::AssemblerX86::GPREmitterShiftOp &Emitter) {
@@ -595,10 +618,9 @@
   emitIASBytes(Str, Asm, StartPosition);
 }
 
-void
-emitIASVarOperandTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
+void emitIASRegOpTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
                        const Operand *Src,
-                       const x86::AssemblerX86::XmmEmitterTwoOps &Emitter) {
+                       const x86::AssemblerX86::XmmEmitterRegOp &Emitter) {
   x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
   intptr_t StartPosition = Asm->GetPosition();
   assert(Var->hasReg());
@@ -627,6 +649,41 @@
   emitIASBytes(Str, Asm, StartPosition);
 }
 
+void emitIASMovlikeXMM(const Cfg *Func, const Variable *Dest,
+                       const Operand *Src,
+                       const x86::AssemblerX86::XmmEmitterMovOps Emitter) {
+  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+  intptr_t StartPosition = Asm->GetPosition();
+  if (Dest->hasReg()) {
+    RegX8632::XmmRegister DestReg = RegX8632::getEncodedXmm(Dest->getRegNum());
+    if (const Variable *SrcVar = llvm::dyn_cast<Variable>(Src)) {
+      if (SrcVar->hasReg()) {
+        (Asm->*(Emitter.XmmXmm))(DestReg,
+                                 RegX8632::getEncodedXmm(SrcVar->getRegNum()));
+      } else {
+        x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
+                                   ->stackVarToAsmOperand(SrcVar));
+        (Asm->*(Emitter.XmmAddr))(DestReg, StackAddr);
+      }
+    } else if (const OperandX8632Mem *SrcMem =
+                   llvm::dyn_cast<OperandX8632Mem>(Src)) {
+      (Asm->*(Emitter.XmmAddr))(DestReg, SrcMem->toAsmAddress(Asm));
+    } else {
+      llvm_unreachable("Unexpected operand type");
+    }
+  } else {
+    x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
+                               ->stackVarToAsmOperand(Dest));
+    // Src must be a register in this case.
+    const Variable *SrcVar = llvm::cast<Variable>(Src);
+    assert(SrcVar->hasReg());
+    (Asm->*(Emitter.AddrXmm))(StackAddr,
+                              RegX8632::getEncodedXmm(SrcVar->getRegNum()));
+  }
+  Ostream &Str = Func->getContext()->getStrEmit();
+  emitIASBytes(Str, Asm, StartPosition);
+}
+
 bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
   const Variable *Src = llvm::dyn_cast<const Variable>(Source);
   if (Src == NULL)
@@ -692,7 +749,7 @@
 template <> const char *InstX8632Psra::Opcode = "psra";
 template <> const char *InstX8632Pcmpeq::Opcode = "pcmpeq";
 template <> const char *InstX8632Pcmpgt::Opcode = "pcmpgt";
-template <> const char *InstX8632Movss::Opcode = "movss";
+template <> const char *InstX8632MovssRegs::Opcode = "movss";
 // Ternary ops
 template <> const char *InstX8632Insertps::Opcode = "insertps";
 template <> const char *InstX8632Shufps::Opcode = "shufps";
@@ -724,8 +781,8 @@
 
 // Unary XMM ops
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Sqrtss::Emitter = {
-    &x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Sqrtss::Emitter = {
+    &x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss};
 
 // Binary GPR ops
 template <>
@@ -766,59 +823,59 @@
 
 // Binary XMM ops
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Addss::Emitter = {
-    &x86::AssemblerX86::addss, &x86::AssemblerX86::addss, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addss::Emitter = {
+    &x86::AssemblerX86::addss, &x86::AssemblerX86::addss};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Addps::Emitter = {
-    &x86::AssemblerX86::addps, &x86::AssemblerX86::addps, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addps::Emitter = {
+    &x86::AssemblerX86::addps, &x86::AssemblerX86::addps};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Divss::Emitter = {
-    &x86::AssemblerX86::divss, &x86::AssemblerX86::divss, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divss::Emitter = {
+    &x86::AssemblerX86::divss, &x86::AssemblerX86::divss};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Divps::Emitter = {
-    &x86::AssemblerX86::divps, &x86::AssemblerX86::divps, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divps::Emitter = {
+    &x86::AssemblerX86::divps, &x86::AssemblerX86::divps};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Mulss::Emitter = {
-    &x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulss::Emitter = {
+    &x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Mulps::Emitter = {
-    &x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulps::Emitter = {
+    &x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Padd::Emitter = {
-    &x86::AssemblerX86::padd, &x86::AssemblerX86::padd, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Padd::Emitter = {
+    &x86::AssemblerX86::padd, &x86::AssemblerX86::padd};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pand::Emitter = {
-    &x86::AssemblerX86::pand, &x86::AssemblerX86::pand, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pand::Emitter = {
+    &x86::AssemblerX86::pand, &x86::AssemblerX86::pand};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pandn::Emitter = {
-    &x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pandn::Emitter = {
+    &x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pcmpeq::Emitter = {
-    &x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpeq::Emitter = {
+    &x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pcmpgt::Emitter = {
-    &x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpgt::Emitter = {
+    &x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pmull::Emitter = {
-    &x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmull::Emitter = {
+    &x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pmuludq::Emitter = {
-    &x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmuludq::Emitter = {
+    &x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Por::Emitter = {
-    &x86::AssemblerX86::por, &x86::AssemblerX86::por, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Por::Emitter = {
+    &x86::AssemblerX86::por, &x86::AssemblerX86::por};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Psub::Emitter = {
-    &x86::AssemblerX86::psub, &x86::AssemblerX86::psub, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Psub::Emitter = {
+    &x86::AssemblerX86::psub, &x86::AssemblerX86::psub};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Pxor::Emitter = {
-    &x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pxor::Emitter = {
+    &x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Subss::Emitter = {
-    &x86::AssemblerX86::subss, &x86::AssemblerX86::subss, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subss::Emitter = {
+    &x86::AssemblerX86::subss, &x86::AssemblerX86::subss};
 template <>
-const x86::AssemblerX86::XmmEmitterTwoOps InstX8632Subps::Emitter = {
-    &x86::AssemblerX86::subps, &x86::AssemblerX86::subps, NULL};
+const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subps::Emitter = {
+    &x86::AssemblerX86::subps, &x86::AssemblerX86::subps};
 
 // Binary XMM Shift ops
 template <>
@@ -886,7 +943,7 @@
   assert(InstructionSetIsValid);
   assert(getSrcSize() == 2);
   Type ElementTy = typeElementType(Ty);
-  emitIASVarOperandTyXMM(Func, ElementTy, getDest(), getSrc(1), Emitter);
+  emitIASRegOpTyXMM(Func, ElementTy, getDest(), getSrc(1), Emitter);
 }
 
 template <> void InstX8632Subss::emit(const Cfg *Func) const {
@@ -1364,6 +1421,31 @@
   Str << "\n";
 }
 
+void InstX8632Icmp::emitIAS(const Cfg *Func) const {
+  assert(getSrcSize() == 2);
+  const Operand *Src0 = getSrc(0);
+  const Operand *Src1 = getSrc(1);
+  Type Ty = Src0->getType();
+  static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
+      &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp,
+      &x86::AssemblerX86::cmp};
+  static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
+      &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp};
+  if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
+    if (SrcVar0->hasReg()) {
+      emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
+    } else {
+      x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
+                                 ->stackVarToAsmOperand(SrcVar0));
+      emitIASAddrOpTyGPR(Func, Ty, StackAddr, Src1, AddrEmitter);
+    }
+  } else if (const OperandX8632Mem *SrcMem0 =
+                 llvm::dyn_cast<OperandX8632Mem>(Src0)) {
+    x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+    emitIASAddrOpTyGPR(Func, Ty, SrcMem0->toAsmAddress(Asm), Src1, AddrEmitter);
+  }
+}
+
 void InstX8632Icmp::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "cmp." << getSrc(0)->getType() << " ";
@@ -1388,9 +1470,9 @@
   assert(llvm::isa<Variable>(getSrc(0)));
   const Variable *Src0 = llvm::cast<Variable>(getSrc(0));
   Type Ty = Src0->getType();
-  const static x86::AssemblerX86::XmmEmitterTwoOps Emitter = {
-      &x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss, NULL};
-  emitIASVarOperandTyXMM(Func, Ty, Src0, getSrc(1), Emitter);
+  const static x86::AssemblerX86::XmmEmitterRegOp Emitter = {
+      &x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss};
+  emitIASRegOpTyXMM(Func, Ty, Src0, getSrc(1), Emitter);
 }
 
 void InstX8632Ucomiss::dump(const Cfg *Func) const {
@@ -1405,6 +1487,14 @@
   Str << "\tud2\n";
 }
 
+void InstX8632UD2::emitIAS(const Cfg *Func) const {
+  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+  intptr_t StartPosition = Asm->GetPosition();
+  Asm->ud2();
+  Ostream &Str = Func->getContext()->getStrEmit();
+  emitIASBytes(Str, Asm, StartPosition);
+}
+
 void InstX8632UD2::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "ud2\n";
@@ -1420,6 +1510,33 @@
   Str << "\n";
 }
 
+void InstX8632Test::emitIAS(const Cfg *Func) const {
+  assert(getSrcSize() == 2);
+  const Operand *Src0 = getSrc(0);
+  const Operand *Src1 = getSrc(1);
+  Type Ty = Src0->getType();
+  // The Reg/Addr form of test is not encodable.
+  static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
+      &x86::AssemblerX86::test, NULL, &x86::AssemblerX86::test};
+  static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
+      &x86::AssemblerX86::test, &x86::AssemblerX86::test};
+  if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
+    if (SrcVar0->hasReg()) {
+      emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
+    } else {
+      llvm_unreachable("Nothing actually generates this so it's untested");
+      x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
+                                 ->stackVarToAsmOperand(SrcVar0));
+      emitIASAddrOpTyGPR(Func, Ty, StackAddr, Src1, AddrEmitter);
+    }
+  } else if (const OperandX8632Mem *SrcMem0 =
+                 llvm::dyn_cast<OperandX8632Mem>(Src0)) {
+    llvm_unreachable("Nothing actually generates this so it's untested");
+    x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+    emitIASAddrOpTyGPR(Func, Ty, SrcMem0->toAsmAddress(Asm), Src1, AddrEmitter);
+  }
+}
+
 void InstX8632Test::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "test." << getSrc(0)->getType() << " ";
@@ -1474,6 +1591,19 @@
   Str << "\n";
 }
 
+void InstX8632StoreP::emitIAS(const Cfg *Func) const {
+  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+  intptr_t StartPosition = Asm->GetPosition();
+  assert(getSrcSize() == 2);
+  const Variable *Src = llvm::cast<Variable>(getSrc(0));
+  const OperandX8632Mem *DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
+  assert(Src->hasReg());
+  Asm->movups(DestMem->toAsmAddress(Asm),
+              RegX8632::getEncodedXmm(Src->getRegNum()));
+  Ostream &Str = Func->getContext()->getStrEmit();
+  emitIASBytes(Str, Asm, StartPosition);
+}
+
 void InstX8632StoreP::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "storep." << getSrc(0)->getType() << " ";
@@ -1494,6 +1624,19 @@
   Str << "\n";
 }
 
+void InstX8632StoreQ::emitIAS(const Cfg *Func) const {
+  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+  intptr_t StartPosition = Asm->GetPosition();
+  assert(getSrcSize() == 2);
+  const Variable *Src = llvm::cast<Variable>(getSrc(0));
+  const OperandX8632Mem *DestMem = llvm::cast<OperandX8632Mem>(getSrc(1));
+  assert(Src->hasReg());
+  Asm->movq(DestMem->toAsmAddress(Asm),
+            RegX8632::getEncodedXmm(Src->getRegNum()));
+  Ostream &Str = Func->getContext()->getStrEmit();
+  emitIASBytes(Str, Asm, StartPosition);
+}
+
 void InstX8632StoreQ::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "storeq." << getSrc(0)->getType() << " ";
@@ -1621,6 +1764,17 @@
   Str << "\n";
 }
 
+template <> void InstX8632Movp::emitIAS(const Cfg *Func) const {
+  assert(getSrcSize() == 1);
+  assert(isVectorType(getDest()->getType()));
+  const Variable *Dest = getDest();
+  const Operand *Src = getSrc(0);
+  const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
+      &x86::AssemblerX86::movups, &x86::AssemblerX86::movups,
+      &x86::AssemblerX86::movups};
+  emitIASMovlikeXMM(Func, Dest, Src, Emitter);
+}
+
 template <> void InstX8632Movq::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 1);
@@ -1633,6 +1787,34 @@
   Str << "\n";
 }
 
+template <> void InstX8632Movq::emitIAS(const Cfg *Func) const {
+  assert(getSrcSize() == 1);
+  assert(getDest()->getType() == IceType_i64 ||
+         getDest()->getType() == IceType_f64);
+  const Variable *Dest = getDest();
+  const Operand *Src = getSrc(0);
+  const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
+      &x86::AssemblerX86::movq, &x86::AssemblerX86::movq,
+      &x86::AssemblerX86::movq};
+  emitIASMovlikeXMM(Func, Dest, Src, Emitter);
+}
+
+template <> void InstX8632MovssRegs::emitIAS(const Cfg *Func) const {
+  // This Binop variant is only intended to be used for reg-reg moves
+  // where part of the Dest register is untouched.
+  assert(getSrcSize() == 2);
+  const Variable *Dest = getDest();
+  assert(Dest == getSrc(0));
+  const Variable *Src = llvm::cast<Variable>(getSrc(1));
+  assert(Dest->hasReg() && Src->hasReg());
+  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+  intptr_t StartPosition = Asm->GetPosition();
+  Asm->movss(RegX8632::getEncodedXmm(Dest->getRegNum()),
+             RegX8632::getEncodedXmm(Src->getRegNum()));
+  Ostream &Str = Func->getContext()->getStrEmit();
+  emitIASBytes(Str, Asm, StartPosition);
+}
+
 void InstX8632Movsx::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 1);
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 8bac6c7..5449c78 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -200,7 +200,7 @@
     Movd,
     Movp,
     Movq,
-    Movss,
+    MovssRegs,
     Movsx,
     Movzx,
     Mul,
@@ -521,9 +521,9 @@
   static const x86::AssemblerX86::GPREmitterRegOp Emitter;
 };
 
-void emitIASVarOperandTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
-                            const Operand *Src,
-                            const x86::AssemblerX86::XmmEmitterTwoOps &Emitter);
+void emitIASRegOpTyXMM(const Cfg *Func, Type Ty, const Variable *Var,
+                       const Operand *Src,
+                       const x86::AssemblerX86::XmmEmitterRegOp &Emitter);
 
 template <InstX8632::InstKindX8632 K>
 class InstX8632UnaryopXmm : public InstX8632 {
@@ -544,7 +544,7 @@
   void emitIAS(const Cfg *Func) const override {
     Type Ty = getDest()->getType();
     assert(getSrcSize() == 1);
-    emitIASVarOperandTyXMM(Func, Ty, getDest(), getSrc(0), Emitter);
+    emitIASRegOpTyXMM(Func, Ty, getDest(), getSrc(0), Emitter);
   }
   void dump(const Cfg *Func) const override {
     Ostream &Str = Func->getContext()->getStrDump();
@@ -563,7 +563,7 @@
   InstX8632UnaryopXmm &operator=(const InstX8632UnaryopXmm &) = delete;
   ~InstX8632UnaryopXmm() override {}
   static const char *Opcode;
-  static const x86::AssemblerX86::XmmEmitterTwoOps Emitter;
+  static const x86::AssemblerX86::XmmEmitterRegOp Emitter;
 };
 
 // See the definition of emitTwoAddress() for a description of
@@ -571,37 +571,6 @@
 void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
                     bool ShiftHack = false);
 
-template <InstX8632::InstKindX8632 K> class InstX8632Binop : public InstX8632 {
-public:
-  // Create a binary-op instruction (not yet migrated to integrated assembler)
-  static InstX8632Binop *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX8632Binop>())
-        InstX8632Binop(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override {
-    const bool ShiftHack = false;
-    emitTwoAddress(Opcode, this, Func, ShiftHack);
-  }
-  void dump(const Cfg *Func) const override {
-    Ostream &Str = Func->getContext()->getStrDump();
-    dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
-    dumpSources(Func);
-  }
-  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
-
-private:
-  InstX8632Binop(Cfg *Func, Variable *Dest, Operand *Source)
-      : InstX8632(Func, K, 2, Dest) {
-    addSource(Dest);
-    addSource(Source);
-  }
-  InstX8632Binop(const InstX8632Binop &) = delete;
-  InstX8632Binop &operator=(const InstX8632Binop &) = delete;
-  ~InstX8632Binop() override {}
-  static const char *Opcode;
-};
-
 void emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
                      const Operand *Src,
                      const x86::AssemblerX86::GPREmitterShiftOp &Emitter);
@@ -700,7 +669,7 @@
     if (NeedsElementType)
       Ty = typeElementType(Ty);
     assert(getSrcSize() == 2);
-    emitIASVarOperandTyXMM(Func, Ty, getDest(), getSrc(1), Emitter);
+    emitIASRegOpTyXMM(Func, Ty, getDest(), getSrc(1), Emitter);
   }
   void dump(const Cfg *Func) const override {
     Ostream &Str = Func->getContext()->getStrDump();
@@ -720,7 +689,7 @@
   InstX8632BinopXmm &operator=(const InstX8632BinopXmm &) = delete;
   ~InstX8632BinopXmm() override {}
   static const char *Opcode;
-  static const x86::AssemblerX86::XmmEmitterTwoOps Emitter;
+  static const x86::AssemblerX86::XmmEmitterRegOp Emitter;
 };
 
 void emitIASXmmShift(const Cfg *Func, Type Ty, const Variable *Var,
@@ -866,6 +835,7 @@
   }
   bool isSimpleAssign() const override { return true; }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override { emit(Func); }
   void dump(const Cfg *Func) const override {
     Ostream &Str = Func->getContext()->getStrDump();
     Str << Opcode << "." << getDest()->getType() << " ";
@@ -935,13 +905,14 @@
 typedef InstX8632BinopXmmShift<InstX8632::Psra> InstX8632Psra;
 typedef InstX8632BinopXmm<InstX8632::Pcmpeq, true> InstX8632Pcmpeq;
 typedef InstX8632BinopXmm<InstX8632::Pcmpgt, true> InstX8632Pcmpgt;
-// TODO: movss is only a binary operation when the source and dest
-// operands are both registers.  In other cases, it behaves like a copy
-// (mov-like) operation.  Eventually, InstX8632Movss should assert that
-// both its source and dest operands are registers, and the lowering
-// code should use _mov instead of _movss in cases where a copy
-// operation is intended.
-typedef InstX8632Binop<InstX8632::Movss> InstX8632Movss;
+// movss is only a binary operation when the source and dest
+// operands are both registers (the high bits of dest are left untouched).
+// In other cases, it behaves like a copy (mov-like) operation (and the
+// high bits of dest are cleared).
+// InstX8632MovssRegs asserts that both its source and dest operands are
+// registers, so the lowering code should use _mov instead of _movss
+// in cases where a copy operation is intended.
+typedef InstX8632BinopXmm<InstX8632::MovssRegs, false> InstX8632MovssRegs;
 typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
 typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
 typedef InstX8632Ternop<InstX8632::Insertps> InstX8632Insertps;
@@ -1163,6 +1134,7 @@
         InstX8632Icmp(Func, Src1, Src2);
   }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Icmp); }
 
@@ -1199,6 +1171,7 @@
     return new (Func->allocate<InstX8632UD2>()) InstX8632UD2(Func);
   }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, UD2); }
 
@@ -1217,6 +1190,7 @@
         InstX8632Test(Func, Source1, Source2);
   }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Test); }
 
@@ -1265,38 +1239,43 @@
   ~InstX8632Store() override {}
 };
 
+// This is essentially a vector "mov" instruction with an OperandX8632Mem
+// operand instead of Variable as the destination.  It's important
+// for liveness that there is no Dest operand. The source must be an
+// Xmm register, since Dest is mem.
 class InstX8632StoreP : public InstX8632 {
 public:
-  static InstX8632StoreP *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
+  static InstX8632StoreP *create(Cfg *Func, Variable *Value,
+                                 OperandX8632Mem *Mem) {
     return new (Func->allocate<InstX8632StoreP>())
         InstX8632StoreP(Func, Value, Mem);
   }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, StoreP); }
 
 private:
-  InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem);
+  InstX8632StoreP(Cfg *Func, Variable *Value, OperandX8632Mem *Mem);
   InstX8632StoreP(const InstX8632StoreP &) = delete;
   InstX8632StoreP &operator=(const InstX8632StoreP &) = delete;
   ~InstX8632StoreP() override {}
 };
 
-// This is essentially a "movq" instruction with an OperandX8632Mem
-// operand instead of Variable as the destination.  It's important
-// for liveness that there is no Dest operand.
 class InstX8632StoreQ : public InstX8632 {
 public:
-  static InstX8632StoreQ *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
+  static InstX8632StoreQ *create(Cfg *Func, Variable *Value,
+                                 OperandX8632Mem *Mem) {
     return new (Func->allocate<InstX8632StoreQ>())
         InstX8632StoreQ(Func, Value, Mem);
   }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, StoreQ); }
 
 private:
-  InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem);
+  InstX8632StoreQ(Cfg *Func, Variable *Value, OperandX8632Mem *Mem);
   InstX8632StoreQ(const InstX8632StoreQ &) = delete;
   InstX8632StoreQ &operator=(const InstX8632StoreQ &) = delete;
   ~InstX8632StoreQ() override {}
@@ -1535,6 +1514,9 @@
 template <> void InstX8632Imul::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Cbwdq::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Movd::emitIAS(const Cfg *Func) const;
+template <> void InstX8632Movp::emitIAS(const Cfg *Func) const;
+template <> void InstX8632Movq::emitIAS(const Cfg *Func) const;
+template <> void InstX8632MovssRegs::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const;
 
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index eb3cb70..f6b305e 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -308,8 +308,8 @@
   void _movq(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Movq::create(Func, Dest, Src0));
   }
-  void _movss(Variable *Dest, Operand *Src0) {
-    Context.insert(InstX8632Movss::create(Func, Dest, Src0));
+  void _movss(Variable *Dest, Variable *Src0) {
+    Context.insert(InstX8632MovssRegs::create(Func, Dest, Src0));
   }
   void _movsx(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Movsx::create(Func, Dest, Src0));
@@ -422,10 +422,10 @@
   void _store(Operand *Value, OperandX8632 *Mem) {
     Context.insert(InstX8632Store::create(Func, Value, Mem));
   }
-  void _storep(Operand *Value, OperandX8632 *Mem) {
+  void _storep(Variable *Value, OperandX8632Mem *Mem) {
     Context.insert(InstX8632StoreP::create(Func, Value, Mem));
   }
-  void _storeq(Operand *Value, OperandX8632 *Mem) {
+  void _storeq(Variable *Value, OperandX8632Mem *Mem) {
     Context.insert(InstX8632StoreQ::create(Func, Value, Mem));
   }
   void _sub(Variable *Dest, Operand *Src0) {
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index a245ca9..8a2b449 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -340,12 +340,20 @@
   EmitOperand(src, dst);
 }
 
+void AssemblerX86::movq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x7E);
+  EmitRegisterOperand(dst, src);
+}
+
 void AssemblerX86::movq(const Address &dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
   EmitUint8(0xD6);
-  EmitOperand(src, Operand(dst));
+  EmitOperand(src, dst);
 }
 
 void AssemblerX86::movq(XmmRegister dst, const Address &src) {
@@ -353,7 +361,7 @@
   EmitUint8(0xF3);
   EmitUint8(0x0F);
   EmitUint8(0x7E);
-  EmitOperand(dst, Operand(src));
+  EmitOperand(dst, src);
 }
 
 void AssemblerX86::addss(Type Ty, XmmRegister dst, XmmRegister src) {
@@ -463,6 +471,13 @@
   EmitXmmRegisterOperand(dst, src);
 }
 
+void AssemblerX86::movups(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x10);
+  EmitRegisterOperand(dst, src);
+}
+
 void AssemblerX86::movups(XmmRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1289,52 +1304,90 @@
   EmitUint8(0xF7);
 }
 
-void AssemblerX86::cmpl(GPRRegister reg, const Immediate &imm) {
+void AssemblerX86::cmp(Type Ty, GPRRegister reg, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitComplex(BrokenType, 7, Operand(reg), imm);
+  if (isByteSizedType(Ty)) {
+    EmitComplexI8(7, Operand(reg), imm);
+    return;
+  }
+  if (Ty == IceType_i16)
+    EmitOperandSizeOverride();
+  EmitComplex(Ty, 7, Operand(reg), imm);
 }
 
-void AssemblerX86::cmpl(GPRRegister reg0, GPRRegister reg1) {
+void AssemblerX86::cmp(Type Ty, GPRRegister reg0, GPRRegister reg1) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x3B);
-  EmitOperand(reg0, Operand(reg1));
+  if (Ty == IceType_i16)
+    EmitOperandSizeOverride();
+  if (isByteSizedType(Ty))
+    EmitUint8(0x3A);
+  else
+    EmitUint8(0x3B);
+  EmitRegisterOperand(reg0, reg1);
 }
 
-void AssemblerX86::cmpl(GPRRegister reg, const Address &address) {
+void AssemblerX86::cmp(Type Ty, GPRRegister reg, const Address &address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x3B);
+  if (Ty == IceType_i16)
+    EmitOperandSizeOverride();
+  if (isByteSizedType(Ty))
+    EmitUint8(0x3A);
+  else
+    EmitUint8(0x3B);
   EmitOperand(reg, address);
 }
 
-void AssemblerX86::cmpl(const Address &address, GPRRegister reg) {
+void AssemblerX86::cmp(Type Ty, const Address &address, GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x39);
+  if (Ty == IceType_i16)
+    EmitOperandSizeOverride();
+  if (isByteSizedType(Ty))
+    EmitUint8(0x38);
+  else
+    EmitUint8(0x39);
   EmitOperand(reg, address);
 }
 
-void AssemblerX86::cmpl(const Address &address, const Immediate &imm) {
+void AssemblerX86::cmp(Type Ty, const Address &address, const Immediate &imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitComplex(BrokenType, 7, address, imm);
+  if (isByteSizedType(Ty)) {
+    EmitComplexI8(7, address, imm);
+    return;
+  }
+  if (Ty == IceType_i16)
+    EmitOperandSizeOverride();
+  EmitComplex(Ty, 7, address, imm);
 }
 
-void AssemblerX86::cmpb(const Address &address, const Immediate &imm) {
-  assert(imm.is_int8());
+void AssemblerX86::test(Type Ty, GPRRegister reg1, GPRRegister reg2) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x80);
-  EmitOperand(7, address);
-  EmitUint8(imm.value() & 0xFF);
-}
-
-void AssemblerX86::testl(GPRRegister reg1, GPRRegister reg2) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x85);
+  if (Ty == IceType_i16)
+    EmitOperandSizeOverride();
+  if (isByteSizedType(Ty))
+    EmitUint8(0x84);
+  else
+    EmitUint8(0x85);
   EmitRegisterOperand(reg1, reg2);
 }
 
-void AssemblerX86::testl(GPRRegister reg, const Immediate &immediate) {
+void AssemblerX86::test(Type Ty, const Address &addr, GPRRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (Ty == IceType_i16)
+    EmitOperandSizeOverride();
+  if (isByteSizedType(Ty))
+    EmitUint8(0x84);
+  else
+    EmitUint8(0x85);
+  EmitOperand(reg, addr);
+}
+
+void AssemblerX86::test(Type Ty, GPRRegister reg, const Immediate &immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
   // we only test the byte register to keep the encoding short.
+  // This is legal even if the register has high bits set, since the
+  // instruction only sets the flags register based on the "AND" of the two
+  // operands, and the immediate has zeros in those high bits.
   if (immediate.is_uint8() && reg < 4) {
     // Use zero-extended 8-bit immediate.
     if (reg == RegX8632::Encoded_Reg_eax) {
@@ -1346,12 +1399,35 @@
     EmitUint8(immediate.value() & 0xFF);
   } else if (reg == RegX8632::Encoded_Reg_eax) {
     // Use short form if the destination is EAX.
+    if (Ty == IceType_i16)
+      EmitOperandSizeOverride();
     EmitUint8(0xA9);
-    EmitImmediate(BrokenType, immediate);
+    EmitImmediate(Ty, immediate);
   } else {
+    if (Ty == IceType_i16)
+      EmitOperandSizeOverride();
     EmitUint8(0xF7);
-    EmitOperand(0, Operand(reg));
-    EmitImmediate(BrokenType, immediate);
+    EmitRegisterOperand(0, reg);
+    EmitImmediate(Ty, immediate);
+  }
+}
+
+void AssemblerX86::test(Type Ty, const Address &addr,
+                        const Immediate &immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // If the immediate fits in 8 bits, we only test the low byte at addr to
+  // keep the encoding short.
+  if (immediate.is_uint8()) {
+    // Use zero-extended 8-bit immediate.
+    EmitUint8(0xF6);
+    EmitOperand(0, addr);
+    EmitUint8(immediate.value() & 0xFF);
+  } else {
+    if (Ty == IceType_i16)
+      EmitOperandSizeOverride();
+    EmitUint8(0xF7);
+    EmitOperand(0, addr);
+    EmitImmediate(Ty, immediate);
   }
 }
 
@@ -2013,6 +2089,12 @@
   EmitUint8(0xF4);
 }
 
+void AssemblerX86::ud2() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x0B);
+}
+
 void AssemblerX86::j(CondX86::BrCond condition, Label *label, bool near) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   if (label->IsBound()) {
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 9ceab25..fe03134 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -361,16 +361,31 @@
     // In practice, we always normalize the Dest to a Register first.
   };
 
+  typedef void (AssemblerX86::*TypedEmitAddrGPR)(Type, const Address &,
+                                                 GPRRegister);
+  typedef void (AssemblerX86::*TypedEmitAddrImm)(Type, const Address &,
+                                                 const Immediate &);
+  struct GPREmitterAddrOp {
+    TypedEmitAddrGPR AddrGPR;
+    TypedEmitAddrImm AddrImm;
+  };
+
   // Operations to emit XMM instructions (and dispatch on operand type).
   typedef void (AssemblerX86::*TypedEmitXmmXmm)(Type, XmmRegister, XmmRegister);
   typedef void (AssemblerX86::*TypedEmitXmmAddr)(Type, XmmRegister,
                                                  const Address &);
-  typedef void (AssemblerX86::*TypedEmitAddrXmm)(Type, const Address &,
-                                                 XmmRegister);
-  struct XmmEmitterTwoOps {
+  struct XmmEmitterRegOp {
     TypedEmitXmmXmm XmmXmm;
     TypedEmitXmmAddr XmmAddr;
-    TypedEmitAddrXmm AddrXmm;
+  };
+
+  typedef void (AssemblerX86::*EmitXmmXmm)(XmmRegister, XmmRegister);
+  typedef void (AssemblerX86::*EmitXmmAddr)(XmmRegister, const Address &);
+  typedef void (AssemblerX86::*EmitAddrXmm)(const Address &, XmmRegister);
+  struct XmmEmitterMovOps {
+    EmitXmmXmm XmmXmm;
+    EmitXmmAddr XmmAddr;
+    EmitAddrXmm AddrXmm;
   };
 
   typedef void (AssemblerX86::*TypedEmitXmmImm)(Type, XmmRegister,
@@ -442,6 +457,7 @@
   void movd(GPRRegister dst, XmmRegister src);
   void movd(const Address &dst, XmmRegister src);
 
+  void movq(XmmRegister dst, XmmRegister src);
   void movq(const Address &dst, XmmRegister src);
   void movq(XmmRegister dst, const Address &src);
 
@@ -460,6 +476,7 @@
 
   void movaps(XmmRegister dst, XmmRegister src);
 
+  void movups(XmmRegister dst, XmmRegister src);
   void movups(XmmRegister dst, const Address &src);
   void movups(const Address &dst, XmmRegister src);
 
@@ -591,15 +608,16 @@
 
   void fincstp();
 
-  void cmpl(GPRRegister reg, const Immediate &imm);
-  void cmpl(GPRRegister reg0, GPRRegister reg1);
-  void cmpl(GPRRegister reg, const Address &address);
-  void cmpl(const Address &address, GPRRegister reg);
-  void cmpl(const Address &address, const Immediate &imm);
-  void cmpb(const Address &address, const Immediate &imm);
+  void cmp(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void cmp(Type Ty, GPRRegister reg, const Address &address);
+  void cmp(Type Ty, GPRRegister reg, const Immediate &imm);
+  void cmp(Type Ty, const Address &address, GPRRegister reg);
+  void cmp(Type Ty, const Address &address, const Immediate &imm);
 
-  void testl(GPRRegister reg1, GPRRegister reg2);
-  void testl(GPRRegister reg, const Immediate &imm);
+  void test(Type Ty, GPRRegister reg0, GPRRegister reg1);
+  void test(Type Ty, GPRRegister reg, const Immediate &imm);
+  void test(Type Ty, const Address &address, GPRRegister reg);
+  void test(Type Ty, const Address &address, const Immediate &imm);
 
   void And(Type Ty, GPRRegister dst, GPRRegister src);
   void And(Type Ty, GPRRegister dst, const Address &address);
@@ -698,6 +716,7 @@
   void nop(int size = 1);
   void int3();
   void hlt();
+  void ud2();
 
   void j(CondX86::BrCond condition, Label *label, bool near = kFarJump);
   void j(CondX86::BrCond condition, const ConstantRelocatable *label);