emitIAS for movsx and movzx.

Force dest to be the full 32-bit reg instead of sometimes being
a 16-bit reg. This is to save on a operand size prefix (and
avoid passing the DestTy down to the dispatchers).

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/647223004
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 6b1e40c..e41fe82 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -287,16 +287,6 @@
   addSource(Mem);
 }
 
-InstX8632Movsx::InstX8632Movsx(Cfg *Func, Variable *Dest, Operand *Source)
-    : InstX8632(Func, InstX8632::Movsx, 1, Dest) {
-  addSource(Source);
-}
-
-InstX8632Movzx::InstX8632Movzx(Cfg *Func, Variable *Dest, Operand *Source)
-    : InstX8632(Func, InstX8632::Movzx, 1, Dest) {
-  addSource(Source);
-}
-
 InstX8632Nop::InstX8632Nop(Cfg *Func, InstX8632Nop::NopVariant Variant)
     : InstX8632(Func, InstX8632::Nop, 0, NULL), Variant(Variant) {}
 
@@ -516,6 +506,7 @@
   emitIASBytes(Func, Asm, StartPosition);
 }
 
+template <bool VarCanBeByte, bool SrcCanBeByte>
 void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Var,
                        const Operand *Src,
                        const x86::AssemblerX86::GPREmitterRegOp &Emitter) {
@@ -524,11 +515,14 @@
   assert(Var->hasReg());
   // We cheat a little and use GPRRegister even for byte operations.
   RegX8632::GPRRegister VarReg =
-      RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum());
+      VarCanBeByte ? RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum())
+                   : RegX8632::getEncodedGPR(Var->getRegNum());
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
     if (SrcVar->hasReg()) {
       RegX8632::GPRRegister SrcReg =
-          RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum());
+          SrcCanBeByte
+              ? RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum())
+              : RegX8632::getEncodedGPR(SrcVar->getRegNum());
       (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
     } else {
       x86::Address SrcStackAddr = static_cast<TargetX8632 *>(Func->getTarget())
@@ -799,6 +793,8 @@
 template <> const char *InstX8632Bsr::Opcode = "bsr";
 template <> const char *InstX8632Lea::Opcode = "lea";
 template <> const char *InstX8632Movd::Opcode = "movd";
+template <> const char *InstX8632Movsx::Opcode = "movsx";
+template <> const char *InstX8632Movzx::Opcode = "movzx";
 template <> const char *InstX8632Sqrtss::Opcode = "sqrtss";
 template <> const char *InstX8632Cbwdq::Opcode = "cbw/cwd/cdq";
 // Mov-like ops
@@ -869,6 +865,12 @@
 template <>
 const x86::AssemblerX86::GPREmitterRegOp InstX8632Lea::Emitter = {
     /* reg/reg and reg/imm are illegal */ NULL, &x86::AssemblerX86::lea, NULL};
+template <>
+const x86::AssemblerX86::GPREmitterRegOp InstX8632Movsx::Emitter = {
+    &x86::AssemblerX86::movsx, &x86::AssemblerX86::movsx, NULL};
+template <>
+const x86::AssemblerX86::GPREmitterRegOp InstX8632Movzx::Emitter = {
+    &x86::AssemblerX86::movzx, &x86::AssemblerX86::movzx, NULL};
 
 // Unary XMM ops
 template <>
@@ -2117,42 +2119,27 @@
   emitIASBytes(Func, Asm, StartPosition);
 }
 
-void InstX8632Movsx::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
+template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 1);
-  Str << "\tmovsx\t";
-  getDest()->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-  Str << "\n";
+  const Variable *Dest = getDest();
+  const Operand *Src = getSrc(0);
+  // Dest must be a > 8-bit register, but Src can be 8-bit. In practice
+  // we just use the full register for Dest to avoid having an
+  // OperandSizeOverride prefix. It also allows us to only dispatch on SrcTy.
+  Type SrcTy = Src->getType();
+  assert(typeWidthInBytes(Dest->getType()) > 1);
+  assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
+  emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter);
 }
 
-void InstX8632Movsx::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "movsx." << getDest()->getType() << "." << getSrc(0)->getType();
-  Str << " ";
-  dumpDest(Func);
-  Str << ", ";
-  dumpSources(Func);
-}
-
-void InstX8632Movzx::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
+template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const {
   assert(getSrcSize() == 1);
-  Str << "\tmovzx\t";
-  getDest()->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-  Str << "\n";
-}
-
-void InstX8632Movzx::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
-  Str << "movzx." << getDest()->getType() << "." << getSrc(0)->getType();
-  Str << " ";
-  dumpDest(Func);
-  Str << ", ";
-  dumpSources(Func);
+  const Variable *Dest = getDest();
+  const Operand *Src = getSrc(0);
+  Type SrcTy = Src->getType();
+  assert(typeWidthInBytes(Dest->getType()) > 1);
+  assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
+  emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter);
 }
 
 void InstX8632Nop::emit(const Cfg *Func) const {
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index b57f3ba..8fa4bdb 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -485,11 +485,12 @@
 
 // Emit a two-operand (GPR) instruction, where the dest operand is a
 // Variable that's guaranteed to be a register.
+template <bool VarCanBeByte = true, bool SrcCanBeByte = true>
 void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Dst,
                        const Operand *Src,
                        const x86::AssemblerX86::GPREmitterRegOp &Emitter);
 
-// Instructions of the form x := op(y)
+// Instructions of the form x := op(y).
 template <InstX8632::InstKindX8632 K>
 class InstX8632UnaryopGPR : public InstX8632 {
   InstX8632UnaryopGPR(const InstX8632UnaryopGPR &) = delete;
@@ -519,7 +520,7 @@
   void dump(const Cfg *Func) const override {
     Ostream &Str = Func->getContext()->getStrDump();
     dumpDest(Func);
-    Str << " = " << Opcode << "." << getDest()->getType() << " ";
+    Str << " = " << Opcode << "." << getSrc(0)->getType() << " ";
     dumpSources(Func);
   }
   static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
@@ -886,6 +887,8 @@
 typedef InstX8632UnaryopGPR<InstX8632::Lea> InstX8632Lea;
 // Cbwdq instruction - wrapper for cbw, cwd, and cdq
 typedef InstX8632UnaryopGPR<InstX8632::Cbwdq> InstX8632Cbwdq;
+typedef InstX8632UnaryopGPR<InstX8632::Movsx> InstX8632Movsx;
+typedef InstX8632UnaryopGPR<InstX8632::Movzx> InstX8632Movzx;
 typedef InstX8632UnaryopXmm<InstX8632::Movd> InstX8632Movd;
 typedef InstX8632UnaryopXmm<InstX8632::Sqrtss> InstX8632Sqrtss;
 // Move/assignment instruction - wrapper for mov/movss/movsd.
@@ -1323,46 +1326,6 @@
   ~InstX8632StoreQ() override {}
 };
 
-// Movsx - copy from a narrower integer type to a wider integer
-// type, with sign extension.
-class InstX8632Movsx : public InstX8632 {
-  InstX8632Movsx(const InstX8632Movsx &) = delete;
-  InstX8632Movsx &operator=(const InstX8632Movsx &) = delete;
-
-public:
-  static InstX8632Movsx *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX8632Movsx>())
-        InstX8632Movsx(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Movsx); }
-
-private:
-  InstX8632Movsx(Cfg *Func, Variable *Dest, Operand *Source);
-  ~InstX8632Movsx() override {}
-};
-
-// Movzx - copy from a narrower integer type to a wider integer
-// type, with zero extension.
-class InstX8632Movzx : public InstX8632 {
-  InstX8632Movzx(const InstX8632Movzx &) = delete;
-  InstX8632Movzx &operator=(const InstX8632Movzx &) = delete;
-
-public:
-  static InstX8632Movzx *create(Cfg *Func, Variable *Dest, Operand *Source) {
-    return new (Func->allocate<InstX8632Movzx>())
-        InstX8632Movzx(Func, Dest, Source);
-  }
-  void emit(const Cfg *Func) const override;
-  void dump(const Cfg *Func) const override;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Movzx); }
-
-private:
-  InstX8632Movzx(Cfg *Func, Variable *Dest, Operand *Source);
-  ~InstX8632Movzx() override {}
-};
-
 // Nop instructions of varying length
 class InstX8632Nop : public InstX8632 {
   InstX8632Nop(const InstX8632Nop &) = delete;
@@ -1573,6 +1536,8 @@
 template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Pextr::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Pinsr::emitIAS(const Cfg *Func) const;
+template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const;
+template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Pshufd::emitIAS(const Cfg *Func) const;
 template <> void InstX8632Shufps::emitIAS(const Cfg *Func) const;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 232acdd..0319768 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2018,8 +2018,16 @@
     } else if (Src0RM->getType() == IceType_i1) {
       // t = Src0RM; t &= 1; Dest = t
       Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
-      Variable *T = makeReg(IceType_i32);
-      _movzx(T, Src0RM);
+      Type DestTy = Dest->getType();
+      Variable *T;
+      if (DestTy == IceType_i8) {
+        T = makeReg(DestTy);
+        _mov(T, Src0RM);
+      } else {
+        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
+        T = makeReg(IceType_i32);
+        _movzx(T, Src0RM);
+      }
       _and(T, One);
       _mov(Dest, T);
     } else {
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index 550bc99..a253b8d 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -189,59 +189,39 @@
   }
 }
 
-void AssemblerX86::movzxb(GPRRegister dst, ByteRegister src) {
+void AssemblerX86::movzx(Type SrcTy, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
   EmitUint8(0x0F);
-  EmitUint8(0xB6);
+  EmitUint8(ByteSized ? 0xB6 : 0xB7);
   EmitRegisterOperand(dst, src);
 }
 
-void AssemblerX86::movzxb(GPRRegister dst, const Address &src) {
+void AssemblerX86::movzx(Type SrcTy, GPRRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
   EmitUint8(0x0F);
-  EmitUint8(0xB6);
+  EmitUint8(ByteSized ? 0xB6 : 0xB7);
   EmitOperand(dst, src);
 }
 
-void AssemblerX86::movsxb(GPRRegister dst, ByteRegister src) {
+void AssemblerX86::movsx(Type SrcTy, GPRRegister dst, GPRRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
   EmitUint8(0x0F);
-  EmitUint8(0xBE);
+  EmitUint8(ByteSized ? 0xBE : 0xBF);
   EmitRegisterOperand(dst, src);
 }
 
-void AssemblerX86::movsxb(GPRRegister dst, const Address &src) {
+void AssemblerX86::movsx(Type SrcTy, GPRRegister dst, const Address &src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  bool ByteSized = isByteSizedType(SrcTy);
+  assert(ByteSized || SrcTy == IceType_i16);
   EmitUint8(0x0F);
-  EmitUint8(0xBE);
-  EmitOperand(dst, src);
-}
-
-void AssemblerX86::movzxw(GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x0F);
-  EmitUint8(0xB7);
-  EmitRegisterOperand(dst, src);
-}
-
-void AssemblerX86::movzxw(GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x0F);
-  EmitUint8(0xB7);
-  EmitOperand(dst, src);
-}
-
-void AssemblerX86::movsxw(GPRRegister dst, GPRRegister src) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x0F);
-  EmitUint8(0xBF);
-  EmitRegisterOperand(dst, src);
-}
-
-void AssemblerX86::movsxw(GPRRegister dst, const Address &src) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x0F);
-  EmitUint8(0xBF);
+  EmitUint8(ByteSized ? 0xBE : 0xBF);
   EmitOperand(dst, src);
 }
 
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 5c312fb..e33838d 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -483,15 +483,10 @@
   void mov(Type Ty, const Address &dst, GPRRegister src);
   void mov(Type Ty, const Address &dst, const Immediate &imm);
 
-  void movzxb(GPRRegister dst, ByteRegister src);
-  void movzxb(GPRRegister dst, const Address &src);
-  void movsxb(GPRRegister dst, ByteRegister src);
-  void movsxb(GPRRegister dst, const Address &src);
-
-  void movzxw(GPRRegister dst, GPRRegister src);
-  void movzxw(GPRRegister dst, const Address &src);
-  void movsxw(GPRRegister dst, GPRRegister src);
-  void movsxw(GPRRegister dst, const Address &src);
+  void movzx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movzx(Type Ty, GPRRegister dst, const Address &src);
+  void movsx(Type Ty, GPRRegister dst, GPRRegister src);
+  void movsx(Type Ty, GPRRegister dst, const Address &src);
 
   void lea(Type Ty, GPRRegister dst, const Address &src);
 
diff --git a/tests_lit/llvm2ice_tests/convert.ll b/tests_lit/llvm2ice_tests/convert.ll
index c349b5d..0a32858 100644
--- a/tests_lit/llvm2ice_tests/convert.ll
+++ b/tests_lit/llvm2ice_tests/convert.ll
@@ -37,7 +37,7 @@
 }
 ; CHECK-LABEL: from_int8
 ; CHECK: mov {{.*}}, byte ptr [
-; CHECK: movsx
+; CHECK: movsx e{{.*}}, {{[a-d]l|byte ptr}}
 ; CHECK: mov word ptr [
 ; CHECK: movsx
 ; CHECK: mov dword ptr [
@@ -66,9 +66,9 @@
 ; CHECK-LABEL: from_int16
 ; CHECK: mov {{.*}}, word ptr [
 ; CHECK: [0]
-; CHECK: movsx
+; CHECK: movsx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
 ; CHECK: [4]
-; CHECK: movsx
+; CHECK: movsx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
 ; CHECK: sar {{.*}}, 31
 ; CHECK: [8]
 
@@ -133,7 +133,7 @@
 }
 ; CHECK-LABEL: from_uint8
 ; CHECK: [16]
-; CHECK: movzx
+; CHECK: movzx e{{.*}}, {{[a-d]l|byte ptr}}
 ; CHECK: [2]
 ; CHECK: movzx
 ; CHECK: [4]
@@ -159,9 +159,9 @@
 ; CHECK-LABEL: from_uint16
 ; CHECK: [18]
 ; CHECK: [0]
-; CHECK: movzx
+; CHECK: movzx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
 ; CHECK: [4]
-; CHECK: movzx
+; CHECK: movzx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
 ; CHECK: mov {{.*}}, 0
 ; CHECK: [8]
 
diff --git a/tests_lit/llvm2ice_tests/test_i1.ll b/tests_lit/llvm2ice_tests/test_i1.ll
index 697f0d1..c5d5ee5 100644
--- a/tests_lit/llvm2ice_tests/test_i1.ll
+++ b/tests_lit/llvm2ice_tests/test_i1.ll
@@ -63,8 +63,7 @@
 ; CHECK-LABEL: testZextI8
 ; match the trunc instruction
 ; CHECK: and {{.*}}, 1
-; match the zext i1 instruction
-; CHECK: movzx
+; match the zext i1 instruction (NOTE: no mov need between i1 and i8).
 ; CHECK: and {{.*}}, 1
 
 ; Test zext to i16.
@@ -78,9 +77,9 @@
 ; CHECK-LABEL: testZextI16
 ; match the trunc instruction
 ; CHECK: and {{.*}}, 1
-; match the zext i1 instruction
-; CHECK: movzx
-; CHECK: and {{.*}}, 1
+; match the zext i1 instruction (note 32-bit reg is used because it's shorter).
+; CHECK: movzx [[REG:e.*]], {{[a-d]l|byte ptr}}
+; CHECK: and [[REG]], 1
 
 ; Test zext to i32.
 define internal i32 @testZextI32(i32 %arg) {
@@ -138,7 +137,7 @@
 ; match the trunc instruction
 ; CHECK: and {{.*}}, 1
 ; match the sext i1 instruction
-; CHECK: movzx [[REG:.*]],
+; CHECK: movzx e[[REG:.*]], {{[a-d]l|byte ptr}}
 ; CHECK-NEXT: shl [[REG]], 15
 ; CHECK-NEXT: sar [[REG]], 15