emitIAS for movsx and movzx.
Force dest to be the full 32-bit reg instead of sometimes being
a 16-bit reg. This is to save on a operand size prefix (and
avoid passing the DestTy down to the dispatchers).
BUG=none
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/647223004
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 6b1e40c..e41fe82 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -287,16 +287,6 @@
addSource(Mem);
}
-InstX8632Movsx::InstX8632Movsx(Cfg *Func, Variable *Dest, Operand *Source)
- : InstX8632(Func, InstX8632::Movsx, 1, Dest) {
- addSource(Source);
-}
-
-InstX8632Movzx::InstX8632Movzx(Cfg *Func, Variable *Dest, Operand *Source)
- : InstX8632(Func, InstX8632::Movzx, 1, Dest) {
- addSource(Source);
-}
-
InstX8632Nop::InstX8632Nop(Cfg *Func, InstX8632Nop::NopVariant Variant)
: InstX8632(Func, InstX8632::Nop, 0, NULL), Variant(Variant) {}
@@ -516,6 +506,7 @@
emitIASBytes(Func, Asm, StartPosition);
}
+template <bool VarCanBeByte, bool SrcCanBeByte>
void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Var,
const Operand *Src,
const x86::AssemblerX86::GPREmitterRegOp &Emitter) {
@@ -524,11 +515,14 @@
assert(Var->hasReg());
// We cheat a little and use GPRRegister even for byte operations.
RegX8632::GPRRegister VarReg =
- RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum());
+ VarCanBeByte ? RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum())
+ : RegX8632::getEncodedGPR(Var->getRegNum());
if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
if (SrcVar->hasReg()) {
RegX8632::GPRRegister SrcReg =
- RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum());
+ SrcCanBeByte
+ ? RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum())
+ : RegX8632::getEncodedGPR(SrcVar->getRegNum());
(Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg);
} else {
x86::Address SrcStackAddr = static_cast<TargetX8632 *>(Func->getTarget())
@@ -799,6 +793,8 @@
template <> const char *InstX8632Bsr::Opcode = "bsr";
template <> const char *InstX8632Lea::Opcode = "lea";
template <> const char *InstX8632Movd::Opcode = "movd";
+template <> const char *InstX8632Movsx::Opcode = "movsx";
+template <> const char *InstX8632Movzx::Opcode = "movzx";
template <> const char *InstX8632Sqrtss::Opcode = "sqrtss";
template <> const char *InstX8632Cbwdq::Opcode = "cbw/cwd/cdq";
// Mov-like ops
@@ -869,6 +865,12 @@
template <>
const x86::AssemblerX86::GPREmitterRegOp InstX8632Lea::Emitter = {
/* reg/reg and reg/imm are illegal */ NULL, &x86::AssemblerX86::lea, NULL};
+template <>
+const x86::AssemblerX86::GPREmitterRegOp InstX8632Movsx::Emitter = {
+ &x86::AssemblerX86::movsx, &x86::AssemblerX86::movsx, NULL};
+template <>
+const x86::AssemblerX86::GPREmitterRegOp InstX8632Movzx::Emitter = {
+ &x86::AssemblerX86::movzx, &x86::AssemblerX86::movzx, NULL};
// Unary XMM ops
template <>
@@ -2117,42 +2119,27 @@
emitIASBytes(Func, Asm, StartPosition);
}
-void InstX8632Movsx::emit(const Cfg *Func) const {
- Ostream &Str = Func->getContext()->getStrEmit();
+template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1);
- Str << "\tmovsx\t";
- getDest()->emit(Func);
- Str << ", ";
- getSrc(0)->emit(Func);
- Str << "\n";
+ const Variable *Dest = getDest();
+ const Operand *Src = getSrc(0);
+ // Dest must be a > 8-bit register, but Src can be 8-bit. In practice
+ // we just use the full register for Dest to avoid having an
+ // OperandSizeOverride prefix. It also allows us to only dispatch on SrcTy.
+ Type SrcTy = Src->getType();
+ assert(typeWidthInBytes(Dest->getType()) > 1);
+ assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
+ emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter);
}
-void InstX8632Movsx::dump(const Cfg *Func) const {
- Ostream &Str = Func->getContext()->getStrDump();
- Str << "movsx." << getDest()->getType() << "." << getSrc(0)->getType();
- Str << " ";
- dumpDest(Func);
- Str << ", ";
- dumpSources(Func);
-}
-
-void InstX8632Movzx::emit(const Cfg *Func) const {
- Ostream &Str = Func->getContext()->getStrEmit();
+template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1);
- Str << "\tmovzx\t";
- getDest()->emit(Func);
- Str << ", ";
- getSrc(0)->emit(Func);
- Str << "\n";
-}
-
-void InstX8632Movzx::dump(const Cfg *Func) const {
- Ostream &Str = Func->getContext()->getStrDump();
- Str << "movzx." << getDest()->getType() << "." << getSrc(0)->getType();
- Str << " ";
- dumpDest(Func);
- Str << ", ";
- dumpSources(Func);
+ const Variable *Dest = getDest();
+ const Operand *Src = getSrc(0);
+ Type SrcTy = Src->getType();
+ assert(typeWidthInBytes(Dest->getType()) > 1);
+ assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
+ emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter);
}
void InstX8632Nop::emit(const Cfg *Func) const {
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index b57f3ba..8fa4bdb 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -485,11 +485,12 @@
// Emit a two-operand (GPR) instruction, where the dest operand is a
// Variable that's guaranteed to be a register.
+template <bool VarCanBeByte = true, bool SrcCanBeByte = true>
void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Dst,
const Operand *Src,
const x86::AssemblerX86::GPREmitterRegOp &Emitter);
-// Instructions of the form x := op(y)
+// Instructions of the form x := op(y).
template <InstX8632::InstKindX8632 K>
class InstX8632UnaryopGPR : public InstX8632 {
InstX8632UnaryopGPR(const InstX8632UnaryopGPR &) = delete;
@@ -519,7 +520,7 @@
void dump(const Cfg *Func) const override {
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
- Str << " = " << Opcode << "." << getDest()->getType() << " ";
+ Str << " = " << Opcode << "." << getSrc(0)->getType() << " ";
dumpSources(Func);
}
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
@@ -886,6 +887,8 @@
typedef InstX8632UnaryopGPR<InstX8632::Lea> InstX8632Lea;
// Cbwdq instruction - wrapper for cbw, cwd, and cdq
typedef InstX8632UnaryopGPR<InstX8632::Cbwdq> InstX8632Cbwdq;
+typedef InstX8632UnaryopGPR<InstX8632::Movsx> InstX8632Movsx;
+typedef InstX8632UnaryopGPR<InstX8632::Movzx> InstX8632Movzx;
typedef InstX8632UnaryopXmm<InstX8632::Movd> InstX8632Movd;
typedef InstX8632UnaryopXmm<InstX8632::Sqrtss> InstX8632Sqrtss;
// Move/assignment instruction - wrapper for mov/movss/movsd.
@@ -1323,46 +1326,6 @@
~InstX8632StoreQ() override {}
};
-// Movsx - copy from a narrower integer type to a wider integer
-// type, with sign extension.
-class InstX8632Movsx : public InstX8632 {
- InstX8632Movsx(const InstX8632Movsx &) = delete;
- InstX8632Movsx &operator=(const InstX8632Movsx &) = delete;
-
-public:
- static InstX8632Movsx *create(Cfg *Func, Variable *Dest, Operand *Source) {
- return new (Func->allocate<InstX8632Movsx>())
- InstX8632Movsx(Func, Dest, Source);
- }
- void emit(const Cfg *Func) const override;
- void dump(const Cfg *Func) const override;
- static bool classof(const Inst *Inst) { return isClassof(Inst, Movsx); }
-
-private:
- InstX8632Movsx(Cfg *Func, Variable *Dest, Operand *Source);
- ~InstX8632Movsx() override {}
-};
-
-// Movzx - copy from a narrower integer type to a wider integer
-// type, with zero extension.
-class InstX8632Movzx : public InstX8632 {
- InstX8632Movzx(const InstX8632Movzx &) = delete;
- InstX8632Movzx &operator=(const InstX8632Movzx &) = delete;
-
-public:
- static InstX8632Movzx *create(Cfg *Func, Variable *Dest, Operand *Source) {
- return new (Func->allocate<InstX8632Movzx>())
- InstX8632Movzx(Func, Dest, Source);
- }
- void emit(const Cfg *Func) const override;
- void dump(const Cfg *Func) const override;
- static bool classof(const Inst *Inst) { return isClassof(Inst, Movzx); }
-
-private:
- InstX8632Movzx(Cfg *Func, Variable *Dest, Operand *Source);
- ~InstX8632Movzx() override {}
-};
-
// Nop instructions of varying length
class InstX8632Nop : public InstX8632 {
InstX8632Nop(const InstX8632Nop &) = delete;
@@ -1573,6 +1536,8 @@
template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pextr::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pinsr::emitIAS(const Cfg *Func) const;
+template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const;
+template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const;
template <> void InstX8632Pshufd::emitIAS(const Cfg *Func) const;
template <> void InstX8632Shufps::emitIAS(const Cfg *Func) const;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 232acdd..0319768 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2018,8 +2018,16 @@
} else if (Src0RM->getType() == IceType_i1) {
// t = Src0RM; t &= 1; Dest = t
Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
- Variable *T = makeReg(IceType_i32);
- _movzx(T, Src0RM);
+ Type DestTy = Dest->getType();
+ Variable *T;
+ if (DestTy == IceType_i8) {
+ T = makeReg(DestTy);
+ _mov(T, Src0RM);
+ } else {
+ // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
+ T = makeReg(IceType_i32);
+ _movzx(T, Src0RM);
+ }
_and(T, One);
_mov(Dest, T);
} else {
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index 550bc99..a253b8d 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -189,59 +189,39 @@
}
}
-void AssemblerX86::movzxb(GPRRegister dst, ByteRegister src) {
+void AssemblerX86::movzx(Type SrcTy, GPRRegister dst, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ bool ByteSized = isByteSizedType(SrcTy);
+ assert(ByteSized || SrcTy == IceType_i16);
EmitUint8(0x0F);
- EmitUint8(0xB6);
+ EmitUint8(ByteSized ? 0xB6 : 0xB7);
EmitRegisterOperand(dst, src);
}
-void AssemblerX86::movzxb(GPRRegister dst, const Address &src) {
+void AssemblerX86::movzx(Type SrcTy, GPRRegister dst, const Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ bool ByteSized = isByteSizedType(SrcTy);
+ assert(ByteSized || SrcTy == IceType_i16);
EmitUint8(0x0F);
- EmitUint8(0xB6);
+ EmitUint8(ByteSized ? 0xB6 : 0xB7);
EmitOperand(dst, src);
}
-void AssemblerX86::movsxb(GPRRegister dst, ByteRegister src) {
+void AssemblerX86::movsx(Type SrcTy, GPRRegister dst, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ bool ByteSized = isByteSizedType(SrcTy);
+ assert(ByteSized || SrcTy == IceType_i16);
EmitUint8(0x0F);
- EmitUint8(0xBE);
+ EmitUint8(ByteSized ? 0xBE : 0xBF);
EmitRegisterOperand(dst, src);
}
-void AssemblerX86::movsxb(GPRRegister dst, const Address &src) {
+void AssemblerX86::movsx(Type SrcTy, GPRRegister dst, const Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ bool ByteSized = isByteSizedType(SrcTy);
+ assert(ByteSized || SrcTy == IceType_i16);
EmitUint8(0x0F);
- EmitUint8(0xBE);
- EmitOperand(dst, src);
-}
-
-void AssemblerX86::movzxw(GPRRegister dst, GPRRegister src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x0F);
- EmitUint8(0xB7);
- EmitRegisterOperand(dst, src);
-}
-
-void AssemblerX86::movzxw(GPRRegister dst, const Address &src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x0F);
- EmitUint8(0xB7);
- EmitOperand(dst, src);
-}
-
-void AssemblerX86::movsxw(GPRRegister dst, GPRRegister src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x0F);
- EmitUint8(0xBF);
- EmitRegisterOperand(dst, src);
-}
-
-void AssemblerX86::movsxw(GPRRegister dst, const Address &src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x0F);
- EmitUint8(0xBF);
+ EmitUint8(ByteSized ? 0xBE : 0xBF);
EmitOperand(dst, src);
}
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index 5c312fb..e33838d 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -483,15 +483,10 @@
void mov(Type Ty, const Address &dst, GPRRegister src);
void mov(Type Ty, const Address &dst, const Immediate &imm);
- void movzxb(GPRRegister dst, ByteRegister src);
- void movzxb(GPRRegister dst, const Address &src);
- void movsxb(GPRRegister dst, ByteRegister src);
- void movsxb(GPRRegister dst, const Address &src);
-
- void movzxw(GPRRegister dst, GPRRegister src);
- void movzxw(GPRRegister dst, const Address &src);
- void movsxw(GPRRegister dst, GPRRegister src);
- void movsxw(GPRRegister dst, const Address &src);
+ void movzx(Type Ty, GPRRegister dst, GPRRegister src);
+ void movzx(Type Ty, GPRRegister dst, const Address &src);
+ void movsx(Type Ty, GPRRegister dst, GPRRegister src);
+ void movsx(Type Ty, GPRRegister dst, const Address &src);
void lea(Type Ty, GPRRegister dst, const Address &src);
diff --git a/tests_lit/llvm2ice_tests/convert.ll b/tests_lit/llvm2ice_tests/convert.ll
index c349b5d..0a32858 100644
--- a/tests_lit/llvm2ice_tests/convert.ll
+++ b/tests_lit/llvm2ice_tests/convert.ll
@@ -37,7 +37,7 @@
}
; CHECK-LABEL: from_int8
; CHECK: mov {{.*}}, byte ptr [
-; CHECK: movsx
+; CHECK: movsx e{{.*}}, {{[a-d]l|byte ptr}}
; CHECK: mov word ptr [
; CHECK: movsx
; CHECK: mov dword ptr [
@@ -66,9 +66,9 @@
; CHECK-LABEL: from_int16
; CHECK: mov {{.*}}, word ptr [
; CHECK: [0]
-; CHECK: movsx
+; CHECK: movsx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
; CHECK: [4]
-; CHECK: movsx
+; CHECK: movsx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
; CHECK: sar {{.*}}, 31
; CHECK: [8]
@@ -133,7 +133,7 @@
}
; CHECK-LABEL: from_uint8
; CHECK: [16]
-; CHECK: movzx
+; CHECK: movzx e{{.*}}, {{[a-d]l|byte ptr}}
; CHECK: [2]
; CHECK: movzx
; CHECK: [4]
@@ -159,9 +159,9 @@
; CHECK-LABEL: from_uint16
; CHECK: [18]
; CHECK: [0]
-; CHECK: movzx
+; CHECK: movzx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
; CHECK: [4]
-; CHECK: movzx
+; CHECK: movzx e{{.*}}, {{.*x|[ds]i|bp|word ptr}}
; CHECK: mov {{.*}}, 0
; CHECK: [8]
diff --git a/tests_lit/llvm2ice_tests/test_i1.ll b/tests_lit/llvm2ice_tests/test_i1.ll
index 697f0d1..c5d5ee5 100644
--- a/tests_lit/llvm2ice_tests/test_i1.ll
+++ b/tests_lit/llvm2ice_tests/test_i1.ll
@@ -63,8 +63,7 @@
; CHECK-LABEL: testZextI8
; match the trunc instruction
; CHECK: and {{.*}}, 1
-; match the zext i1 instruction
-; CHECK: movzx
+; match the zext i1 instruction (NOTE: no mov need between i1 and i8).
; CHECK: and {{.*}}, 1
; Test zext to i16.
@@ -78,9 +77,9 @@
; CHECK-LABEL: testZextI16
; match the trunc instruction
; CHECK: and {{.*}}, 1
-; match the zext i1 instruction
-; CHECK: movzx
-; CHECK: and {{.*}}, 1
+; match the zext i1 instruction (note 32-bit reg is used because it's shorter).
+; CHECK: movzx [[REG:e.*]], {{[a-d]l|byte ptr}}
+; CHECK: and [[REG]], 1
; Test zext to i32.
define internal i32 @testZextI32(i32 %arg) {
@@ -138,7 +137,7 @@
; match the trunc instruction
; CHECK: and {{.*}}, 1
; match the sext i1 instruction
-; CHECK: movzx [[REG:.*]],
+; CHECK: movzx e[[REG:.*]], {{[a-d]l|byte ptr}}
; CHECK-NEXT: shl [[REG]], 15
; CHECK-NEXT: sar [[REG]], 15