emitIAS for movsx and movzx. Force dest to be the full 32-bit reg instead of sometimes being a 16-bit reg. This is to save on a operand size prefix (and avoid passing the DestTy down to the dispatchers). BUG=none R=stichnot@chromium.org Review URL: https://codereview.chromium.org/647223004
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp index 6b1e40c..e41fe82 100644 --- a/src/IceInstX8632.cpp +++ b/src/IceInstX8632.cpp
@@ -287,16 +287,6 @@ addSource(Mem); } -InstX8632Movsx::InstX8632Movsx(Cfg *Func, Variable *Dest, Operand *Source) - : InstX8632(Func, InstX8632::Movsx, 1, Dest) { - addSource(Source); -} - -InstX8632Movzx::InstX8632Movzx(Cfg *Func, Variable *Dest, Operand *Source) - : InstX8632(Func, InstX8632::Movzx, 1, Dest) { - addSource(Source); -} - InstX8632Nop::InstX8632Nop(Cfg *Func, InstX8632Nop::NopVariant Variant) : InstX8632(Func, InstX8632::Nop, 0, NULL), Variant(Variant) {} @@ -516,6 +506,7 @@ emitIASBytes(Func, Asm, StartPosition); } +template <bool VarCanBeByte, bool SrcCanBeByte> void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src, const x86::AssemblerX86::GPREmitterRegOp &Emitter) { @@ -524,11 +515,14 @@ assert(Var->hasReg()); // We cheat a little and use GPRRegister even for byte operations. RegX8632::GPRRegister VarReg = - RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum()); + VarCanBeByte ? RegX8632::getEncodedByteRegOrGPR(Ty, Var->getRegNum()) + : RegX8632::getEncodedGPR(Var->getRegNum()); if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) { if (SrcVar->hasReg()) { RegX8632::GPRRegister SrcReg = - RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum()); + SrcCanBeByte + ? RegX8632::getEncodedByteRegOrGPR(Ty, SrcVar->getRegNum()) + : RegX8632::getEncodedGPR(SrcVar->getRegNum()); (Asm->*(Emitter.GPRGPR))(Ty, VarReg, SrcReg); } else { x86::Address SrcStackAddr = static_cast<TargetX8632 *>(Func->getTarget()) @@ -799,6 +793,8 @@ template <> const char *InstX8632Bsr::Opcode = "bsr"; template <> const char *InstX8632Lea::Opcode = "lea"; template <> const char *InstX8632Movd::Opcode = "movd"; +template <> const char *InstX8632Movsx::Opcode = "movsx"; +template <> const char *InstX8632Movzx::Opcode = "movzx"; template <> const char *InstX8632Sqrtss::Opcode = "sqrtss"; template <> const char *InstX8632Cbwdq::Opcode = "cbw/cwd/cdq"; // Mov-like ops @@ -869,6 +865,12 @@ template <> const x86::AssemblerX86::GPREmitterRegOp InstX8632Lea::Emitter = { /* reg/reg and reg/imm are illegal */ NULL, &x86::AssemblerX86::lea, NULL}; +template <> +const x86::AssemblerX86::GPREmitterRegOp InstX8632Movsx::Emitter = { + &x86::AssemblerX86::movsx, &x86::AssemblerX86::movsx, NULL}; +template <> +const x86::AssemblerX86::GPREmitterRegOp InstX8632Movzx::Emitter = { + &x86::AssemblerX86::movzx, &x86::AssemblerX86::movzx, NULL}; // Unary XMM ops template <> @@ -2117,42 +2119,27 @@ emitIASBytes(Func, Asm, StartPosition); } -void InstX8632Movsx::emit(const Cfg *Func) const { - Ostream &Str = Func->getContext()->getStrEmit(); +template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const { assert(getSrcSize() == 1); - Str << "\tmovsx\t"; - getDest()->emit(Func); - Str << ", "; - getSrc(0)->emit(Func); - Str << "\n"; + const Variable *Dest = getDest(); + const Operand *Src = getSrc(0); + // Dest must be a > 8-bit register, but Src can be 8-bit. In practice + // we just use the full register for Dest to avoid having an + // OperandSizeOverride prefix. It also allows us to only dispatch on SrcTy. + Type SrcTy = Src->getType(); + assert(typeWidthInBytes(Dest->getType()) > 1); + assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy)); + emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter); } -void InstX8632Movsx::dump(const Cfg *Func) const { - Ostream &Str = Func->getContext()->getStrDump(); - Str << "movsx." << getDest()->getType() << "." << getSrc(0)->getType(); - Str << " "; - dumpDest(Func); - Str << ", "; - dumpSources(Func); -} - -void InstX8632Movzx::emit(const Cfg *Func) const { - Ostream &Str = Func->getContext()->getStrEmit(); +template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const { assert(getSrcSize() == 1); - Str << "\tmovzx\t"; - getDest()->emit(Func); - Str << ", "; - getSrc(0)->emit(Func); - Str << "\n"; -} - -void InstX8632Movzx::dump(const Cfg *Func) const { - Ostream &Str = Func->getContext()->getStrDump(); - Str << "movzx." << getDest()->getType() << "." << getSrc(0)->getType(); - Str << " "; - dumpDest(Func); - Str << ", "; - dumpSources(Func); + const Variable *Dest = getDest(); + const Operand *Src = getSrc(0); + Type SrcTy = Src->getType(); + assert(typeWidthInBytes(Dest->getType()) > 1); + assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy)); + emitIASRegOpTyGPR<false, true>(Func, SrcTy, Dest, Src, Emitter); } void InstX8632Nop::emit(const Cfg *Func) const {
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h index b57f3ba..8fa4bdb 100644 --- a/src/IceInstX8632.h +++ b/src/IceInstX8632.h
@@ -485,11 +485,12 @@ // Emit a two-operand (GPR) instruction, where the dest operand is a // Variable that's guaranteed to be a register. +template <bool VarCanBeByte = true, bool SrcCanBeByte = true> void emitIASRegOpTyGPR(const Cfg *Func, Type Ty, const Variable *Dst, const Operand *Src, const x86::AssemblerX86::GPREmitterRegOp &Emitter); -// Instructions of the form x := op(y) +// Instructions of the form x := op(y). template <InstX8632::InstKindX8632 K> class InstX8632UnaryopGPR : public InstX8632 { InstX8632UnaryopGPR(const InstX8632UnaryopGPR &) = delete; @@ -519,7 +520,7 @@ void dump(const Cfg *Func) const override { Ostream &Str = Func->getContext()->getStrDump(); dumpDest(Func); - Str << " = " << Opcode << "." << getDest()->getType() << " "; + Str << " = " << Opcode << "." << getSrc(0)->getType() << " "; dumpSources(Func); } static bool classof(const Inst *Inst) { return isClassof(Inst, K); } @@ -886,6 +887,8 @@ typedef InstX8632UnaryopGPR<InstX8632::Lea> InstX8632Lea; // Cbwdq instruction - wrapper for cbw, cwd, and cdq typedef InstX8632UnaryopGPR<InstX8632::Cbwdq> InstX8632Cbwdq; +typedef InstX8632UnaryopGPR<InstX8632::Movsx> InstX8632Movsx; +typedef InstX8632UnaryopGPR<InstX8632::Movzx> InstX8632Movzx; typedef InstX8632UnaryopXmm<InstX8632::Movd> InstX8632Movd; typedef InstX8632UnaryopXmm<InstX8632::Sqrtss> InstX8632Sqrtss; // Move/assignment instruction - wrapper for mov/movss/movsd. @@ -1323,46 +1326,6 @@ ~InstX8632StoreQ() override {} }; -// Movsx - copy from a narrower integer type to a wider integer -// type, with sign extension. -class InstX8632Movsx : public InstX8632 { - InstX8632Movsx(const InstX8632Movsx &) = delete; - InstX8632Movsx &operator=(const InstX8632Movsx &) = delete; - -public: - static InstX8632Movsx *create(Cfg *Func, Variable *Dest, Operand *Source) { - return new (Func->allocate<InstX8632Movsx>()) - InstX8632Movsx(Func, Dest, Source); - } - void emit(const Cfg *Func) const override; - void dump(const Cfg *Func) const override; - static bool classof(const Inst *Inst) { return isClassof(Inst, Movsx); } - -private: - InstX8632Movsx(Cfg *Func, Variable *Dest, Operand *Source); - ~InstX8632Movsx() override {} -}; - -// Movzx - copy from a narrower integer type to a wider integer -// type, with zero extension. -class InstX8632Movzx : public InstX8632 { - InstX8632Movzx(const InstX8632Movzx &) = delete; - InstX8632Movzx &operator=(const InstX8632Movzx &) = delete; - -public: - static InstX8632Movzx *create(Cfg *Func, Variable *Dest, Operand *Source) { - return new (Func->allocate<InstX8632Movzx>()) - InstX8632Movzx(Func, Dest, Source); - } - void emit(const Cfg *Func) const override; - void dump(const Cfg *Func) const override; - static bool classof(const Inst *Inst) { return isClassof(Inst, Movzx); } - -private: - InstX8632Movzx(Cfg *Func, Variable *Dest, Operand *Source); - ~InstX8632Movzx() override {} -}; - // Nop instructions of varying length class InstX8632Nop : public InstX8632 { InstX8632Nop(const InstX8632Nop &) = delete; @@ -1573,6 +1536,8 @@ template <> void InstX8632Pblendvb::emitIAS(const Cfg *Func) const; template <> void InstX8632Pextr::emitIAS(const Cfg *Func) const; template <> void InstX8632Pinsr::emitIAS(const Cfg *Func) const; +template <> void InstX8632Movsx::emitIAS(const Cfg *Func) const; +template <> void InstX8632Movzx::emitIAS(const Cfg *Func) const; template <> void InstX8632Pmull::emitIAS(const Cfg *Func) const; template <> void InstX8632Pshufd::emitIAS(const Cfg *Func) const; template <> void InstX8632Shufps::emitIAS(const Cfg *Func) const;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp index 232acdd..0319768 100644 --- a/src/IceTargetLoweringX8632.cpp +++ b/src/IceTargetLoweringX8632.cpp
@@ -2018,8 +2018,16 @@ } else if (Src0RM->getType() == IceType_i1) { // t = Src0RM; t &= 1; Dest = t Constant *One = Ctx->getConstantInt32(IceType_i32, 1); - Variable *T = makeReg(IceType_i32); - _movzx(T, Src0RM); + Type DestTy = Dest->getType(); + Variable *T; + if (DestTy == IceType_i8) { + T = makeReg(DestTy); + _mov(T, Src0RM); + } else { + // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. + T = makeReg(IceType_i32); + _movzx(T, Src0RM); + } _and(T, One); _mov(Dest, T); } else {
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp index 550bc99..a253b8d 100644 --- a/src/assembler_ia32.cpp +++ b/src/assembler_ia32.cpp
@@ -189,59 +189,39 @@ } } -void AssemblerX86::movzxb(GPRRegister dst, ByteRegister src) { +void AssemblerX86::movzx(Type SrcTy, GPRRegister dst, GPRRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + bool ByteSized = isByteSizedType(SrcTy); + assert(ByteSized || SrcTy == IceType_i16); EmitUint8(0x0F); - EmitUint8(0xB6); + EmitUint8(ByteSized ? 0xB6 : 0xB7); EmitRegisterOperand(dst, src); } -void AssemblerX86::movzxb(GPRRegister dst, const Address &src) { +void AssemblerX86::movzx(Type SrcTy, GPRRegister dst, const Address &src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + bool ByteSized = isByteSizedType(SrcTy); + assert(ByteSized || SrcTy == IceType_i16); EmitUint8(0x0F); - EmitUint8(0xB6); + EmitUint8(ByteSized ? 0xB6 : 0xB7); EmitOperand(dst, src); } -void AssemblerX86::movsxb(GPRRegister dst, ByteRegister src) { +void AssemblerX86::movsx(Type SrcTy, GPRRegister dst, GPRRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + bool ByteSized = isByteSizedType(SrcTy); + assert(ByteSized || SrcTy == IceType_i16); EmitUint8(0x0F); - EmitUint8(0xBE); + EmitUint8(ByteSized ? 0xBE : 0xBF); EmitRegisterOperand(dst, src); } -void AssemblerX86::movsxb(GPRRegister dst, const Address &src) { +void AssemblerX86::movsx(Type SrcTy, GPRRegister dst, const Address &src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + bool ByteSized = isByteSizedType(SrcTy); + assert(ByteSized || SrcTy == IceType_i16); EmitUint8(0x0F); - EmitUint8(0xBE); - EmitOperand(dst, src); -} - -void AssemblerX86::movzxw(GPRRegister dst, GPRRegister src) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x0F); - EmitUint8(0xB7); - EmitRegisterOperand(dst, src); -} - -void AssemblerX86::movzxw(GPRRegister dst, const Address &src) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x0F); - EmitUint8(0xB7); - EmitOperand(dst, src); -} - -void AssemblerX86::movsxw(GPRRegister dst, GPRRegister src) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x0F); - EmitUint8(0xBF); - EmitRegisterOperand(dst, src); -} - -void AssemblerX86::movsxw(GPRRegister dst, const Address &src) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x0F); - EmitUint8(0xBF); + EmitUint8(ByteSized ? 0xBE : 0xBF); EmitOperand(dst, src); }
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h index 5c312fb..e33838d 100644 --- a/src/assembler_ia32.h +++ b/src/assembler_ia32.h
@@ -483,15 +483,10 @@ void mov(Type Ty, const Address &dst, GPRRegister src); void mov(Type Ty, const Address &dst, const Immediate &imm); - void movzxb(GPRRegister dst, ByteRegister src); - void movzxb(GPRRegister dst, const Address &src); - void movsxb(GPRRegister dst, ByteRegister src); - void movsxb(GPRRegister dst, const Address &src); - - void movzxw(GPRRegister dst, GPRRegister src); - void movzxw(GPRRegister dst, const Address &src); - void movsxw(GPRRegister dst, GPRRegister src); - void movsxw(GPRRegister dst, const Address &src); + void movzx(Type Ty, GPRRegister dst, GPRRegister src); + void movzx(Type Ty, GPRRegister dst, const Address &src); + void movsx(Type Ty, GPRRegister dst, GPRRegister src); + void movsx(Type Ty, GPRRegister dst, const Address &src); void lea(Type Ty, GPRRegister dst, const Address &src);
diff --git a/tests_lit/llvm2ice_tests/convert.ll b/tests_lit/llvm2ice_tests/convert.ll index c349b5d..0a32858 100644 --- a/tests_lit/llvm2ice_tests/convert.ll +++ b/tests_lit/llvm2ice_tests/convert.ll
@@ -37,7 +37,7 @@ } ; CHECK-LABEL: from_int8 ; CHECK: mov {{.*}}, byte ptr [ -; CHECK: movsx +; CHECK: movsx e{{.*}}, {{[a-d]l|byte ptr}} ; CHECK: mov word ptr [ ; CHECK: movsx ; CHECK: mov dword ptr [ @@ -66,9 +66,9 @@ ; CHECK-LABEL: from_int16 ; CHECK: mov {{.*}}, word ptr [ ; CHECK: [0] -; CHECK: movsx +; CHECK: movsx e{{.*}}, {{.*x|[ds]i|bp|word ptr}} ; CHECK: [4] -; CHECK: movsx +; CHECK: movsx e{{.*}}, {{.*x|[ds]i|bp|word ptr}} ; CHECK: sar {{.*}}, 31 ; CHECK: [8] @@ -133,7 +133,7 @@ } ; CHECK-LABEL: from_uint8 ; CHECK: [16] -; CHECK: movzx +; CHECK: movzx e{{.*}}, {{[a-d]l|byte ptr}} ; CHECK: [2] ; CHECK: movzx ; CHECK: [4] @@ -159,9 +159,9 @@ ; CHECK-LABEL: from_uint16 ; CHECK: [18] ; CHECK: [0] -; CHECK: movzx +; CHECK: movzx e{{.*}}, {{.*x|[ds]i|bp|word ptr}} ; CHECK: [4] -; CHECK: movzx +; CHECK: movzx e{{.*}}, {{.*x|[ds]i|bp|word ptr}} ; CHECK: mov {{.*}}, 0 ; CHECK: [8]
diff --git a/tests_lit/llvm2ice_tests/test_i1.ll b/tests_lit/llvm2ice_tests/test_i1.ll index 697f0d1..c5d5ee5 100644 --- a/tests_lit/llvm2ice_tests/test_i1.ll +++ b/tests_lit/llvm2ice_tests/test_i1.ll
@@ -63,8 +63,7 @@ ; CHECK-LABEL: testZextI8 ; match the trunc instruction ; CHECK: and {{.*}}, 1 -; match the zext i1 instruction -; CHECK: movzx +; match the zext i1 instruction (NOTE: no mov need between i1 and i8). ; CHECK: and {{.*}}, 1 ; Test zext to i16. @@ -78,9 +77,9 @@ ; CHECK-LABEL: testZextI16 ; match the trunc instruction ; CHECK: and {{.*}}, 1 -; match the zext i1 instruction -; CHECK: movzx -; CHECK: and {{.*}}, 1 +; match the zext i1 instruction (note 32-bit reg is used because it's shorter). +; CHECK: movzx [[REG:e.*]], {{[a-d]l|byte ptr}} +; CHECK: and [[REG]], 1 ; Test zext to i32. define internal i32 @testZextI32(i32 %arg) { @@ -138,7 +137,7 @@ ; match the trunc instruction ; CHECK: and {{.*}}, 1 ; match the sext i1 instruction -; CHECK: movzx [[REG:.*]], +; CHECK: movzx e[[REG:.*]], {{[a-d]l|byte ptr}} ; CHECK-NEXT: shl [[REG]], 15 ; CHECK-NEXT: sar [[REG]], 15