Subzero. Native 64-bit int arithmetic on x86-64.
This CL modifies the x86 instruction selection template to allow native
64-bit GPR support. It also enables x86-64 crosstests.
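For illustration (a hand-written sketch, not compiler output): an i64 add
that the x86-32 backend lowers through loOperand()/hiOperand() as an
add/adc pair over split 32-bit halves,

  addl %ecx, %eax
  adcl %edx, %ebx

can now be selected as a single native GPR instruction on x86-64:

  addq %rcx, %rax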
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1273153002.
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 9d872d2..c34b776 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -243,9 +243,9 @@
// Cross Xmm/GPR cast instructions.
template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
- typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, SReg_t);
+ typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, Type, SReg_t);
typedef void (AssemblerX86Base::*TypedEmitAddr)(
- Type, DReg_t, const typename Traits::Address &);
+ Type, DReg_t, Type, const typename Traits::Address &);
TypedEmitRegs RegReg;
TypedEmitAddr RegAddr;
@@ -299,7 +299,14 @@
typename Traits::GPRRegister src);
void mov(Type Ty, const typename Traits::Address &dst, const Immediate &imm);
- void movFromAh(const typename Traits::GPRRegister dst);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type
+ movabs(const typename Traits::GPRRegister Dst, uint64_t Imm64);
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type
+ movabs(const typename Traits::GPRRegister, uint64_t) {
+ llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
+ }
void movzx(Type Ty, typename Traits::GPRRegister dst,
typename Traits::GPRRegister src);
@@ -328,11 +335,13 @@
void movss(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
- void movd(typename Traits::XmmRegister dst, typename Traits::GPRRegister src);
- void movd(typename Traits::XmmRegister dst,
+ void movd(Type SrcTy, typename Traits::XmmRegister dst,
+ typename Traits::GPRRegister src);
+ void movd(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
- void movd(typename Traits::GPRRegister dst, typename Traits::XmmRegister src);
- void movd(const typename Traits::Address &dst,
+ void movd(Type DestTy, typename Traits::GPRRegister dst,
+ typename Traits::XmmRegister src);
+ void movd(Type DestTy, const typename Traits::Address &dst,
typename Traits::XmmRegister src);
void movq(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
@@ -504,9 +513,9 @@
void cvttps2dq(Type, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
- void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
+ void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
typename Traits::GPRRegister src);
- void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
+ void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
const typename Traits::Address &src);
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
@@ -514,9 +523,9 @@
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
- void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
+ void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
typename Traits::XmmRegister src);
- void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
+ void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
const typename Traits::Address &src);
void ucomiss(Type Ty, typename Traits::XmmRegister a,
@@ -719,6 +728,12 @@
void cbw();
void cwd();
void cdq();
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type cqo();
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type cqo() {
+ llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
+ }
void div(Type Ty, typename Traits::GPRRegister reg);
void div(Type Ty, const typename Traits::Address &address);
@@ -936,7 +951,7 @@
typename Traits::GPRRegister>::value;
return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
- isByteSizedArithType(Ty);
+ isByteSizedType(Ty);
};
// assembleAndEmitRex is used for determining which (if any) rex prefix should
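Note on the signature changes above: movd and the cvt* overloads now carry
the type of their GPR operand so the assembler can derive the REX.W bit from
it instead of hardcoding RexTypeIrrelevant. A sketch of the resulting
encodings (registers chosen arbitrarily):

  movd %eax, %xmm0   ->  66 0F 6E C0      (GPR type i32)
  movd %rax, %xmm0   ->  66 48 0F 6E C0   (GPR type i64 sets REX.W;
                                           disassemblers print this as movq)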
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index f785756..2cb039a 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -207,6 +207,8 @@
emitUint8(0xB0 + gprEncoding(dst));
emitUint8(imm.value() & 0xFF);
} else {
+ // TODO(jpp): When removing the assertion above, ensure that x86-64
+ // emits a 64-bit immediate.
emitUint8(0xB8 + gprEncoding(dst));
emitImmediate(Ty, imm);
}
@@ -279,9 +281,34 @@
}
template <class Machine>
+template <typename T>
+typename std::enable_if<T::Is64Bit, void>::type
+AssemblerX86Base<Machine>::movabs(const typename Traits::GPRRegister Dst,
+ uint64_t Imm64) {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ const bool NeedsRexW = (Imm64 & ~0xFFFFFFFFull) != 0;
+ const Type RexType = NeedsRexW ? RexTypeForceRexW : RexTypeIrrelevant;
+ emitRexB(RexType, Dst);
+ emitUint8(0xB8 | gprEncoding(Dst));
+ // When emitting Imm64, we don't have to mask out the upper 32 bits, as
+ // emitInt32 will/should only emit a 32-bit constant. Still, we are
+ // paranoid, so we go ahead and mask the upper bits out anyway.
+ emitInt32(Imm64 & 0xFFFFFFFF);
+ if (NeedsRexW)
+ emitInt32((Imm64 >> 32) & 0xFFFFFFFF);
+}
+
+template <class Machine>
void AssemblerX86Base<Machine>::movzx(Type SrcTy,
typename Traits::GPRRegister dst,
typename Traits::GPRRegister src) {
+ if (Traits::Is64Bit && SrcTy == IceType_i32) {
+ // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
+ // operand to 64-bit.
+ mov(IceType_i32, dst, src);
+ return;
+ }
+
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
bool ByteSized = isByteSizedType(SrcTy);
assert(ByteSized || SrcTy == IceType_i16);
@@ -295,6 +322,13 @@
void AssemblerX86Base<Machine>::movzx(Type SrcTy,
typename Traits::GPRRegister dst,
const typename Traits::Address &src) {
+ if (Traits::Is64Bit && SrcTy == IceType_i32) {
+ // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
+ // operand to 64-bit.
+ mov(IceType_i32, dst, src);
+ return;
+ }
+
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
bool ByteSized = isByteSizedType(SrcTy);
assert(ByteSized || SrcTy == IceType_i16);
@@ -359,7 +393,7 @@
if (Ty == IceType_i16)
emitOperandSizeOverride();
else
- assert(Ty == IceType_i32);
+ assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
emitRexRB(Ty, dst, src);
emitUint8(0x0F);
emitUint8(0x40 + cond);
@@ -375,7 +409,7 @@
if (Ty == IceType_i16)
emitOperandSizeOverride();
else
- assert(Ty == IceType_i32);
+ assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
emitRex(Ty, src, dst);
emitUint8(0x0F);
emitUint8(0x40 + cond);
@@ -423,44 +457,48 @@
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(typename Traits::XmmRegister dst,
+void AssemblerX86Base<Machine>::movd(Type SrcTy,
+ typename Traits::XmmRegister dst,
typename Traits::GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRexRB(RexTypeIrrelevant, dst, src);
+ emitRexRB(SrcTy, dst, src);
emitUint8(0x0F);
emitUint8(0x6E);
emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(typename Traits::XmmRegister dst,
+void AssemblerX86Base<Machine>::movd(Type SrcTy,
+ typename Traits::XmmRegister dst,
const typename Traits::Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRex(RexTypeIrrelevant, src, dst);
+ emitRex(SrcTy, src, dst);
emitUint8(0x0F);
emitUint8(0x6E);
emitOperand(gprEncoding(dst), src);
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(typename Traits::GPRRegister dst,
+void AssemblerX86Base<Machine>::movd(Type DestTy,
+ typename Traits::GPRRegister dst,
typename Traits::XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRexRB(RexTypeIrrelevant, src, dst);
+ emitRexRB(DestTy, src, dst);
emitUint8(0x0F);
emitUint8(0x7E);
emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(const typename Traits::Address &dst,
+void AssemblerX86Base<Machine>::movd(Type DestTy,
+ const typename Traits::Address &dst,
typename Traits::XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRex(RexTypeIrrelevant, dst, src);
+ emitRex(DestTy, dst, src);
emitUint8(0x0F);
emitUint8(0x7E);
emitOperand(gprEncoding(src), dst);
@@ -1343,7 +1381,7 @@
// Load 32-bit immediate value into tmp1.
mov(IceType_i32, tmp1, imm);
// Move value from tmp1 into dst.
- movd(dst, tmp1);
+ movd(IceType_i32, dst, tmp1);
// Broadcast low lane into other three lanes.
shufps(RexTypeIrrelevant, dst, dst, Immediate(0x0));
}
@@ -1487,10 +1525,11 @@
template <class Machine>
void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
typename Traits::XmmRegister dst,
+ Type SrcTy,
typename Traits::GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
- emitRexRB(RexTypeIrrelevant, dst, src);
+ emitRexRB(SrcTy, dst, src);
emitUint8(0x0F);
emitUint8(0x2A);
emitXmmRegisterOperand(dst, src);
@@ -1499,10 +1538,11 @@
template <class Machine>
void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
typename Traits::XmmRegister dst,
+ Type SrcTy,
const typename Traits::Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
- emitRex(RexTypeIrrelevant, src, dst);
+ emitRex(SrcTy, src, dst);
emitUint8(0x0F);
emitUint8(0x2A);
emitOperand(gprEncoding(dst), src);
@@ -1534,24 +1574,26 @@
}
template <class Machine>
-void AssemblerX86Base<Machine>::cvttss2si(Type SrcTy,
+void AssemblerX86Base<Machine>::cvttss2si(Type DestTy,
typename Traits::GPRRegister dst,
+ Type SrcTy,
typename Traits::XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
- emitRexRB(RexTypeIrrelevant, dst, src);
+ emitRexRB(DestTy, dst, src);
emitUint8(0x0F);
emitUint8(0x2C);
emitXmmRegisterOperand(dst, src);
}
template <class Machine>
-void AssemblerX86Base<Machine>::cvttss2si(Type SrcTy,
+void AssemblerX86Base<Machine>::cvttss2si(Type DestTy,
typename Traits::GPRRegister dst,
+ Type SrcTy,
const typename Traits::Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
- emitRex(RexTypeIrrelevant, src, dst);
+ emitRex(DestTy, src, dst);
emitUint8(0x0F);
emitUint8(0x2C);
emitOperand(gprEncoding(dst), src);
@@ -2401,6 +2443,15 @@
}
template <class Machine>
+template <typename T>
+typename std::enable_if<T::Is64Bit, void>::type
+AssemblerX86Base<Machine>::cqo() {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitRexB(RexTypeForceRexW, RexRegIrrelevant);
+ emitUint8(0x99);
+}
+
+template <class Machine>
void AssemblerX86Base<Machine>::div(Type Ty, typename Traits::GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
if (Ty == IceType_i16)
@@ -2459,7 +2510,8 @@
void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
typename Traits::GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- assert(Ty == IceType_i16 || Ty == IceType_i32);
+ assert(Ty == IceType_i16 || Ty == IceType_i32 ||
+ (Traits::Is64Bit && Ty == IceType_i64));
if (Ty == IceType_i16)
emitOperandSizeOverride();
emitRexRB(Ty, dst, src);
@@ -2472,7 +2524,8 @@
void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister reg,
const typename Traits::Address &address) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- assert(Ty == IceType_i16 || Ty == IceType_i32);
+ assert(Ty == IceType_i16 || Ty == IceType_i32 ||
+ (Traits::Is64Bit && Ty == IceType_i64));
if (Ty == IceType_i16)
emitOperandSizeOverride();
emitRex(Ty, address, reg);
@@ -2790,8 +2843,7 @@
void AssemblerX86Base<Machine>::bswap(Type Ty,
typename Traits::GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- assert(Ty == IceType_i32);
- (void)Ty;
+ assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
emitRexB(Ty, reg);
emitUint8(0x0F);
emitUint8(0xC8 | gprEncoding(reg));
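The movabs implementation above relies on x86-64's implicit zero extension:
when the immediate fits in 32 bits it skips REX.W and emits a plain 32-bit
mov, which architecturally clears bits 63:32 of the destination. A sketch of
the two paths (using rax, encoding 0):

  movabs(rax, 0x1122334455667788)  ->  48 B8 88 77 66 55 44 33 22 11
  movabs(rax, 0x55667788)          ->  B8 88 77 66 55
                                       (movl $0x55667788, %eax; the upper
                                       half is zeroed by the hardware)

The movzx special case uses the same fact: for an i32 source it degrades to
a plain 32-bit mov, since there is no 32-to-64-bit movzx encoding.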
diff --git a/src/IceELFSection.h b/src/IceELFSection.h
index 5cf89a5..961d8d2 100644
--- a/src/IceELFSection.h
+++ b/src/IceELFSection.h
@@ -362,8 +362,7 @@
llvm::report_fatal_error("Missing symbol mentioned in reloc");
if (IsELF64) {
- llvm_unreachable(
- "Not tested -- check that Fixup.offset() is correct even for pc-rel");
+ // TODO(jpp): check that Fixup.offset() is correct even for pc-rel.
Elf64_Rela Rela;
Rela.r_offset = Fixup.position();
Rela.setSymbolAndType(Symbol->getNumber(), Fixup.kind());
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index c6d6abf..3a56e1b 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -206,7 +206,7 @@
} else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
Disp = CR->getOffset();
- Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR);
+ Fixup = Asm->createFixup(RelFixup, CR);
} else {
llvm_unreachable("Unexpected offset type");
}
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp
index 3709180..49dc9d8 100644
--- a/src/IceInstX8664.cpp
+++ b/src/IceInstX8664.cpp
@@ -179,8 +179,8 @@
Disp = static_cast<int32_t>(CI->getValue());
} else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
- Disp = CR->getOffset();
- Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR);
+ Disp = CR->getOffset() - 4;
+ Fixup = Asm->createFixup(PcRelFixup, CR);
} else {
llvm_unreachable("Unexpected offset type");
}
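The -4 above accounts for where the linker and the CPU measure from: an
R_X86_64_PC32 relocation resolves the 4-byte field at position P to
S + A - P, while the hardware computes the target as the RIP of the next
instruction, i.e. (P + 4) + disp32 when the displacement is the last field
of the instruction. Requiring (P + 4) + disp32 == S gives

  disp32 = S - P - 4 = S + A - P, with A = -4

which is the addend folded into CR->getOffset() above.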
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 6d39005..b0eb1ad 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -1100,6 +1100,8 @@
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx> {
public:
static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+ assert(typeWidthInBytes(Dest->getType()) >
+ typeWidthInBytes(Src->getType()));
return new (Func->allocate<InstX86Movsx>()) InstX86Movsx(Func, Dest, Src);
}
@@ -1116,6 +1118,8 @@
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx> {
public:
static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+ assert(typeWidthInBytes(Dest->getType()) >
+ typeWidthInBytes(Src->getType()));
return new (Func->allocate<InstX86Movzx>()) InstX86Movzx(Func, Dest, Src);
}
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 34417cf..4d26210 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -729,7 +729,8 @@
} else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Imm->getValue()));
} else if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
- AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
+ AssemblerFixup *Fixup =
+ Asm->createFixup(InstX86Base<Machine>::Traits::RelFixup, Reloc);
(Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Reloc->getOffset(), Fixup));
} else if (const auto Split = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::VariableSplit>(Src)) {
@@ -758,7 +759,8 @@
} else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Imm->getValue()));
} else if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
- AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
+ AssemblerFixup *Fixup =
+ Asm->createFixup(InstX86Base<Machine>::Traits::RelFixup, Reloc);
(Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Reloc->getOffset(), Fixup));
} else {
llvm_unreachable("Unexpected operand type");
@@ -929,8 +931,8 @@
template <class Machine, typename DReg_t, typename SReg_t,
DReg_t (*destEnc)(int32_t), SReg_t (*srcEnc)(int32_t)>
-void emitIASCastRegOp(const Cfg *Func, Type DispatchTy, const Variable *Dest,
- const Operand *Src,
+void emitIASCastRegOp(const Cfg *Func, Type DestTy, const Variable *Dest,
+ Type SrcTy, const Operand *Src,
const typename InstX86Base<Machine>::Traits::Assembler::
template CastEmitterRegOp<DReg_t, SReg_t> &Emitter) {
typename InstX86Base<Machine>::Traits::Assembler *Asm =
@@ -940,18 +942,18 @@
if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
if (SrcVar->hasReg()) {
SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
- (Asm->*(Emitter.RegReg))(DispatchTy, DestReg, SrcReg);
+ (Asm->*(Emitter.RegReg))(DestTy, DestReg, SrcTy, SrcReg);
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->stackVarToAsmOperand(SrcVar);
- (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, SrcStackAddr);
+ (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy, SrcStackAddr);
}
} else if (const auto Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
Mem->emitSegmentOverride(Asm);
- (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, Mem->toAsmAddress(Asm));
+ (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy, Mem->toAsmAddress(Asm));
} else {
llvm_unreachable("Unexpected operand type");
}
@@ -1387,17 +1389,26 @@
case IceType_i8:
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
- Str << "\tcbtw";
+ Str << "\t"
+ << "cbtw";
break;
case IceType_i16:
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
- Str << "\tcwtd";
+ Str << "\t"
+ << "cwtd";
break;
case IceType_i32:
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
- Str << "\tcltd";
+ Str << "\t"
+ << "cltd";
+ break;
+ case IceType_i64:
+ assert(this->getDest()->getRegNum() ==
+ InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ Str << "\t"
+ << "cdto";
break;
}
}
@@ -1430,6 +1441,11 @@
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Asm->cdq();
break;
+ case IceType_i64:
+ assert(this->getDest()->getRegNum() ==
+ InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ Asm->cqo();
+ break;
}
}
@@ -1592,7 +1608,8 @@
assert(this->getSrcSize() == 2);
Operand *Src = this->getSrc(1);
Type SrcTy = Src->getType();
- assert(SrcTy == IceType_i16 || SrcTy == IceType_i32);
+ assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
+ (InstX86Base<Machine>::Traits::Is64Bit && SrcTy == IceType_i64));
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -1814,7 +1831,11 @@
switch (Variant) {
case Si2ss: {
assert(isScalarIntegerType(SrcTy));
- assert(typeWidthInBytes(SrcTy) <= 4);
+ if (!InstX86Base<Machine>::Traits::Is64Bit) {
+ assert(typeWidthInBytes(SrcTy) <= 4);
+ } else {
+ assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
+ }
assert(isScalarFloatingType(DestTy));
static const typename InstX86Base<Machine>::Traits::Assembler::
template CastEmitterRegOp<
@@ -1828,13 +1849,17 @@
typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR>(
- Func, DestTy, Dest, Src, Emitter);
+ Func, DestTy, Dest, SrcTy, Src, Emitter);
return;
}
case Tss2si: {
assert(isScalarFloatingType(SrcTy));
assert(isScalarIntegerType(DestTy));
- assert(typeWidthInBytes(DestTy) <= 4);
+ if (!InstX86Base<Machine>::Traits::Is64Bit) {
+ assert(typeWidthInBytes(DestTy) <= 4);
+ } else {
+ assert(DestTy == IceType_i32 || DestTy == IceType_i64);
+ }
static const typename InstX86Base<Machine>::Traits::Assembler::
template CastEmitterRegOp<
typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
@@ -1847,7 +1872,7 @@
typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm>(
- Func, SrcTy, Dest, Src, Emitter);
+ Func, DestTy, Dest, SrcTy, Src, Emitter);
return;
}
case Float2float: {
@@ -2244,6 +2269,10 @@
this->getDest()->emit(Func);
}
+inline bool isIntegerConstant(const Operand *Op) {
+ return llvm::isa<ConstantInteger32>(Op) || llvm::isa<ConstantInteger64>(Op);
+}
+
template <class Machine> void InstX86Mov<Machine>::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
@@ -2252,11 +2281,16 @@
Operand *Src = this->getSrc(0);
Type SrcTy = Src->getType();
Type DestTy = this->getDest()->getType();
- Str << "\tmov"
- << (!isScalarFloatingType(DestTy)
- ? this->getWidthString(SrcTy)
- : InstX86Base<Machine>::Traits::TypeAttributes[DestTy].SdSsString)
- << "\t";
+ if (InstX86Base<Machine>::Traits::Is64Bit && DestTy == IceType_i64 &&
+ isIntegerConstant(Src)) {
+ Str << "\tmovabs\t";
+ } else {
+ Str << "\tmov"
+ << (!isScalarFloatingType(DestTy)
+ ? this->getWidthString(SrcTy)
+ : InstX86Base<Machine>::Traits::TypeAttributes[DestTy]
+ .SdSsString) << "\t";
+ }
// For an integer truncation operation, src is wider than dest.
// Ideally, we use a mov instruction whose data width matches the
// narrower dest. This is a problem if e.g. src is a register like
@@ -2320,6 +2354,20 @@
assert(isScalarIntegerType(DestTy));
// Widen DestTy for truncation (see above note). We should only do this
// when both Src and Dest are integer types.
+ if (InstX86Base<Machine>::Traits::Is64Bit && DestTy == IceType_i64 &&
+ isIntegerConstant(Src)) {
+ uint64_t Value = -1;
+ if (const auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src)) {
+ Value = C64->getValue();
+ } else {
+ Value = llvm::cast<ConstantInteger32>(Src)->getValue();
+ }
+ Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>()
+ ->movabs(InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
+ Dest->getRegNum()),
+ Value);
+ return;
+ }
if (isScalarIntegerType(SrcTy)) {
DestTy = SrcTy;
}
@@ -2363,14 +2411,19 @@
const auto SrcVar = llvm::cast<Variable>(this->getSrc(0));
// For insert/extract element (one of Src/Dest is an Xmm vector and
// the other is an int type).
- if (SrcVar->getType() == IceType_i32) {
- assert(isVectorType(Dest->getType()));
+ if (SrcVar->getType() == IceType_i32 ||
+ (InstX86Base<Machine>::Traits::Is64Bit &&
+ SrcVar->getType() == IceType_i64)) {
+ assert(isVectorType(Dest->getType()) ||
+ (isScalarFloatingType(Dest->getType()) &&
+ typeWidthInBytes(SrcVar->getType()) ==
+ typeWidthInBytes(Dest->getType())));
assert(Dest->hasReg());
typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister DestReg =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm(
Dest->getRegNum());
if (SrcVar->hasReg()) {
- Asm->movd(DestReg,
+ Asm->movd(SrcVar->getType(), DestReg,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
SrcVar->getRegNum()));
} else {
@@ -2378,17 +2431,23 @@
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->stackVarToAsmOperand(SrcVar));
- Asm->movd(DestReg, StackAddr);
+ Asm->movd(SrcVar->getType(), DestReg, StackAddr);
}
} else {
- assert(isVectorType(SrcVar->getType()));
+ assert(isVectorType(SrcVar->getType()) ||
+ (isScalarFloatingType(SrcVar->getType()) &&
+ typeWidthInBytes(SrcVar->getType()) ==
+ typeWidthInBytes(Dest->getType())));
assert(SrcVar->hasReg());
- assert(Dest->getType() == IceType_i32);
+ assert(Dest->getType() == IceType_i32 ||
+ (InstX86Base<Machine>::Traits::Is64Bit &&
+ Dest->getType() == IceType_i64));
typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister SrcReg =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm(
SrcVar->getRegNum());
if (Dest->hasReg()) {
- Asm->movd(InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
+ Asm->movd(Dest->getType(),
+ InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
Dest->getRegNum()),
SrcReg);
} else {
@@ -2396,7 +2455,7 @@
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->stackVarToAsmOperand(Dest));
- Asm->movd(StackAddr, SrcReg);
+ Asm->movd(Dest->getType(), StackAddr, SrcReg);
}
}
}
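With the i64 case added to Cbwdq above, 64-bit signed division can now be
lowered without a helper call. A sketch of the expected sequence (virtual
registers elided as <...>; the fixed rax/rdx assignment is the requirement
asserted above):

  movq  <src0>, %rax
  cqto                 # Asm->cqo(): sign-extend %rax into %rdx:%rax
  idivq <src1>
  movq  %rax, <dest>   # quotient in %rax, remainder in %rdx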
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 6724a61..466564d 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -792,7 +792,7 @@
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JT : Ctx->getJumpTables())
- Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
+ Writer->writeJumpTable(JT, TargetX8632::Traits::RelFixup);
} break;
case FT_Asm:
// Already emitted from Cfg
@@ -821,7 +821,8 @@
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
- Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
+ Writer->writeDataSection(Vars, TargetX8632::Traits::RelFixup,
+ SectionSuffix);
} break;
case FT_Asm:
case FT_Iasm: {
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 29066aa..e0acbd6 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -68,6 +68,7 @@
static const GPRRegister Encoded_Reg_Accumulator = RegX8632::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx;
static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32;
+ static const FixupKind RelFixup = llvm::ELF::R_386_32;
class Operand {
public:
@@ -272,6 +273,7 @@
};
static const char *TargetName;
+ static constexpr Type WordType = IceType_i32;
static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM);
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 9056648..41d24cc 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -123,7 +123,7 @@
}
// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
-// OperandList in lowerCall. std::max() was supposed to work, but it doesn't.
+// OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
} // end of anonymous namespace
@@ -239,7 +239,6 @@
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
- Variable *ReturnRegHi = nullptr;
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
@@ -250,12 +249,8 @@
case IceType_i8:
case IceType_i16:
case IceType_i32:
- ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
- break;
case IceType_i64:
- // TODO(jpp): return i64 in a GPR.
- ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_f32:
case IceType_f64:
@@ -271,27 +266,16 @@
}
}
- Operand *CallTarget = legalize(Instr->getCallTarget());
+ Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm);
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
if (NeedSandboxing) {
- if (llvm::isa<Constant>(CallTarget)) {
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- } else {
- Variable *CallTargetVar = nullptr;
- _mov(CallTargetVar, CallTarget);
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- const SizeT BundleSize =
- 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
- _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
- CallTarget = CallTargetVar;
- }
+ llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
- if (NeedSandboxing)
- _bundle_unlock();
- if (ReturnRegHi)
- Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+ if (NeedSandboxing) {
+ llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
+ }
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
@@ -315,25 +299,11 @@
assert(ReturnReg && "x86-64 always returns value on registers.");
- // Assign the result of the call to Dest.
- if (ReturnRegHi) {
- assert(Dest->getType() == IceType_i64);
- split64(Dest);
- Variable *DestLo = Dest->getLo();
- Variable *DestHi = Dest->getHi();
- _mov(DestLo, ReturnReg);
- _mov(DestHi, ReturnRegHi);
- return;
- }
-
- assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
- Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
- isVectorType(Dest->getType()));
-
- if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
+ if (isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
+ assert(isScalarFloatingType(Dest->getType()) ||
+ isScalarIntegerType(Dest->getType()));
_mov(Dest, ReturnReg);
}
}
@@ -356,36 +326,36 @@
++i) {
Variable *Arg = Args[i];
Type Ty = Arg->getType();
- if ((isVectorType(Ty) || isScalarFloatingType(Ty)) &&
- NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
- // Replace Arg in the argument list with the home register. Then
- // generate an instruction in the prolog to copy the home register
- // to the assigned location of Arg.
- int32_t RegNum = getRegisterForXmmArgNum(NumXmmArgs);
+ Variable *RegisterArg = nullptr;
+ int32_t RegNum = Variable::NoRegister;
+ if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
+ if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
+ continue;
+ }
+ RegNum = getRegisterForXmmArgNum(NumXmmArgs);
++NumXmmArgs;
- Variable *RegisterArg = Func->makeVariable(Ty);
- if (BuildDefs::dump())
- RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
- RegisterArg->setRegNum(RegNum);
- RegisterArg->setIsArg();
- Arg->setIsArg(false);
-
- Args[i] = RegisterArg;
- Context.insert(InstAssign::create(Func, Arg, RegisterArg));
- } else if (isScalarIntegerType(Ty) &&
- NumGprArgs < Traits::X86_MAX_GPR_ARGS) {
- int32_t RegNum = getRegisterForGprArgNum(NumGprArgs);
+ RegisterArg = Func->makeVariable(Ty);
+ } else if (isScalarIntegerType(Ty)) {
+ if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
+ continue;
+ }
+ RegNum = getRegisterForGprArgNum(NumGprArgs);
++NumGprArgs;
- Variable *RegisterArg = Func->makeVariable(Ty);
- if (BuildDefs::dump())
- RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
- RegisterArg->setRegNum(RegNum);
- RegisterArg->setIsArg();
- Arg->setIsArg(false);
-
- Args[i] = RegisterArg;
- Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+ RegisterArg = Func->makeVariable(Ty);
}
+ assert(RegNum != Variable::NoRegister);
+ assert(RegisterArg != nullptr);
+ // Replace Arg in the argument list with the home register. Then
+ // generate an instruction in the prolog to copy the home register
+ // to the assigned location of Arg.
+ if (BuildDefs::dump())
+ RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
+ RegisterArg->setRegNum(RegNum);
+ RegisterArg->setIsArg();
+ Arg->setIsArg(false);
+
+ Args[i] = RegisterArg;
+ Context.insert(InstAssign::create(Func, Arg, RegisterArg));
}
}
@@ -393,19 +363,11 @@
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue());
- // TODO(jpp): this is not needed.
- if (Src0->getType() == IceType_i64) {
- Variable *eax =
- legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
- Variable *edx =
- legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
- Reg = eax;
- Context.insert(InstFakeUse::create(Func, edx));
- } else if (isScalarFloatingType(Src0->getType())) {
- _fld(Src0);
- } else if (isVectorType(Src0->getType())) {
+ if (isVectorType(Src0->getType()) ||
+ isScalarFloatingType(Src0->getType())) {
Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
} else {
+ assert(isScalarIntegerType(Src0->getType()));
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
@@ -577,19 +539,17 @@
unsigned NumGPRArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
- if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
- ++NumXmmArgs;
- continue;
- }
- if (isScalarFloatingType(Arg->getType()) &&
- NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
- ++NumXmmArgs;
- continue;
- }
- if (isScalarIntegerType(Arg->getType()) &&
- NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
- ++NumGPRArgs;
- continue;
+ if (isVectorType(Arg->getType()) || isScalarFloatingType(Arg->getType())) {
+ if (NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
+ ++NumXmmArgs;
+ continue;
+ }
+ } else {
+ assert(isScalarIntegerType(Arg->getType()));
+ if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
+ ++NumGPRArgs;
+ continue;
+ }
}
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
}
@@ -679,23 +639,9 @@
}
}
- if (!Ctx->getFlags().getUseSandboxing())
- return;
- // Change the original ret instruction into a sandboxed return sequence.
- // t:ecx = pop
- // bundle_lock
- // and t, ~31
- // jmp *t
- // bundle_unlock
- // FakeUse <original_ret_operand>
- Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
- _pop(T_ecx);
- lowerIndirectJump(T_ecx);
- if (RI->getSrcSize()) {
- Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
- Context.insert(InstFakeUse::create(Func, RetValue));
+ if (Ctx->getFlags().getUseSandboxing()) {
+ llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
- RI->setDeleted();
}
void TargetX8664::emitJumpTable(const Cfg *Func,
@@ -858,8 +804,7 @@
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JumpTable : Ctx->getJumpTables())
- // TODO(jpp): not 386.
- Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
+ Writer->writeJumpTable(JumpTable, TargetX8664::Traits::RelFixup);
} break;
case FT_Asm:
// Already emitted from Cfg
@@ -888,8 +833,8 @@
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
- // TODO(jpp): not 386.
- Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
+ Writer->writeDataSection(Vars, TargetX8664::Traits::RelFixup,
+ SectionSuffix);
} break;
case FT_Asm:
case FT_Iasm: {
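The reworked argument lowering above treats GPR and XMM arguments uniformly.
Assuming the traits follow the System V AMD64 ordering (rdi, rsi, rdx, rcx,
r8, r9 for integers; xmm0-xmm7 for floats and vectors), a signature such as

  i64 f(i64 %a, double %b, i64 %c)

homes %a in %rdi (GPR arg 0), %b in %xmm0 (XMM arg 0), and %c in %rsi
(GPR arg 1), and returns in %rax; arguments beyond the register limits are
skipped here and placed on the stack by finishArgumentLowering() in the
prolog.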
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 89fc203..4a12004 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -66,7 +66,8 @@
using RegisterSet = ::Ice::RegX8664;
static const GPRRegister Encoded_Reg_Accumulator = RegX8664::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8664::Encoded_Reg_ecx;
- static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32; // TODO(jpp): ???
+ static const FixupKind PcRelFixup = llvm::ELF::R_X86_64_PC32;
+ static const FixupKind RelFixup = llvm::ELF::R_X86_64_32S;
class Operand {
public:
@@ -270,8 +271,8 @@
static Address ofConstPool(Assembler *Asm, const Constant *Imm) {
// TODO(jpp): ???
- AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Imm);
- const RelocOffsetT Offset = 0;
+ AssemblerFixup *Fixup = Asm->createFixup(RelFixup, Imm);
+ const RelocOffsetT Offset = 4;
return Address(ABSOLUTE, Offset, Fixup);
}
};
@@ -293,6 +294,7 @@
};
static const char *TargetName;
+ static constexpr Type WordType = IceType_i64;
static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM);
@@ -331,7 +333,7 @@
#define X(val, encode, name64, name32, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP) \
(*IntegerRegisters)[RegisterSet::val] = isInt; \
- (*IntegerRegistersI8)[RegisterSet::val] = 1; \
+ (*IntegerRegistersI8)[RegisterSet::val] = isInt; \
(*FloatRegisters)[RegisterSet::val] = isFP; \
(*VectorRegisters)[RegisterSet::val] = isFP; \
(*ScratchRegs)[RegisterSet::val] = scratch;
@@ -450,7 +452,7 @@
/// address.
static const uint32_t X86_STACK_ALIGNMENT_BYTES;
/// Size of the return address on the stack
- static const uint32_t X86_RET_IP_SIZE_BYTES = 4;
+ static const uint32_t X86_RET_IP_SIZE_BYTES = 8;
/// The number of different NOP instructions
static const uint32_t X86_NUM_NOP_VARIANTS = 5;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 342c97b..da863f4 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -21,6 +21,7 @@
#include "IceInst.h"
#include "IceSwitchLowering.h"
#include "IceTargetLowering.h"
+#include "IceUtils.h"
#include <type_traits>
#include <utility>
@@ -80,10 +81,9 @@
: Traits::RegisterSet::Reg_esp;
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
- // Round up to the next multiple of 4 bytes. In particular, i1,
- // i8, and i16 are rounded up to 4 bytes.
- // TODO(jpp): this needs to round to multiples of 8 bytes in x86-64.
- return (typeWidthInBytes(Ty) + 3) & ~3;
+ // Round up to the next multiple of WordType bytes.
+ const uint32_t WordSizeInBytes = typeWidthInBytes(Traits::WordType);
+ return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
}
SizeT getMinJumpTableSize() const override { return 4; }
@@ -98,14 +98,40 @@
void emit(const ConstantDouble *C) const final;
void initNodeForLowering(CfgNode *Node) override;
- /// Ensure that a 64-bit Variable has been split into 2 32-bit
+ /// x86-32: Ensure that a 64-bit Variable has been split into 2 32-bit
/// Variables, creating them if necessary. This is needed for all
/// I64 operations, and it is needed for pushing F64 arguments for
/// function calls using the 32-bit push instruction (though the
/// latter could be done by directly writing to the stack).
- void split64(Variable *Var);
- Operand *loOperand(Operand *Operand);
- Operand *hiOperand(Operand *Operand);
+ ///
+ /// x86-64: Complains loudly if invoked because the cpu can handle
+ /// 64-bit types natively.
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type split64(Variable *Var);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type split64(Variable *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (split64)");
+ }
+
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, Operand>::type *
+ loOperand(Operand *Operand);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, Operand>::type *loOperand(Operand *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (loOperand)");
+ }
+
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, Operand>::type *
+ hiOperand(Operand *Operand);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, Operand>::type *hiOperand(Operand *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (hiOperand)");
+ }
+
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
typename Traits::Address stackVarToAsmOperand(const Variable *Var) const;
@@ -128,6 +154,19 @@
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override;
void lowerIcmp(const InstIcmp *Inst) override;
+ /// Complains loudly if invoked because the cpu can handle 64-bit types
+ /// natively.
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type
+ lowerIcmp64(const InstIcmp *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (lowerIcmp64)");
+ }
+ /// lowerIcmp64 handles 64-bit icmp lowering.
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type
+ lowerIcmp64(const InstIcmp *Inst);
+
void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
void lowerInsertElement(const InstInsertElement *Inst) override;
void lowerLoad(const InstLoad *Inst) override;
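typeWidthInBytesOnStack above now rounds stack slots up to the target word
size. A minimal sketch of the helper it depends on (the actual definition
lives in the newly included IceUtils.h):

  // Round Value up to the next multiple of a power-of-two Alignment.
  inline uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
    return (Value + Alignment - 1) & ~(Alignment - 1);
  }

With WordType = i32 (x86-32): i8 -> 4, i32 -> 4, i64 -> 8 bytes.
With WordType = i64 (x86-64): i8 -> 8, i32 -> 8, i64 -> 8 bytes.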
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 8dad58e..e190b5d 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -77,6 +77,7 @@
public:
enum BoolFoldingProducerKind {
PK_None,
+ // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
PK_Icmp32,
PK_Icmp64,
PK_Fcmp,
@@ -120,7 +121,7 @@
typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
if (llvm::isa<InstIcmp>(Instr)) {
- if (Instr->getSrc(0)->getType() != IceType_i64)
+ if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
return PK_Icmp32;
return PK_None; // TODO(stichnot): actually PK_Icmp64;
}
@@ -643,10 +644,10 @@
} else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
// An AtomicLoad intrinsic qualifies as long as it has a valid
// memory ordering, and can be implemented in a single
- // instruction (i.e., not i64).
+ // instruction (i.e., not i64 on x86-32).
Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
if (ID == Intrinsics::AtomicLoad &&
- Intrin->getDest()->getType() != IceType_i64 &&
+ (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
Intrinsics::isMemoryOrderValid(
ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
LoadDest = Intrin->getDest();
@@ -724,6 +725,10 @@
template <class Machine>
Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
+ // Special case: never allow partial reads/writes to/from %rBP and %rSP.
+ if (RegNum == Traits::RegisterSet::Reg_esp ||
+ RegNum == Traits::RegisterSet::Reg_ebp)
+ Ty = Traits::WordType;
if (Ty == IceType_void)
Ty = IceType_i32;
if (PhysicalRegisters[Ty].empty())
@@ -770,7 +775,7 @@
}
if (Offset)
Str << Offset;
- const Type FrameSPTy = IceType_i32;
+ const Type FrameSPTy = Traits::WordType;
Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
}
@@ -810,8 +815,7 @@
Variable *Lo = Arg->getLo();
Variable *Hi = Arg->getHi();
Type Ty = Arg->getType();
- if (Lo && Hi && Ty == IceType_i64) {
- // TODO(jpp): This special case is not needed for x86-64.
+ if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
assert(Lo->getType() != IceType_i64); // don't want infinite recursion
assert(Hi->getType() != IceType_i64); // don't want infinite recursion
finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
@@ -824,7 +828,7 @@
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
if (Arg->hasReg()) {
- assert(Ty != IceType_i64);
+ assert(Ty != IceType_i64 || Traits::Is64Bit);
typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
if (isVectorType(Arg->getType())) {
@@ -840,11 +844,13 @@
}
template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
- // TODO(jpp): this is wrong for x86-64.
- return IceType_i32;
+ return Traits::WordType;
}
-template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
+template <class Machine>
+template <typename T>
+typename std::enable_if<!T::Is64Bit, void>::type
+TargetX86Base<Machine>::split64(Variable *Var) {
switch (Var->getType()) {
default:
return;
@@ -876,7 +882,9 @@
}
template <class Machine>
-Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
+template <typename T>
+typename std::enable_if<!T::Is64Bit, Operand>::type *
+TargetX86Base<Machine>::loOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -905,7 +913,9 @@
}
template <class Machine>
-Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
+template <typename T>
+typename std::enable_if<!T::Is64Bit, Operand>::type *
+TargetX86Base<Machine>::hiOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -1107,8 +1117,8 @@
if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
std::swap(Src0, Src1);
}
- if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ // These x86-32 helper-call-involved instructions are lowered in this
// separate switch. This is because loOperand() and hiOperand()
// may insert redundant instructions for constant blinding and
// pooling. Such redundant instructions will fail liveness analysis
@@ -1656,7 +1666,8 @@
Context.insert(InstFakeUse::create(Func, T_eax));
} else {
Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
+ T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
+ _mov(T_edx, Zero);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
_div(T_edx, Src1, T);
_mov(Dest, T_edx);
@@ -1721,7 +1732,7 @@
_mov(Dest, T);
Context.insert(InstFakeUse::create(Func, T_eax));
} else {
- T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
_cbwdq(T_edx, T);
_idiv(T_edx, Src1, T);
@@ -1765,7 +1776,7 @@
Variable *Dest = Inst->getDest();
Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType());
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
Src0 = legalize(Src0);
Operand *Src0Lo = loOperand(Src0);
Operand *Src0Hi = hiOperand(Src0);
@@ -1870,7 +1881,7 @@
_psra(T, ShiftConstant);
_movp(Dest, T);
}
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Constant *Shift = Ctx->getConstantInt32(31);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -1930,7 +1941,7 @@
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// t1=movzx src; dst.lo=t1; dst.hi=0
Constant *Zero = Ctx->getConstantZero(IceType_i32);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -1951,13 +1962,16 @@
// t = Src0RM; t &= 1; Dest = t
Constant *One = Ctx->getConstantInt32(1);
Type DestTy = Dest->getType();
- Variable *T;
+ Variable *T = nullptr;
if (DestTy == IceType_i8) {
- T = makeReg(DestTy);
_mov(T, Src0RM);
} else {
+ assert(DestTy != IceType_i1);
+ assert(Traits::Is64Bit || DestTy != IceType_i64);
// Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
- T = makeReg(IceType_i32);
+ // In x86-64 we need to widen T to 64 bits to ensure that T -- if
+ // written to the stack (i.e., in -Om1) -- will be fully zero-extended.
+ T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
_movzx(T, Src0RM);
}
_and(T, One);
@@ -1982,7 +1996,7 @@
_movp(Dest, T);
} else {
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
- if (Src0->getType() == IceType_i64)
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
Src0 = loOperand(Src0);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// t1 = trunc Src0RM; Dest = t1
@@ -2013,7 +2027,7 @@
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// Use a helper for converting floating-point values to 64-bit
// integers. SSE2 appears to have no way to convert from xmm
// registers to something like the edx:eax register pair, and
@@ -2032,7 +2046,15 @@
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Dest->getType() != IceType_i64);
+ T_1 = makeReg(IceType_i32);
+ }
+ // cvt() requires its integer argument to be a GPR.
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2050,14 +2072,18 @@
Call->addArg(Inst->getSrc(0));
lowerCall(Call);
} else if (Dest->getType() == IceType_i64 ||
- Dest->getType() == IceType_i32) {
+ (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
// Use a helper for both x86-32 and x86-64.
- split64(Dest);
+ if (!Traits::Is64Bit)
+ split64(Dest);
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
Type SrcType = Inst->getSrc(0)->getType();
IceString TargetString;
- if (isInt32Asserting32Or64(DestType)) {
+ if (Traits::Is64Bit) {
+ TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
+ : H_fptoui_f64_i64;
+ } else if (isInt32Asserting32Or64(DestType)) {
TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
: H_fptoui_f64_i32;
} else {
@@ -2071,7 +2097,15 @@
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
+ assert(Dest->getType() != IceType_i64);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Dest->getType() != IceType_i32);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2090,7 +2124,7 @@
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
_movp(Dest, T);
- } else if (Inst->getSrc(0)->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
// Use a helper for x86-32.
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
@@ -2106,9 +2140,16 @@
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// Sign-extend the operand.
// t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Src0RM->getType() != IceType_i64);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32)
+ if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
_movsx(T_1, Src0RM);
@@ -2126,7 +2167,7 @@
Call->addArg(Src0);
lowerCall(Call);
} else if (Src0->getType() == IceType_i64 ||
- Src0->getType() == IceType_i32) {
+ (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
// Use a helper for x86-32 and x86-64. Also use a helper for
// i32 on x86-32.
const SizeT MaxSrcs = 1;
@@ -2147,9 +2188,17 @@
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// Zero-extend the operand.
// t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Src0RM->getType() != IceType_i64);
+ assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32)
+ if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
_movzx(T_1, Src0RM);
@@ -2205,77 +2254,96 @@
_mov(Dest, Spill);
} break;
case IceType_i64: {
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- assert(Src0RM->getType() == IceType_f64);
- // a.i64 = bitcast b.f64 ==>
- // s.f64 = spill b.f64
- // t_lo.i32 = lo(s.f64)
- // a_lo.i32 = t_lo.i32
- // t_hi.i32 = hi(s.f64)
- // a_hi.i32 = t_hi.i32
- Operand *SpillLo, *SpillHi;
- if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
- typename Traits::SpillVariable *SpillVar =
- Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Src0Var);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
- _movq(Spill, Src0RM);
- SpillLo = Traits::VariableSplit::create(Func, Spill,
- Traits::VariableSplit::Low);
- SpillHi = Traits::VariableSplit::create(Func, Spill,
- Traits::VariableSplit::High);
+ assert(Src0->getType() == IceType_f64);
+ if (Traits::Is64Bit) {
+ // Movd requires its fp argument (in this case, the bitcast source) to
+ // be an xmm register.
+ Variable *Src0R = legalizeToReg(Src0);
+ Variable *T = makeReg(IceType_i64);
+ _movd(T, Src0R);
+ _mov(Dest, T);
} else {
- SpillLo = loOperand(Src0RM);
- SpillHi = hiOperand(Src0RM);
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ // a.i64 = bitcast b.f64 ==>
+ // s.f64 = spill b.f64
+ // t_lo.i32 = lo(s.f64)
+ // a_lo.i32 = t_lo.i32
+ // t_hi.i32 = hi(s.f64)
+ // a_hi.i32 = t_hi.i32
+ Operand *SpillLo, *SpillHi;
+ if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
+ typename Traits::SpillVariable *SpillVar =
+ Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+ SpillVar->setLinkedTo(Src0Var);
+ Variable *Spill = SpillVar;
+ Spill->setWeight(RegWeight::Zero);
+ _movq(Spill, Src0RM);
+ SpillLo = Traits::VariableSplit::create(Func, Spill,
+ Traits::VariableSplit::Low);
+ SpillHi = Traits::VariableSplit::create(Func, Spill,
+ Traits::VariableSplit::High);
+ } else {
+ SpillLo = loOperand(Src0RM);
+ SpillHi = hiOperand(Src0RM);
+ }
+
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *T_Lo = makeReg(IceType_i32);
+ Variable *T_Hi = makeReg(IceType_i32);
+
+ _mov(T_Lo, SpillLo);
+ _mov(DestLo, T_Lo);
+ _mov(T_Hi, SpillHi);
+ _mov(DestHi, T_Hi);
}
-
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = makeReg(IceType_i32);
- Variable *T_Hi = makeReg(IceType_i32);
-
- _mov(T_Lo, SpillLo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, SpillHi);
- _mov(DestHi, T_Hi);
} break;
case IceType_f64: {
- Src0 = legalize(Src0);
assert(Src0->getType() == IceType_i64);
- if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
- Variable *T = Func->makeVariable(Dest->getType());
- _movq(T, Src0);
- _movq(Dest, T);
- break;
- }
- // a.f64 = bitcast b.i64 ==>
- // t_lo.i32 = b_lo.i32
- // FakeDef(s.f64)
- // lo(s.f64) = t_lo.i32
- // t_hi.i32 = b_hi.i32
- // hi(s.f64) = t_hi.i32
- // a.f64 = s.f64
- typename Traits::SpillVariable *SpillVar =
- Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Dest);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
+ if (Traits::Is64Bit) {
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ Variable *T = makeReg(IceType_f64);
+ // Movd requires its fp argument (in this case, the bitcast destination)
+ // to be an xmm register.
+ T->setWeightInfinite();
+ _movd(T, Src0RM);
+ _mov(Dest, T);
+ } else {
+ Src0 = legalize(Src0);
+ if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
+ Variable *T = Func->makeVariable(Dest->getType());
+ _movq(T, Src0);
+ _movq(Dest, T);
+ break;
+ }
+ // a.f64 = bitcast b.i64 ==>
+ // t_lo.i32 = b_lo.i32
+ // FakeDef(s.f64)
+ // lo(s.f64) = t_lo.i32
+ // t_hi.i32 = b_hi.i32
+ // hi(s.f64) = t_hi.i32
+ // a.f64 = s.f64
+ typename Traits::SpillVariable *SpillVar =
+ Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+ SpillVar->setLinkedTo(Dest);
+ Variable *Spill = SpillVar;
+ Spill->setWeight(RegWeight::Zero);
- Variable *T_Lo = nullptr, *T_Hi = nullptr;
- typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
- Func, Spill, Traits::VariableSplit::Low);
- typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
- Func, Spill, Traits::VariableSplit::High);
- _mov(T_Lo, loOperand(Src0));
- // Technically, the Spill is defined after the _store happens, but
- // SpillLo is considered a "use" of Spill so define Spill before it
- // is used.
- Context.insert(InstFakeDef::create(Func, Spill));
- _store(T_Lo, SpillLo);
- _mov(T_Hi, hiOperand(Src0));
- _store(T_Hi, SpillHi);
- _movq(Dest, Spill);
+ Variable *T_Lo = nullptr, *T_Hi = nullptr;
+ typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
+ Func, Spill, Traits::VariableSplit::Low);
+ typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
+ Func, Spill, Traits::VariableSplit::High);
+ _mov(T_Lo, loOperand(Src0));
+ // Technically, the Spill is defined after the _store happens, but
+ // SpillLo is considered a "use" of Spill so define Spill before it
+ // is used.
+ Context.insert(InstFakeDef::create(Func, Spill));
+ _store(T_Lo, SpillLo);
+ _mov(T_Hi, hiOperand(Src0));
+ _store(T_Hi, SpillHi);
+ _movq(Dest, Spill);
+ }
} break;
case IceType_v8i1: {
assert(Src0->getType() == IceType_i8);
@@ -2615,32 +2683,8 @@
return;
}
- // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
- if (Src0->getType() == IceType_i64) {
- InstIcmp::ICond Condition = Inst->getCondition();
- size_t Index = static_cast<size_t>(Condition);
- assert(Index < Traits::TableIcmp64Size);
- Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
- Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
- Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
- Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- Constant *One = Ctx->getConstantInt32(1);
- typename Traits::Insts::Label *LabelFalse =
- Traits::Insts::Label::create(Func, this);
- typename Traits::Insts::Label *LabelTrue =
- Traits::Insts::Label::create(Func, this);
- _mov(Dest, One);
- _cmp(Src0HiRM, Src1HiRI);
- if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
- _br(Traits::TableIcmp64[Index].C1, LabelTrue);
- if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
- _br(Traits::TableIcmp64[Index].C2, LabelFalse);
- _cmp(Src0LoRM, Src1LoRI);
- _br(Traits::TableIcmp64[Index].C3, LabelTrue);
- Context.insert(LabelFalse);
- _mov_nonkillable(Dest, Zero);
- Context.insert(LabelTrue);
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
+ lowerIcmp64(Inst);
return;
}
@@ -2650,6 +2694,40 @@
_setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
}
+template <typename Machine>
+template <typename T>
+typename std::enable_if<!T::Is64Bit, void>::type
+TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
+ // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
+ Operand *Src0 = legalize(Inst->getSrc(0));
+ Operand *Src1 = legalize(Inst->getSrc(1));
+ Variable *Dest = Inst->getDest();
+ InstIcmp::ICond Condition = Inst->getCondition();
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < Traits::TableIcmp64Size);
+ Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
+ Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
+ Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
+ Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ Constant *One = Ctx->getConstantInt32(1);
+ typename Traits::Insts::Label *LabelFalse =
+ Traits::Insts::Label::create(Func, this);
+ typename Traits::Insts::Label *LabelTrue =
+ Traits::Insts::Label::create(Func, this);
+ _mov(Dest, One);
+ _cmp(Src0HiRM, Src1HiRI);
+ if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C1, LabelTrue);
+ if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C2, LabelFalse);
+ _cmp(Src0LoRM, Src1LoRI);
+ _br(Traits::TableIcmp64[Index].C3, LabelTrue);
+ Context.insert(LabelFalse);
+ _mov_nonkillable(Dest, Zero);
+ Context.insert(LabelTrue);
+}
+
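Roughly, for a = icmp ult i64 b, c the table-driven sequence above comes out
as the following (illustrative sketch; the actual conditions are taken from
TableIcmp64, and the register/label names here are hypothetical):

  //   mov  a, 1
  //   cmp  b_hi, c_hi
  //   jb   LabelTrue        ; C1: high words decide "true"
  //   ja   LabelFalse       ; C2: high words decide "false"
  //   cmp  b_lo, c_lo
  //   jb   LabelTrue        ; C3: high words equal, low words decide
  // LabelFalse:
  //   mov  a, 0             ; _mov_nonkillable
  // LabelTrue: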
template <class Machine>
void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
Operand *SourceVectNotLegalized = Inst->getSrc(0);
@@ -2848,7 +2926,7 @@
return;
}
Variable *Dest = Instr->getDest();
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// Follow what GCC does and use a movq instead of what lowerLoad()
// normally does (split the load into two).
// Thus, this skips load/arithmetic op folding. Load/arithmetic folding
@@ -2898,7 +2976,7 @@
// Add a fence after the store to make it visible.
Operand *Value = Instr->getArg(0);
Operand *Ptr = Instr->getArg(1);
- if (Value->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
// Use a movq instead of what lowerStore() normally does
// (split the store into two), following what GCC does.
    // Cast the bits from int to an xmm register first.
@@ -2922,7 +3000,7 @@
Operand *Val = Instr->getArg(0);
// In 32-bit mode, bswap only works on 32-bit arguments, and the
// argument must be a register. Use rotate left for 16-bit bswap.
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
Val = legalizeUndef(Val);
Variable *T_Lo = legalizeToReg(loOperand(Val));
Variable *T_Hi = legalizeToReg(hiOperand(Val));
@@ -2932,7 +3010,8 @@
_bswap(T_Hi);
_mov(DestLo, T_Hi);
_mov(DestHi, T_Lo);
- } else if (Val->getType() == IceType_i32) {
+ } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
+ Val->getType() == IceType_i32) {
Variable *T = legalizeToReg(Val);
_bswap(T);
_mov(Dest, T);
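In plain C++, the two-register dance of the 32-bit path computes the
following (sketch assuming the GCC/Clang __builtin_bswap32 builtin; not part
of this CL):

  #include <cstdint>

  static inline uint64_t Bswap64Via32(uint64_t Val) {
    uint32_t Lo = static_cast<uint32_t>(Val);
    uint32_t Hi = static_cast<uint32_t>(Val >> 32);
    // Swap the bytes of each half, then swap the halves:
    // DestLo = bswap(Hi), DestHi = bswap(Lo).
    return (static_cast<uint64_t>(__builtin_bswap32(Lo)) << 32) |
           __builtin_bswap32(Hi);
  }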
@@ -2949,11 +3028,28 @@
}
case Intrinsics::Ctpop: {
Variable *Dest = Instr->getDest();
+ Variable *T = nullptr;
Operand *Val = Instr->getArg(0);
- InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
- ? H_call_ctpop_i32
- : H_call_ctpop_i64,
- Dest, 1);
+ Type ValTy = Val->getType();
+ assert(ValTy == IceType_i32 || ValTy == IceType_i64);
+
+ if (!Traits::Is64Bit) {
+ T = Dest;
+ } else {
+ T = makeReg(IceType_i64);
+ if (ValTy == IceType_i32) {
+      // In x86-64, __popcountsi2 is not defined, so we cheat a bit by
+      // converting Val to a 64-bit value and using ctpop_i64. _movzx
+      // ensures we will not have any bits set in Val's upper 32 bits.
+ Variable *V = makeReg(IceType_i64);
+ _movzx(V, Val);
+ Val = V;
+ }
+ ValTy = IceType_i64;
+ }
+
+ InstCall *Call = makeHelperCall(
+ ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
Call->addArg(Val);
lowerCall(Call);
// The popcount helpers always return 32-bit values, while the intrinsic's
@@ -2961,10 +3057,33 @@
// (in 64-bit mode). Thus, clear the upper bits of the dest just in case
// the user doesn't do that in the IR. If the user does that in the IR,
// then this zero'ing instruction is dead and gets optimized out.
- if (Val->getType() == IceType_i64) {
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(DestHi, Zero);
+ if (!Traits::Is64Bit) {
+ assert(T == Dest);
+ if (Val->getType() == IceType_i64) {
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ _mov(DestHi, Zero);
+ }
+ } else {
+ assert(Val->getType() == IceType_i64);
+    // T is 64-bit, and it needs to be copied to Dest. We need to:
+    //
+    // T_1.32 = trunc T.64 to i32
+    // T_2.64 = zext T_1.32 to i64
+    // Dest.<<right_size>> = T_2.<<right_size>>
+    //
+    // which ensures the upper 32 bits will always be cleared. Just doing a
+    //
+    // mov Dest.32 = trunc T.64 to i32
+    //
+    // is dangerous because there's a chance the compiler will optimize this
+    // copy out. To use _movzx we need two new registers (one 32-bit and
+    // one 64-bit wide).
+ Variable *T_1 = makeReg(IceType_i32);
+ _mov(T_1, T);
+ Variable *T_2 = makeReg(IceType_i64);
+ _movzx(T_2, T_1);
+ _mov(Dest, T_2);
}
return;
}
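A minimal sketch of the two x86-64 tricks above in plain C++
(__builtin_popcountll standing in for the ctpop_i64 helper; illustrative,
not part of this CL):

  #include <cstdint>

  static inline uint64_t Ctpop32Via64(uint32_t Val) {
    uint64_t Wide = Val; // what _movzx guarantees: upper 32 bits are zero
    // The helper's result is only 32 bits wide, so zero-extend again
    // before treating it as a 64-bit Dest.
    return static_cast<uint32_t>(__builtin_popcountll(Wide));
  }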
@@ -2974,7 +3093,7 @@
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
FirstVal = loOperand(Val);
SecondVal = hiOperand(Val);
} else {
@@ -2991,7 +3110,7 @@
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
FirstVal = hiOperand(Val);
SecondVal = loOperand(Val);
} else {
@@ -3099,7 +3218,7 @@
void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
Operand *Ptr, Operand *Expected,
Operand *Desired) {
- if (Expected->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
// Reserve the pre-colored registers first, before adding any more
// infinite-weight variables from formMemoryOperand's legalization.
Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
@@ -3217,7 +3336,7 @@
Func->setError("Unknown AtomicRMW operation");
return;
case Intrinsics::AtomicAdd: {
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// All the fall-through paths must set this to true, but use this
// for asserting.
NeedsCmpxchg = true;
@@ -3235,7 +3354,7 @@
return;
}
case Intrinsics::AtomicSub: {
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
Op_Lo = &TargetX86Base<Machine>::_sub;
Op_Hi = &TargetX86Base<Machine>::_sbb;
@@ -3272,7 +3391,7 @@
Op_Hi = &TargetX86Base<Machine>::_xor;
break;
case Intrinsics::AtomicExchange:
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
// NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
// just need to be moved to the ecx and ebx registers.
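These NeedsCmpxchg paths all build the classic x86-32 retry loop; for a
64-bit atomic add it expands to roughly the following (sketch with
illustrative operand names):

  //   mov  eax, dword ptr [Addr]      ; expected value, low half
  //   mov  edx, dword ptr [Addr + 4]  ; expected value, high half
  // .retry:
  //   mov  ebx, eax
  //   mov  ecx, edx
  //   add  ebx, AddendLo              ; Op_Lo
  //   adc  ecx, AddendHi              ; Op_Hi
  //   lock cmpxchg8b qword ptr [Addr] ; desired in ecx:ebx, expected in edx:eax
  //   jne  .retry                     ; lost a race; edx:eax was reloaded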
@@ -3326,7 +3445,7 @@
// If Op_{Lo,Hi} are nullptr, then just copy the value.
Val = legalize(Val);
Type Ty = Val->getType();
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
@@ -3458,7 +3577,7 @@
if (!Cttz) {
_xor(T_Dest, ThirtyOne);
}
- if (Ty == IceType_i32) {
+ if (Traits::Is64Bit || Ty == IceType_i32) {
_mov(Dest, T_Dest);
return;
}
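The _xor with ThirtyOne works because bsr on a nonzero i32 input yields a
value in 0..31, and for any such n, 31 ^ n == 31 - n, so no subtraction is
needed; a compile-time spot check (sketch):

  static_assert((31 ^ 20) == 31 - 20,
                "xor-with-31 equals 31-minus-n for n in 0..31");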
@@ -4138,7 +4257,7 @@
std::swap(SrcT, SrcF);
Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
}
- if (DestTy == IceType_i64) {
+ if (!Traits::Is64Bit && DestTy == IceType_i64) {
SrcT = legalizeUndef(SrcT);
SrcF = legalizeUndef(SrcF);
// Set the low portion.
@@ -4160,7 +4279,8 @@
return;
}
- assert(DestTy == IceType_i16 || DestTy == IceType_i32);
+ assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
+ (Traits::Is64Bit && DestTy == IceType_i64));
Variable *T = nullptr;
SrcF = legalize(SrcF);
_mov(T, SrcF);
@@ -4177,7 +4297,7 @@
formMemoryOperand(Addr, Value->getType());
Type Ty = NewAddr->getType();
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Value = legalizeUndef(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
@@ -4225,7 +4345,7 @@
uint64_t Min, uint64_t Max) {
// TODO(ascull): 64-bit should not reach here but only because it is not
// implemented yet. This should be able to handle the 64-bit case.
- assert(Comparison->getType() != IceType_i64);
+ assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
// Subtracting 0 is a nop so don't do it
if (Min != 0) {
// Avoid clobbering the comparison by copying it
@@ -4324,7 +4444,7 @@
assert(CaseClusters.size() != 0); // Should always be at least one
- if (Src0->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
Src0 = legalize(Src0); // get Base/Index into physical registers
Operand *Src0Lo = loOperand(Src0);
Operand *Src0Hi = hiOperand(Src0);
@@ -4529,7 +4649,7 @@
Operand *Src = RMW->getData();
Type Ty = Src->getType();
typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Src = legalizeUndef(Src);
Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
@@ -4563,7 +4683,8 @@
return;
}
} else {
- // i8, i16, i32
+ // x86-32: i8, i16, i32
+ // x86-64: i8, i16, i32, i64
switch (RMW->getOp()) {
default:
// TODO(stichnot): Implement other arithmetic operators.
@@ -4608,8 +4729,14 @@
/// turned into zeroes, since loOperand() and hiOperand() don't expect
/// Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
- // Pause constant blinding or pooling, blinding or pooling will be done later
- // during phi lowering assignments
+ if (Traits::Is64Bit) {
+    // On x86-64 we don't need to prelower phis -- the architecture can
+    // handle 64-bit integers natively.
+ return;
+ }
+
+  // Pause constant blinding or pooling; it will be done later, during phi
+  // lowering assignments.
BoolFlagSaver B(RandomizationPoolingPaused, true);
PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
this, Context.getNode(), Func);
@@ -4770,6 +4897,16 @@
// There should be no constants of vector type (other than undef).
assert(!isVectorType(Ty));
+  // If the operand is a 64-bit constant integer, we need to legalize it to
+  // a register on x86-64.
+ if (Traits::Is64Bit) {
+ if (llvm::isa<ConstantInteger64>(Const)) {
+ Variable *V = copyToReg(Const, RegNum);
+ V->setWeightInfinite();
+ return V;
+ }
+ }
+
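The register is needed because, aside from a plain mov into a GPR, x86-64
instruction forms accept at most a sign-extended 32-bit immediate; a 64-bit
constant is first materialized along the lines of (illustrative sketch):

  //   movabs rax, 0x1122334455667788  ; only mov reg64, imm64 takes imm64
  //   add    rbx, rax                 ; add r/m64 cannot encode imm64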
  // If the operand is a 32-bit constant integer, we should check
  // whether we need to randomize it or pool it.
if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
@@ -4907,7 +5044,7 @@
template <class Machine>
Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for x86-32.
- assert(Type != IceType_i64);
+ assert(Traits::Is64Bit || Type != IceType_i64);
Variable *Reg = Func->makeVariable(Type);
if (RegNum == Variable::NoRegister)
Reg->setWeightInfinite();
@@ -4939,8 +5076,15 @@
}
template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
- llvm::report_fatal_error("Not expecting to emit 64-bit integers");
+void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
+ if (!Traits::Is64Bit) {
+ llvm::report_fatal_error("Not expecting to emit 64-bit integers");
+ } else {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Ctx->getStrEmit();
+ Str << getConstantPrefix() << C->getValue();
+ }
}
template <class Machine>
@@ -5085,8 +5229,8 @@
MemOperand->getBase(), Mask1);
// If we have already assigned a physical register, we must come from
// advancedPhiLowering()=>lowerAssign(). In this case we should reuse
- // the assigned register as this assignment is that start of its use-def
- // chain. So we add RegNum argument here.
+    // the assigned register as this assignment is the start of its
+    // use-def chain. So we add the RegNum argument here.
Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
_lea(RegTemp, TempMemOperand);
// As source operand doesn't use the dstreg, we don't need to add