Subzero. Implements x86-64 lowerCall.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1266673003.
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 1bcb857..0a426c7 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -881,20 +881,19 @@
JumpTableDataList GlobalContext::getJumpTables() {
JumpTableDataList JumpTables(*getJumpTableList());
if (getFlags().shouldReorderPooledConstants()) {
- // If reorder-pooled-constants option is set to true, we need to shuffle the
- // constant pool before emitting it.
- RandomShuffle(JumpTables.begin(), JumpTables.end(), [this](uint64_t N) {
- return (uint32_t)getRNG().next(N);
- });
+    // If reorder-pooled-constants option is set to true, we need to shuffle
+    // the jump table list before emitting it.
+ RandomShuffle(JumpTables.begin(), JumpTables.end(),
+ [this](uint64_t N) { return (uint32_t)getRNG().next(N); });
} else {
// Make order deterministic by sorting into functions and then ID of the
// jump table within that function.
- std::sort(JumpTables.begin(), JumpTables.end(), [](const JumpTableData &A,
- const JumpTableData &B) {
- if (A.getFunctionName() != B.getFunctionName())
- return A.getFunctionName() < B.getFunctionName();
- return A.getId() < B.getId();
- });
+ std::sort(JumpTables.begin(), JumpTables.end(),
+ [](const JumpTableData &A, const JumpTableData &B) {
+ if (A.getFunctionName() != B.getFunctionName())
+ return A.getFunctionName() < B.getFunctionName();
+ return A.getId() < B.getId();
+ });
}
return JumpTables;
}
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 44939dd..d765660 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -20,9 +20,6 @@
///
/// ::Ice::X8632::Traits::Insts::Mov::create
///
-/// In the future, this file might be used to declare X8632 specific
-/// instructions (e.g., FLD, and FSTP.)
-///
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICEINSTX8632_H
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index a42e122..8adfab2 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -21,6 +21,14 @@
namespace Ice {
+//------------------------------------------------------------------------------
+// ______ ______ ______ __ ______ ______
+// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
+// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
+// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
+// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
+//
+//------------------------------------------------------------------------------
namespace X86Internal {
const MachineTraits<TargetX8632>::TableFcmpType
MachineTraits<TargetX8632>::TableFcmp[] = {
@@ -399,4 +407,214 @@
} // end of namespace dummy3
} // end of anonymous namespace
+//------------------------------------------------------------------------------
+// __ ______ __ __ ______ ______ __ __ __ ______
+// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
+// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
+// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
+// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
+//
+//------------------------------------------------------------------------------
+void TargetX8632::lowerCall(const InstCall *Instr) {
+ // x86-32 calling convention:
+ //
+ // * At the point before the call, the stack must be aligned to 16
+ // bytes.
+ //
+ // * The first four arguments of vector type, regardless of their
+ // position relative to the other arguments in the argument list, are
+ // placed in registers xmm0 - xmm3.
+ //
+ // * Other arguments are pushed onto the stack in right-to-left order,
+ // such that the left-most argument ends up on the top of the stack at
+ // the lowest memory address.
+ //
+ // * Stack arguments of vector type are aligned to start at the next
+ // highest multiple of 16 bytes. Other stack arguments are aligned to
+ // 4 bytes.
+ //
+  // This is intended to match the section "IA-32 Function Calling
+  // Convention" of the document "OS X ABI Function Call Guide" by
+  // Apple.
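+  //
+  // Worked example (illustrative, not from the original commit): for
+  //   call void @f(<4 x i32> %a, i32 %b, <4 x i32> %c)
+  // %a and %c travel in xmm0 and xmm1, while %b is the only stack
+  // argument, stored 4-byte aligned at the lowest address of the
+  // parameter area.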
+ NeedsStackAlignment = true;
+
+ typedef std::vector<Operand *> OperandList;
+ OperandList XmmArgs;
+ OperandList StackArgs, StackArgLocations;
+ uint32_t ParameterAreaSizeBytes = 0;
+
+ // Classify each argument operand according to the location where the
+ // argument is passed.
+ for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+ Operand *Arg = Instr->getArg(i);
+ Type Ty = Arg->getType();
+ // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+ assert(typeWidthInBytes(Ty) >= 4);
+ if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
+ XmmArgs.push_back(Arg);
+ } else {
+ StackArgs.push_back(Arg);
+ if (isVectorType(Arg->getType())) {
+ ParameterAreaSizeBytes =
+ Traits::applyStackAlignment(ParameterAreaSizeBytes);
+ }
+ Variable *esp =
+ Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+ Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
+ StackArgLocations.push_back(
+ Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+ }
+ }
+
+ // Adjust the parameter area so that the stack is aligned. It is
+ // assumed that the stack is already aligned at the start of the
+ // calling sequence.
+ ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
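+  // E.g. (illustrative), three i32 stack arguments occupy 12 bytes, which
+  // the line above rounds up to 16 so that 16-byte alignment still holds
+  // at the call.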
+
+ // Subtract the appropriate amount for the argument area. This also
+ // takes care of setting the stack adjustment during emission.
+ //
+ // TODO: If for some reason the call instruction gets dead-code
+ // eliminated after lowering, we would need to ensure that the
+ // pre-call and the post-call esp adjustment get eliminated as well.
+ if (ParameterAreaSizeBytes) {
+ _adjust_stack(ParameterAreaSizeBytes);
+ }
+
+ // Copy arguments that are passed on the stack to the appropriate
+ // stack locations.
+ for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+ lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+ }
+
+ // Copy arguments to be passed in registers to the appropriate
+ // registers.
+ // TODO: Investigate the impact of lowering arguments passed in
+ // registers after lowering stack arguments as opposed to the other
+ // way around. Lowering register arguments after stack arguments may
+ // reduce register pressure. On the other hand, lowering register
+ // arguments first (before stack arguments) may result in more compact
+ // code, as the memory operand displacements may end up being smaller
+ // before any stack adjustment is done.
+ for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
+ Variable *Reg =
+ legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
+ // Generate a FakeUse of register arguments so that they do not get
+ // dead code eliminated as a result of the FakeKill of scratch
+ // registers after the call.
+ Context.insert(InstFakeUse::create(Func, Reg));
+ }
+ // Generate the call instruction. Assign its result to a temporary
+ // with high register allocation weight.
+ Variable *Dest = Instr->getDest();
+ // ReturnReg doubles as ReturnRegLo as necessary.
+ Variable *ReturnReg = nullptr;
+ Variable *ReturnRegHi = nullptr;
+ if (Dest) {
+ switch (Dest->getType()) {
+ case IceType_NUM:
+ llvm_unreachable("Invalid Call dest type");
+ break;
+ case IceType_void:
+ break;
+ case IceType_i1:
+ case IceType_i8:
+ case IceType_i16:
+ case IceType_i32:
+ ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
+ break;
+ case IceType_i64:
+ ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+ ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ break;
+ case IceType_f32:
+ case IceType_f64:
+ // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
+ // the fstp instruction.
+ break;
+ case IceType_v4i1:
+ case IceType_v8i1:
+ case IceType_v16i1:
+ case IceType_v16i8:
+ case IceType_v8i16:
+ case IceType_v4i32:
+ case IceType_v4f32:
+ ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
+ break;
+ }
+ }
+ Operand *CallTarget = legalize(Instr->getCallTarget());
+ const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+ if (NeedSandboxing) {
+ if (llvm::isa<Constant>(CallTarget)) {
+ _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+ } else {
+ Variable *CallTargetVar = nullptr;
+ _mov(CallTargetVar, CallTarget);
+ _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+ const SizeT BundleSize =
+ 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+ _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
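+      // E.g., with 32-byte bundles, BundleSize - 1 is 0x1f, so the and
+      // above clears the low five bits and forces the indirect target onto
+      // a bundle boundary, as NaCl sandboxing requires.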
+ CallTarget = CallTargetVar;
+ }
+ }
+ Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
+ Context.insert(NewCall);
+ if (NeedSandboxing)
+ _bundle_unlock();
+ if (ReturnRegHi)
+ Context.insert(InstFakeDef::create(Func, ReturnRegHi));
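+  // The FakeDef above marks edx as defined by the call for i64 returns,
+  // since the call instruction itself only names ReturnReg (eax) as its
+  // dest.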
+
+ // Add the appropriate offset to esp. The call instruction takes care
+ // of resetting the stack offset during emission.
+ if (ParameterAreaSizeBytes) {
+ Variable *esp =
+ Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+ _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
+ }
+
+ // Insert a register-kill pseudo instruction.
+ Context.insert(InstFakeKill::create(Func, NewCall));
+
+ // Generate a FakeUse to keep the call live if necessary.
+ if (Instr->hasSideEffects() && ReturnReg) {
+ Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
+ Context.insert(FakeUse);
+ }
+
+ if (!Dest)
+ return;
+
+ // Assign the result of the call to Dest.
+ if (ReturnReg) {
+ if (ReturnRegHi) {
+ assert(Dest->getType() == IceType_i64);
+ split64(Dest);
+ Variable *DestLo = Dest->getLo();
+ Variable *DestHi = Dest->getHi();
+ _mov(DestLo, ReturnReg);
+ _mov(DestHi, ReturnRegHi);
+ } else {
+ assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
+ Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
+ isVectorType(Dest->getType()));
+ if (isVectorType(Dest->getType())) {
+ _movp(Dest, ReturnReg);
+ } else {
+ _mov(Dest, ReturnReg);
+ }
+ }
+ } else if (isScalarFloatingType(Dest->getType())) {
+ // Special treatment for an FP function which returns its result in
+ // st(0).
+ // If Dest ends up being a physical xmm register, the fstp emit code
+ // will route st(0) through a temporary stack slot.
+ _fstp(Dest);
+ // Create a fake use of Dest in case it actually isn't used,
+ // because st(0) still needs to be popped.
+ Context.insert(InstFakeUse::create(Func, Dest));
+ }
+}
+
} // end of namespace Ice
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index f49e673..d086135 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -41,6 +41,9 @@
static TargetX8632 *create(Cfg *Func) { return new TargetX8632(Func); }
+protected:
+ void lowerCall(const InstCall *Instr) override;
+
private:
friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>;
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 1fcf0b9..f5d4ead 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -21,6 +21,14 @@
namespace Ice {
+//------------------------------------------------------------------------------
+// ______ ______ ______ __ ______ ______
+// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
+// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
+// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
+// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
+//
+//------------------------------------------------------------------------------
namespace X86Internal {
const MachineTraits<TargetX8664>::TableFcmpType
MachineTraits<TargetX8664>::TableFcmp[] = {
@@ -81,6 +89,286 @@
} // end of namespace X86Internal
+//------------------------------------------------------------------------------
+// __ ______ __ __ ______ ______ __ __ __ ______
+// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
+// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
+// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
+// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
+//
+//------------------------------------------------------------------------------
+namespace {
+static inline TargetX8664::Traits::RegisterSet::AllRegisters
+getRegisterForXmmArgNum(uint32_t ArgNum) {
+ assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
+ return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
+ TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
+}
+
+static inline TargetX8664::Traits::RegisterSet::AllRegisters
+getRegisterForGprArgNum(uint32_t ArgNum) {
+ assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
+ static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
+ TargetX8664::Traits::RegisterSet::Reg_edi,
+ TargetX8664::Traits::RegisterSet::Reg_esi,
+ TargetX8664::Traits::RegisterSet::Reg_edx,
+ TargetX8664::Traits::RegisterSet::Reg_ecx,
+ TargetX8664::Traits::RegisterSet::Reg_r8d,
+ TargetX8664::Traits::RegisterSet::Reg_r9d,
+ };
+  static_assert(llvm::array_lengthof(GprForArgNum) ==
+                TargetX8664::Traits::X86_MAX_GPR_ARGS,
+ "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
+ return GprForArgNum[ArgNum];
+}
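+
+// For illustration (an assumption about usage, not from the original commit):
+// getRegisterForGprArgNum(0) yields Reg_edi and getRegisterForGprArgNum(5)
+// yields Reg_r9d, matching the System V integer-argument order rdi, rsi,
+// rdx, rcx, r8, r9 (this lowering still names the 32-bit aliases).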
+
+// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
+// OperandList in lowerCall. std::max() is not constexpr until C++14, so it
+// cannot be used here.
+constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
+
+} // end of anonymous namespace
+
+void TargetX8664::lowerCall(const InstCall *Instr) {
+ // x86-64 calling convention:
+ //
+ // * At the point before the call, the stack must be aligned to 16
+ // bytes.
+ //
+ // * The first eight arguments of vector/fp type, regardless of their
+ // position relative to the other arguments in the argument list, are
+ // placed in registers %xmm0 - %xmm7.
+ //
+ // * The first six arguments of integer types, regardless of their
+ // position relative to the other arguments in the argument list, are
+ // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
+ //
+ // * Other arguments are pushed onto the stack in right-to-left order,
+ // such that the left-most argument ends up on the top of the stack at
+ // the lowest memory address.
+ //
+ // * Stack arguments of vector type are aligned to start at the next
+ // highest multiple of 16 bytes. Other stack arguments are aligned to
+ // 8 bytes.
+ //
+  // This is intended to match the section "Function Calling Sequence" of
+  // the document "System V Application Binary Interface."
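+  //
+  // Worked example (illustrative, not from the original commit): for
+  //   call void @f(float %a, i32 %b, <4 x float> %c, i64 %d)
+  // %a and %c take %xmm0 and %xmm1, %b and %d take the first two GPR
+  // argument slots (%rdi and %rsi; this lowering still names the 32-bit
+  // aliases), and nothing is passed on the stack.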
+ NeedsStackAlignment = true;
+
+ using OperandList =
+ llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
+ Traits::X86_MAX_GPR_ARGS)>;
+ OperandList XmmArgs;
+ OperandList GprArgs;
+ OperandList StackArgs, StackArgLocations;
+ uint32_t ParameterAreaSizeBytes = 0;
+
+ // Classify each argument operand according to the location where the
+ // argument is passed.
+ for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+ Operand *Arg = Instr->getArg(i);
+ Type Ty = Arg->getType();
+ // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+ assert(typeWidthInBytes(Ty) >= 4);
+ if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
+ XmmArgs.push_back(Arg);
+ } else if (isScalarFloatingType(Ty) &&
+ XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
+ XmmArgs.push_back(Arg);
+ } else if (isScalarIntegerType(Ty) &&
+ GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
+ GprArgs.push_back(Arg);
+ } else {
+ StackArgs.push_back(Arg);
+ if (isVectorType(Arg->getType())) {
+ ParameterAreaSizeBytes =
+ Traits::applyStackAlignment(ParameterAreaSizeBytes);
+ }
+ Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+ Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
+ StackArgLocations.push_back(
+ Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+ }
+ }
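+
+  // Illustrative consequence of the loop above (an example, not from the
+  // original commit): a ninth vector argument misses the eight XMM slots,
+  // so it is first aligned up to a 16-byte stack offset and then consumes
+  // 16 bytes of the parameter area.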
+
+ // Adjust the parameter area so that the stack is aligned. It is
+ // assumed that the stack is already aligned at the start of the
+ // calling sequence.
+ ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
+
+ // Subtract the appropriate amount for the argument area. This also
+ // takes care of setting the stack adjustment during emission.
+ //
+ // TODO: If for some reason the call instruction gets dead-code
+ // eliminated after lowering, we would need to ensure that the
+ // pre-call and the post-call esp adjustment get eliminated as well.
+ if (ParameterAreaSizeBytes) {
+ _adjust_stack(ParameterAreaSizeBytes);
+ }
+
+ // Copy arguments that are passed on the stack to the appropriate
+ // stack locations.
+ for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+ lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+ }
+
+ // Copy arguments to be passed in registers to the appropriate
+ // registers.
+ // TODO: Investigate the impact of lowering arguments passed in
+ // registers after lowering stack arguments as opposed to the other
+ // way around. Lowering register arguments after stack arguments may
+ // reduce register pressure. On the other hand, lowering register
+ // arguments first (before stack arguments) may result in more compact
+ // code, as the memory operand displacements may end up being smaller
+ // before any stack adjustment is done.
+ for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
+ Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
+ // Generate a FakeUse of register arguments so that they do not get
+ // dead code eliminated as a result of the FakeKill of scratch
+ // registers after the call.
+ Context.insert(InstFakeUse::create(Func, Reg));
+ }
+
+ for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
+ Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
+ Context.insert(InstFakeUse::create(Func, Reg));
+ }
+
+ // Generate the call instruction. Assign its result to a temporary
+ // with high register allocation weight.
+ Variable *Dest = Instr->getDest();
+ // ReturnReg doubles as ReturnRegLo as necessary.
+ Variable *ReturnReg = nullptr;
+ Variable *ReturnRegHi = nullptr;
+ if (Dest) {
+ switch (Dest->getType()) {
+ case IceType_NUM:
+ llvm_unreachable("Invalid Call dest type");
+ break;
+ case IceType_void:
+ break;
+ case IceType_i1:
+ case IceType_i8:
+ case IceType_i16:
+ case IceType_i32:
+ ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
+ break;
+ case IceType_i64:
+ // TODO(jpp): return i64 in a GPR.
+ ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+ ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ break;
+ case IceType_f32:
+ case IceType_f64:
+ case IceType_v4i1:
+ case IceType_v8i1:
+ case IceType_v16i1:
+ case IceType_v16i8:
+ case IceType_v8i16:
+ case IceType_v4i32:
+ case IceType_v4f32:
+ ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
+ break;
+ }
+ }
+
+ Operand *CallTarget = legalize(Instr->getCallTarget());
+ const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+ if (NeedSandboxing) {
+ if (llvm::isa<Constant>(CallTarget)) {
+ _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+ } else {
+ Variable *CallTargetVar = nullptr;
+ _mov(CallTargetVar, CallTarget);
+ _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+ const SizeT BundleSize =
+ 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+ _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
+ CallTarget = CallTargetVar;
+ }
+ }
+ Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
+ Context.insert(NewCall);
+ if (NeedSandboxing)
+ _bundle_unlock();
+ if (ReturnRegHi)
+ Context.insert(InstFakeDef::create(Func, ReturnRegHi));
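+  // As in the x86-32 lowering, the FakeDef above marks edx as defined by
+  // the call for i64 returns, since the call only names ReturnReg as its
+  // dest.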
+
+ // Add the appropriate offset to esp. The call instruction takes care
+ // of resetting the stack offset during emission.
+ if (ParameterAreaSizeBytes) {
+ Variable *Esp =
+ Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+ _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
+ }
+
+ // Insert a register-kill pseudo instruction.
+ Context.insert(InstFakeKill::create(Func, NewCall));
+
+ // Generate a FakeUse to keep the call live if necessary.
+ if (Instr->hasSideEffects() && ReturnReg) {
+ Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
+ Context.insert(FakeUse);
+ }
+
+ if (!Dest)
+ return;
+
+  assert(ReturnReg && "x86-64 always returns values in registers.");
+
+ // Assign the result of the call to Dest.
+ if (ReturnRegHi) {
+ assert(Dest->getType() == IceType_i64);
+ split64(Dest);
+ Variable *DestLo = Dest->getLo();
+ Variable *DestHi = Dest->getHi();
+ _mov(DestLo, ReturnReg);
+ _mov(DestHi, ReturnRegHi);
+ return;
+ }
+
+ assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
+ Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
+ Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
+ isVectorType(Dest->getType()));
+
+ if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
+ _movp(Dest, ReturnReg);
+ } else {
+ _mov(Dest, ReturnReg);
+ }
+}
+
+void TargetDataX8664::lowerJumpTables() {
+ switch (Ctx->getFlags().getOutFileType()) {
+ case FT_Elf: {
+ ELFObjectWriter *Writer = Ctx->getObjectWriter();
+ for (const JumpTableData &JumpTable : Ctx->getJumpTables())
+ // TODO(jpp): not 386.
+ Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
+ } break;
+ case FT_Asm:
+ // Already emitted from Cfg
+ break;
+ case FT_Iasm: {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Ctx->getStrEmit();
+ for (const JumpTableData &JT : Ctx->getJumpTables()) {
+ Str << "\t.section\t.rodata." << JT.getFunctionName()
+ << "$jumptable,\"a\",@progbits\n";
+ Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
+ Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
+
+      // On X8664 ILP32, pointers are 32-bit, hence the use of .long.
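+      // Illustrative output (label spelling comes from
+      // InstJumpTable::makeName; offsets are byte offsets into the
+      // function):
+      //   .section .rodata.foo$jumptable,"a",@progbits
+      //   .align 4
+      //   <label>:
+      //       .long foo+8
+      //       .long foo+24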
+ for (intptr_t TargetOffset : JT.getTargetOffsets())
+ Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
+ Str << "\n";
+ }
+ } break;
+ }
+}
+
namespace {
template <typename T> struct PoolTypeConverter {};
@@ -236,36 +524,6 @@
Str << "\n";
}
-void TargetDataX8664::lowerJumpTables() {
- switch (Ctx->getFlags().getOutFileType()) {
- case FT_Elf: {
- ELFObjectWriter *Writer = Ctx->getObjectWriter();
- for (const JumpTableData &JT : Ctx->getJumpTables())
- // TODO(jpp): not 386.
- Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
- } break;
- case FT_Asm:
- // Already emitted from Cfg
- break;
- case FT_Iasm: {
- if (!BuildDefs::dump())
- return;
- Ostream &Str = Ctx->getStrEmit();
- for (const JumpTableData &JT : Ctx->getJumpTables()) {
- Str << "\t.section\t.rodata." << JT.getFunctionName()
- << "$jumptable,\"a\",@progbits\n";
- Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
- Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
-
- // On X8664 ILP32 pointers are 32-bit hence the use of .long
- for (intptr_t TargetOffset : JT.getTargetOffsets())
- Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
- Str << "\n";
- }
- } break;
- }
-}
-
void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
const IceString &SectionSuffix) {
switch (Ctx->getFlags().getOutFileType()) {
diff --git a/src/IceTargetLoweringX8664.h b/src/IceTargetLoweringX8664.h
index 8c4329d..1e012b5 100644
--- a/src/IceTargetLoweringX8664.h
+++ b/src/IceTargetLoweringX8664.h
@@ -38,6 +38,9 @@
public:
static TargetX8664 *create(Cfg *Func) { return new TargetX8664(Func); }
+protected:
+ void lowerCall(const InstCall *Instr) override;
+
private:
friend class ::Ice::X86Internal::TargetX86Base<TargetX8664>;
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index cc22171..89fc203 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -439,7 +439,9 @@
}
/// The maximum number of arguments to pass in XMM registers
- static const uint32_t X86_MAX_XMM_ARGS = 4;
+ static const uint32_t X86_MAX_XMM_ARGS = 8;
+ /// The maximum number of arguments to pass in GPR registers
+ static const uint32_t X86_MAX_GPR_ARGS = 6;
/// The number of bits in a byte
static const uint32_t X86_CHAR_BIT = 8;
/// Stack alignment. This is defined in IceTargetLoweringX8664.cpp because it
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index d89d747..1e33a96 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -83,6 +83,7 @@
size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of 4 bytes. In particular, i1,
// i8, and i16 are rounded up to 4 bytes.
+ // TODO(jpp): this needs to round to multiples of 8 bytes in x86-64.
return (typeWidthInBytes(Ty) + 3) & ~3;
}
@@ -127,7 +128,6 @@
void lowerArithmetic(const InstArithmetic *Inst) override;
void lowerAssign(const InstAssign *Inst) override;
void lowerBr(const InstBr *Inst) override;
- void lowerCall(const InstCall *Inst) override;
void lowerCast(const InstCast *Inst) override;
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 3ec094e..245861c 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -2126,209 +2126,6 @@
}
template <class Machine>
-void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
- // x86-32 calling convention:
- //
- // * At the point before the call, the stack must be aligned to 16
- // bytes.
- //
- // * The first four arguments of vector type, regardless of their
- // position relative to the other arguments in the argument list, are
- // placed in registers xmm0 - xmm3.
- //
- // * Other arguments are pushed onto the stack in right-to-left order,
- // such that the left-most argument ends up on the top of the stack at
- // the lowest memory address.
- //
- // * Stack arguments of vector type are aligned to start at the next
- // highest multiple of 16 bytes. Other stack arguments are aligned to
- // 4 bytes.
- //
- // This intends to match the section "IA-32 Function Calling
- // Convention" of the document "OS X ABI Function Call Guide" by
- // Apple.
- NeedsStackAlignment = true;
-
- typedef std::vector<Operand *> OperandList;
- OperandList XmmArgs;
- OperandList StackArgs, StackArgLocations;
- uint32_t ParameterAreaSizeBytes = 0;
-
- // Classify each argument operand according to the location where the
- // argument is passed.
- for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
- Operand *Arg = Instr->getArg(i);
- Type Ty = Arg->getType();
- // The PNaCl ABI requires the width of arguments to be at least 32 bits.
- assert(typeWidthInBytes(Ty) >= 4);
- if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
- XmmArgs.push_back(Arg);
- } else {
- StackArgs.push_back(Arg);
- if (isVectorType(Arg->getType())) {
- ParameterAreaSizeBytes =
- Traits::applyStackAlignment(ParameterAreaSizeBytes);
- }
- Variable *esp =
- Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
- Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
- StackArgLocations.push_back(
- Traits::X86OperandMem::create(Func, Ty, esp, Loc));
- ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
- }
- }
-
- // Adjust the parameter area so that the stack is aligned. It is
- // assumed that the stack is already aligned at the start of the
- // calling sequence.
- ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
-
- // Subtract the appropriate amount for the argument area. This also
- // takes care of setting the stack adjustment during emission.
- //
- // TODO: If for some reason the call instruction gets dead-code
- // eliminated after lowering, we would need to ensure that the
- // pre-call and the post-call esp adjustment get eliminated as well.
- if (ParameterAreaSizeBytes) {
- _adjust_stack(ParameterAreaSizeBytes);
- }
-
- // Copy arguments that are passed on the stack to the appropriate
- // stack locations.
- for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
- lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
- }
-
- // Copy arguments to be passed in registers to the appropriate
- // registers.
- // TODO: Investigate the impact of lowering arguments passed in
- // registers after lowering stack arguments as opposed to the other
- // way around. Lowering register arguments after stack arguments may
- // reduce register pressure. On the other hand, lowering register
- // arguments first (before stack arguments) may result in more compact
- // code, as the memory operand displacements may end up being smaller
- // before any stack adjustment is done.
- for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
- Variable *Reg =
- legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
- // Generate a FakeUse of register arguments so that they do not get
- // dead code eliminated as a result of the FakeKill of scratch
- // registers after the call.
- Context.insert(InstFakeUse::create(Func, Reg));
- }
- // Generate the call instruction. Assign its result to a temporary
- // with high register allocation weight.
- Variable *Dest = Instr->getDest();
- // ReturnReg doubles as ReturnRegLo as necessary.
- Variable *ReturnReg = nullptr;
- Variable *ReturnRegHi = nullptr;
- if (Dest) {
- switch (Dest->getType()) {
- case IceType_NUM:
- llvm_unreachable("Invalid Call dest type");
- break;
- case IceType_void:
- break;
- case IceType_i1:
- case IceType_i8:
- case IceType_i16:
- case IceType_i32:
- ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
- break;
- case IceType_i64:
- ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
- break;
- case IceType_f32:
- case IceType_f64:
- // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
- // the fstp instruction.
- break;
- case IceType_v4i1:
- case IceType_v8i1:
- case IceType_v16i1:
- case IceType_v16i8:
- case IceType_v8i16:
- case IceType_v4i32:
- case IceType_v4f32:
- ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
- break;
- }
- }
- Operand *CallTarget = legalize(Instr->getCallTarget());
- const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
- if (NeedSandboxing) {
- if (llvm::isa<Constant>(CallTarget)) {
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- } else {
- Variable *CallTargetVar = nullptr;
- _mov(CallTargetVar, CallTarget);
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- const SizeT BundleSize =
- 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
- _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
- CallTarget = CallTargetVar;
- }
- }
- Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
- Context.insert(NewCall);
- if (NeedSandboxing)
- _bundle_unlock();
- if (ReturnRegHi)
- Context.insert(InstFakeDef::create(Func, ReturnRegHi));
-
- // Add the appropriate offset to esp. The call instruction takes care
- // of resetting the stack offset during emission.
- if (ParameterAreaSizeBytes) {
- Variable *esp =
- Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
- _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
- }
-
- // Insert a register-kill pseudo instruction.
- Context.insert(InstFakeKill::create(Func, NewCall));
-
- // Generate a FakeUse to keep the call live if necessary.
- if (Instr->hasSideEffects() && ReturnReg) {
- Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
- Context.insert(FakeUse);
- }
-
- if (!Dest)
- return;
-
- // Assign the result of the call to Dest.
- if (ReturnReg) {
- if (ReturnRegHi) {
- assert(Dest->getType() == IceType_i64);
- split64(Dest);
- Variable *DestLo = Dest->getLo();
- Variable *DestHi = Dest->getHi();
- _mov(DestLo, ReturnReg);
- _mov(DestHi, ReturnRegHi);
- } else {
- assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
- isVectorType(Dest->getType()));
- if (isVectorType(Dest->getType())) {
- _movp(Dest, ReturnReg);
- } else {
- _mov(Dest, ReturnReg);
- }
- }
- } else if (isScalarFloatingType(Dest->getType())) {
- // Special treatment for an FP function which returns its result in
- // st(0).
- // If Dest ends up being a physical xmm register, the fstp emit code
- // will route st(0) through a temporary stack slot.
- _fstp(Dest);
- // Create a fake use of Dest in case it actually isn't used,
- // because st(0) still needs to be popped.
- Context.insert(InstFakeUse::create(Func, Dest));
- }
-}
-
-template <class Machine>
void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
// a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
InstCast::OpKind CastKind = Inst->getCastKind();