Subzero. Implements x86-64 lowerCall.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1266673003 .
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 1bcb857..0a426c7 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -881,20 +881,19 @@ JumpTableDataList GlobalContext::getJumpTables() {
   JumpTableDataList JumpTables(*getJumpTableList());
   if (getFlags().shouldReorderPooledConstants()) {
-    // If reorder-pooled-constants option is set to true, we need to shuffle the
-    // constant pool before emitting it.
-    RandomShuffle(JumpTables.begin(), JumpTables.end(), [this](uint64_t N) {
-      return (uint32_t)getRNG().next(N);
-    });
+    // If the reorder-pooled-constants option is set to true, we need to
+    // shuffle the jump tables before emitting them.
+    RandomShuffle(JumpTables.begin(), JumpTables.end(),
+                  [this](uint64_t N) { return (uint32_t)getRNG().next(N); });
   } else {
     // Make order deterministic by sorting into functions and then ID of the
     // jump table within that function.
-    std::sort(JumpTables.begin(), JumpTables.end(), [](const JumpTableData &A,
-                                                       const JumpTableData &B) {
-      if (A.getFunctionName() != B.getFunctionName())
-        return A.getFunctionName() < B.getFunctionName();
-      return A.getId() < B.getId();
-    });
+    std::sort(JumpTables.begin(), JumpTables.end(),
+              [](const JumpTableData &A, const JumpTableData &B) {
+                if (A.getFunctionName() != B.getFunctionName())
+                  return A.getFunctionName() < B.getFunctionName();
+                return A.getId() < B.getId();
+              });
   }
   return JumpTables;
 }
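The shuffle above drives its permutation entirely through the callback's return value. As a point of reference, here is a minimal Fisher-Yates sketch of the contract RandomShuffle expects: Rand(N) must return a uniform index in [0, N). The names randomShuffle and Rand are illustrative, not Subzero's implementation.

#include <cstdint>
#include <utility>

// Swap each element with a uniformly chosen element at or before it,
// walking from the back; Rand(N) yields a value in [0, N).
template <typename It, typename Rng>
void randomShuffle(It First, It Last, Rng &&Rand) {
  for (auto N = Last - First; N > 1; --N)
    std::swap(First[N - 1], First[Rand(static_cast<uint64_t>(N))]);
}

Called as in the hunk, e.g. randomShuffle(V.begin(), V.end(), [&](uint64_t N) { return (uint32_t)Rng.next(N); }), the same seeded RNG always produces the same (reordered) emission order.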
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 44939dd..d765660 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -20,9 +20,6 @@
 ///
 /// ::Ice::X8632::Traits::Insts::Mov::create
 ///
-/// In the future, this file might be used to declare X8632 specific
-/// instructions (e.g., FLD, and FSTP.)
-///
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEINSTX8632_H
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index a42e122..8adfab2 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -21,6 +21,14 @@
 
 namespace Ice {
 
+//------------------------------------------------------------------------------
+//      ______   ______     ______     __     ______   ______
+//     /\__  _\ /\  == \   /\  __ \   /\ \   /\__  _\ /\  ___\
+//     \/_/\ \/ \ \  __<   \ \  __ \  \ \ \  \/_/\ \/ \ \___  \
+//        \ \_\  \ \_\ \_\  \ \_\ \_\  \ \_\    \ \_\  \/\_____\
+//         \/_/   \/_/ /_/   \/_/\/_/   \/_/     \/_/   \/_____/
+//
+//------------------------------------------------------------------------------
 namespace X86Internal {
 const MachineTraits<TargetX8632>::TableFcmpType
     MachineTraits<TargetX8632>::TableFcmp[] = {
@@ -399,4 +407,214 @@
 } // end of namespace dummy3
 } // end of anonymous namespace
 
+//------------------------------------------------------------------------------
+//     __       ______     __     __     ______     ______     __     __   __     ______
+//    /\ \     /\  __ \   /\ \  _ \ \   /\  ___\   /\  == \   /\ \   /\ "-.\ \   /\  ___\
+//    \ \ \____\ \ \/\ \  \ \ \/ ".\ \  \ \  __\   \ \  __<   \ \ \  \ \ \-.  \  \ \ \__ \
+//     \ \_____\\ \_____\  \ \__/".~\_\  \ \_____\  \ \_\ \_\  \ \_\  \ \_\\"\_\  \ \_____\
+//      \/_____/ \/_____/   \/_/   \/_/   \/_____/   \/_/ /_/   \/_/   \/_/ \/_/   \/_____/
+//
+//------------------------------------------------------------------------------
+void TargetX8632::lowerCall(const InstCall *Instr) {
+  // x86-32 calling convention:
+  //
+  // * At the point before the call, the stack must be aligned to 16
+  // bytes.
+  //
+  // * The first four arguments of vector type, regardless of their
+  // position relative to the other arguments in the argument list, are
+  // placed in registers xmm0 - xmm3.
+  //
+  // * Other arguments are pushed onto the stack in right-to-left order,
+  // such that the left-most argument ends up on the top of the stack at
+  // the lowest memory address.
+  //
+  // * Stack arguments of vector type are aligned to start at the next
+  // highest multiple of 16 bytes. Other stack arguments are aligned to
+  // 4 bytes.
+  //
+  // This is intended to match the section "IA-32 Function Calling
+  // Convention" of the document "OS X ABI Function Call Guide" by
+  // Apple.
+  NeedsStackAlignment = true;
+
+  typedef std::vector<Operand *> OperandList;
+  OperandList XmmArgs;
+  OperandList StackArgs, StackArgLocations;
+  uint32_t ParameterAreaSizeBytes = 0;
+
+  // Classify each argument operand according to the location where the
+  // argument is passed.
+  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+    Operand *Arg = Instr->getArg(i);
+    Type Ty = Arg->getType();
+    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+    assert(typeWidthInBytes(Ty) >= 4);
+    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
+      XmmArgs.push_back(Arg);
+    } else {
+      StackArgs.push_back(Arg);
+      if (isVectorType(Arg->getType())) {
+        ParameterAreaSizeBytes =
+            Traits::applyStackAlignment(ParameterAreaSizeBytes);
+      }
+      Variable *esp =
+          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
+      StackArgLocations.push_back(
+          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+    }
+  }
+
+  // Adjust the parameter area so that the stack is aligned. It is
+  // assumed that the stack is already aligned at the start of the
+  // calling sequence.
+  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
+
+  // Subtract the appropriate amount for the argument area. This also
+  // takes care of setting the stack adjustment during emission.
+  //
+  // TODO: If for some reason the call instruction gets dead-code
+  // eliminated after lowering, we would need to ensure that the
+  // pre-call and the post-call esp adjustment get eliminated as well.
+  if (ParameterAreaSizeBytes) {
+    _adjust_stack(ParameterAreaSizeBytes);
+  }
+
+  // Copy arguments that are passed on the stack to the appropriate
+  // stack locations.
+  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+  }
+
+  // Copy arguments to be passed in registers to the appropriate
+  // registers.
+  // TODO: Investigate the impact of lowering arguments passed in
+  // registers after lowering stack arguments as opposed to the other
+  // way around. Lowering register arguments after stack arguments may
+  // reduce register pressure. On the other hand, lowering register
+  // arguments first (before stack arguments) may result in more compact
+  // code, as the memory operand displacements may end up being smaller
+  // before any stack adjustment is done.
+  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
+    Variable *Reg =
+        legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
+    // Generate a FakeUse of register arguments so that they do not get
+    // dead code eliminated as a result of the FakeKill of scratch
+    // registers after the call.
+    Context.insert(InstFakeUse::create(Func, Reg));
+  }
+  // Generate the call instruction. Assign its result to a temporary
+  // with high register allocation weight.
+  Variable *Dest = Instr->getDest();
+  // ReturnReg doubles as ReturnRegLo as necessary.
+  Variable *ReturnReg = nullptr;
+  Variable *ReturnRegHi = nullptr;
+  if (Dest) {
+    switch (Dest->getType()) {
+    case IceType_NUM:
+      llvm_unreachable("Invalid Call dest type");
+      break;
+    case IceType_void:
+      break;
+    case IceType_i1:
+    case IceType_i8:
+    case IceType_i16:
+    case IceType_i32:
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
+      break;
+    case IceType_i64:
+      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+      break;
+    case IceType_f32:
+    case IceType_f64:
+      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
+      // the fstp instruction.
+      break;
+    case IceType_v4i1:
+    case IceType_v8i1:
+    case IceType_v16i1:
+    case IceType_v16i8:
+    case IceType_v8i16:
+    case IceType_v4i32:
+    case IceType_v4f32:
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
+      break;
+    }
+  }
+  Operand *CallTarget = legalize(Instr->getCallTarget());
+  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+  if (NeedSandboxing) {
+    if (llvm::isa<Constant>(CallTarget)) {
+      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+    } else {
+      Variable *CallTargetVar = nullptr;
+      _mov(CallTargetVar, CallTarget);
+      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+      const SizeT BundleSize =
+          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
+      CallTarget = CallTargetVar;
+    }
+  }
+  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
+  Context.insert(NewCall);
+  if (NeedSandboxing)
+    _bundle_unlock();
+  if (ReturnRegHi)
+    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+
+  // Add the appropriate offset to esp. The call instruction takes care
+  // of resetting the stack offset during emission.
+  if (ParameterAreaSizeBytes) {
+    Variable *esp =
+        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
+  }
+
+  // Insert a register-kill pseudo instruction.
+  Context.insert(InstFakeKill::create(Func, NewCall));
+
+  // Generate a FakeUse to keep the call live if necessary.
+  if (Instr->hasSideEffects() && ReturnReg) {
+    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
+    Context.insert(FakeUse);
+  }
+
+  if (!Dest)
+    return;
+
+  // Assign the result of the call to Dest.
+  if (ReturnReg) {
+    if (ReturnRegHi) {
+      assert(Dest->getType() == IceType_i64);
+      split64(Dest);
+      Variable *DestLo = Dest->getLo();
+      Variable *DestHi = Dest->getHi();
+      _mov(DestLo, ReturnReg);
+      _mov(DestHi, ReturnRegHi);
+    } else {
+      assert(Dest->getType() == IceType_i32 ||
+             Dest->getType() == IceType_i16 ||
+             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
+             isVectorType(Dest->getType()));
+      if (isVectorType(Dest->getType())) {
+        _movp(Dest, ReturnReg);
+      } else {
+        _mov(Dest, ReturnReg);
+      }
+    }
+  } else if (isScalarFloatingType(Dest->getType())) {
+    // Special treatment for an FP function which returns its result in
+    // st(0).
+    // If Dest ends up being a physical xmm register, the fstp emit code
+    // will route st(0) through a temporary stack slot.
+    _fstp(Dest);
+    // Create a fake use of Dest in case it actually isn't used,
+    // because st(0) still needs to be popped.
+    Context.insert(InstFakeUse::create(Func, Dest));
+  }
+}
+
 } // end of namespace Ice
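The comments at the top of lowerCall describe the stack-argument layout that the classification loop then computes incrementally in ParameterAreaSizeBytes. A standalone sketch of that bookkeeping, with simplified (width, is-vector) descriptors in place of Subzero's Operand and Type; layoutStackArgs and its types are hypothetical illustrations:

#include <cstdint>
#include <utility>
#include <vector>

constexpr uint32_t StackAlignBytes = 16; // x86 stack alignment

uint32_t applyStackAlignment(uint32_t Offset) {
  // Round Offset up to the next multiple of 16.
  return (Offset + StackAlignBytes - 1) & ~(StackAlignBytes - 1);
}

// Each argument is (WidthOnStackBytes, IsVector). Fills Offsets with each
// stack argument's esp-relative location and returns the final parameter
// area size, padded so the stack stays 16-byte aligned across the call.
uint32_t layoutStackArgs(const std::vector<std::pair<uint32_t, bool>> &Args,
                         std::vector<uint32_t> &Offsets) {
  uint32_t Size = 0;
  for (const auto &A : Args) {
    if (A.second) // vector args start at the next multiple of 16
      Size = applyStackAlignment(Size);
    Offsets.push_back(Size);
    Size += A.first;
  }
  return applyStackAlignment(Size);
}

For example, arguments of widths {4, 16 (vector), 4} get offsets {0, 16, 32} and a 48-byte parameter area: the vector argument forces padding before it, and the whole area is padded back up to a multiple of 16.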
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index f49e673..d086135 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -41,6 +41,9 @@
 
   static TargetX8632 *create(Cfg *Func) { return new TargetX8632(Func); }
 
+protected:
+  void lowerCall(const InstCall *Instr) override;
+
 private:
   friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>;
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 1fcf0b9..f5d4ead 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -21,6 +21,14 @@
 
 namespace Ice {
 
+//------------------------------------------------------------------------------
+//      ______   ______     ______     __     ______   ______
+//     /\__  _\ /\  == \   /\  __ \   /\ \   /\__  _\ /\  ___\
+//     \/_/\ \/ \ \  __<   \ \  __ \  \ \ \  \/_/\ \/ \ \___  \
+//        \ \_\  \ \_\ \_\  \ \_\ \_\  \ \_\    \ \_\  \/\_____\
+//         \/_/   \/_/ /_/   \/_/\/_/   \/_/     \/_/   \/_____/
+//
+//------------------------------------------------------------------------------
 namespace X86Internal {
 const MachineTraits<TargetX8664>::TableFcmpType
     MachineTraits<TargetX8664>::TableFcmp[] = {
@@ -81,6 +89,286 @@
 
 } // end of namespace X86Internal
 
+//------------------------------------------------------------------------------
+//     __       ______     __     __     ______     ______     __     __   __     ______
+//    /\ \     /\  __ \   /\ \  _ \ \   /\  ___\   /\  == \   /\ \   /\ "-.\ \   /\  ___\
+//    \ \ \____\ \ \/\ \  \ \ \/ ".\ \  \ \  __\   \ \  __<   \ \ \  \ \ \-.  \  \ \ \__ \
+//     \ \_____\\ \_____\  \ \__/".~\_\  \ \_____\  \ \_\ \_\  \ \_\  \ \_\\"\_\  \ \_____\
+//      \/_____/ \/_____/   \/_/   \/_/   \/_____/   \/_/ /_/   \/_/   \/_/ \/_/   \/_____/
+//
+//------------------------------------------------------------------------------
+namespace {
+static inline TargetX8664::Traits::RegisterSet::AllRegisters
+getRegisterForXmmArgNum(uint32_t ArgNum) {
+  assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
+  return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
+      TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
+}
+
+static inline TargetX8664::Traits::RegisterSet::AllRegisters
+getRegisterForGprArgNum(uint32_t ArgNum) {
+  assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
+  static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
+      TargetX8664::Traits::RegisterSet::Reg_edi,
+      TargetX8664::Traits::RegisterSet::Reg_esi,
+      TargetX8664::Traits::RegisterSet::Reg_edx,
+      TargetX8664::Traits::RegisterSet::Reg_ecx,
+      TargetX8664::Traits::RegisterSet::Reg_r8d,
+      TargetX8664::Traits::RegisterSet::Reg_r9d,
+  };
+  static_assert(llvm::array_lengthof(GprForArgNum) ==
+                    TargetX8664::Traits::X86_MAX_GPR_ARGS,
+                "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
+  return GprForArgNum[ArgNum];
+}
+
+// constexprMax returns a (constexpr) max(S0, S1). It is used for defining
+// OperandList in lowerCall; std::max() cannot be used here because it is
+// not constexpr until C++14.
+constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
+
+} // end of anonymous namespace
+
+void TargetX8664::lowerCall(const InstCall *Instr) {
+  // x86-64 calling convention:
+  //
+  // * At the point before the call, the stack must be aligned to 16
+  // bytes.
+  //
+  // * The first eight arguments of vector/fp type, regardless of their
+  // position relative to the other arguments in the argument list, are
+  // placed in registers %xmm0 - %xmm7.
+  //
+  // * The first six arguments of integer types, regardless of their
+  // position relative to the other arguments in the argument list, are
+  // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
+  //
+  // * Other arguments are pushed onto the stack in right-to-left order,
+  // such that the left-most argument ends up on the top of the stack at
+  // the lowest memory address.
+  //
+  // * Stack arguments of vector type are aligned to start at the next
+  // highest multiple of 16 bytes. Other stack arguments are aligned to
+  // 8 bytes.
+  //
+  // This is intended to match the section "Function Calling Sequence" of
+  // the document "System V Application Binary Interface."
+  NeedsStackAlignment = true;
+
+  using OperandList =
+      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
+                                                Traits::X86_MAX_GPR_ARGS)>;
+  OperandList XmmArgs;
+  OperandList GprArgs;
+  OperandList StackArgs, StackArgLocations;
+  uint32_t ParameterAreaSizeBytes = 0;
+
+  // Classify each argument operand according to the location where the
+  // argument is passed.
+  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+    Operand *Arg = Instr->getArg(i);
+    Type Ty = Arg->getType();
+    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+    assert(typeWidthInBytes(Ty) >= 4);
+    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
+      XmmArgs.push_back(Arg);
+    } else if (isScalarFloatingType(Ty) &&
+               XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
+      XmmArgs.push_back(Arg);
+    } else if (isScalarIntegerType(Ty) &&
+               GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
+      GprArgs.push_back(Arg);
+    } else {
+      StackArgs.push_back(Arg);
+      if (isVectorType(Arg->getType())) {
+        ParameterAreaSizeBytes =
+            Traits::applyStackAlignment(ParameterAreaSizeBytes);
+      }
+      Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
+      StackArgLocations.push_back(
+          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+    }
+  }
+
+  // Adjust the parameter area so that the stack is aligned. It is
+  // assumed that the stack is already aligned at the start of the
+  // calling sequence.
+  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
+
+  // Subtract the appropriate amount for the argument area. This also
+  // takes care of setting the stack adjustment during emission.
+  //
+  // TODO: If for some reason the call instruction gets dead-code
+  // eliminated after lowering, we would need to ensure that the
+  // pre-call and the post-call esp adjustment get eliminated as well.
+  if (ParameterAreaSizeBytes) {
+    _adjust_stack(ParameterAreaSizeBytes);
+  }
+
+  // Copy arguments that are passed on the stack to the appropriate
+  // stack locations.
+  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+  }
+
+  // Copy arguments to be passed in registers to the appropriate
+  // registers.
+  // TODO: Investigate the impact of lowering arguments passed in
+  // registers after lowering stack arguments as opposed to the other
+  // way around. Lowering register arguments after stack arguments may
+  // reduce register pressure. On the other hand, lowering register
+  // arguments first (before stack arguments) may result in more compact
+  // code, as the memory operand displacements may end up being smaller
+  // before any stack adjustment is done.
+  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
+    Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
+    // Generate a FakeUse of register arguments so that they do not get
+    // dead code eliminated as a result of the FakeKill of scratch
+    // registers after the call.
+    Context.insert(InstFakeUse::create(Func, Reg));
+  }
+
+  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
+    Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
+    Context.insert(InstFakeUse::create(Func, Reg));
+  }
+
+  // Generate the call instruction. Assign its result to a temporary
+  // with high register allocation weight.
+  Variable *Dest = Instr->getDest();
+  // ReturnReg doubles as ReturnRegLo as necessary.
+  Variable *ReturnReg = nullptr;
+  Variable *ReturnRegHi = nullptr;
+  if (Dest) {
+    switch (Dest->getType()) {
+    case IceType_NUM:
+      llvm_unreachable("Invalid Call dest type");
+      break;
+    case IceType_void:
+      break;
+    case IceType_i1:
+    case IceType_i8:
+    case IceType_i16:
+    case IceType_i32:
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
+      break;
+    case IceType_i64:
+      // TODO(jpp): return i64 in a GPR.
+      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+      break;
+    case IceType_f32:
+    case IceType_f64:
+    case IceType_v4i1:
+    case IceType_v8i1:
+    case IceType_v16i1:
+    case IceType_v16i8:
+    case IceType_v8i16:
+    case IceType_v4i32:
+    case IceType_v4f32:
+      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
+      break;
+    }
+  }
+
+  Operand *CallTarget = legalize(Instr->getCallTarget());
+  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+  if (NeedSandboxing) {
+    if (llvm::isa<Constant>(CallTarget)) {
+      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+    } else {
+      Variable *CallTargetVar = nullptr;
+      _mov(CallTargetVar, CallTarget);
+      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
+      const SizeT BundleSize =
+          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
+      CallTarget = CallTargetVar;
+    }
+  }
+  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
+  Context.insert(NewCall);
+  if (NeedSandboxing)
+    _bundle_unlock();
+  if (ReturnRegHi)
+    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+
+  // Add the appropriate offset to esp. The call instruction takes care
+  // of resetting the stack offset during emission.
+  if (ParameterAreaSizeBytes) {
+    Variable *Esp =
+        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
+    _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
+  }
+
+  // Insert a register-kill pseudo instruction.
+  Context.insert(InstFakeKill::create(Func, NewCall));
+
+  // Generate a FakeUse to keep the call live if necessary.
+  if (Instr->hasSideEffects() && ReturnReg) {
+    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
+    Context.insert(FakeUse);
+  }
+
+  if (!Dest)
+    return;
+
+  assert(ReturnReg && "x86-64 always returns values in registers.");
+
+  // Assign the result of the call to Dest.
+  if (ReturnRegHi) {
+    assert(Dest->getType() == IceType_i64);
+    split64(Dest);
+    Variable *DestLo = Dest->getLo();
+    Variable *DestHi = Dest->getHi();
+    _mov(DestLo, ReturnReg);
+    _mov(DestHi, ReturnRegHi);
+    return;
+  }
+
+  assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
+         Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
+         Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
+         isVectorType(Dest->getType()));
+
+  if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
+    _movp(Dest, ReturnReg);
+  } else {
+    _mov(Dest, ReturnReg);
+  }
+}
+
+void TargetDataX8664::lowerJumpTables() {
+  switch (Ctx->getFlags().getOutFileType()) {
+  case FT_Elf: {
+    ELFObjectWriter *Writer = Ctx->getObjectWriter();
+    for (const JumpTableData &JumpTable : Ctx->getJumpTables())
+      // TODO(jpp): not 386.
+      Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
+  } break;
+  case FT_Asm:
+    // Already emitted from Cfg
+    break;
+  case FT_Iasm: {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Ctx->getStrEmit();
+    for (const JumpTableData &JT : Ctx->getJumpTables()) {
+      Str << "\t.section\t.rodata." << JT.getFunctionName()
+          << "$jumptable,\"a\",@progbits\n";
+      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
+      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
+
+      // On X8664 ILP32 pointers are 32-bit hence the use of .long
+      for (intptr_t TargetOffset : JT.getTargetOffsets())
+        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
+      Str << "\n";
+    }
+  } break;
+  }
+}
+
 namespace {
 
 template <typename T> struct PoolTypeConverter {};
@@ -236,36 +524,6 @@
   Str << "\n";
 }
 
-void TargetDataX8664::lowerJumpTables() {
-  switch (Ctx->getFlags().getOutFileType()) {
-  case FT_Elf: {
-    ELFObjectWriter *Writer = Ctx->getObjectWriter();
-    for (const JumpTableData &JT : Ctx->getJumpTables())
-      // TODO(jpp): not 386.
-      Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
-  } break;
-  case FT_Asm:
-    // Already emitted from Cfg
-    break;
-  case FT_Iasm: {
-    if (!BuildDefs::dump())
-      return;
-    Ostream &Str = Ctx->getStrEmit();
-    for (const JumpTableData &JT : Ctx->getJumpTables()) {
-      Str << "\t.section\t.rodata." << JT.getFunctionName()
-          << "$jumptable,\"a\",@progbits\n";
-      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
-      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
-
-      // On X8664 ILP32 pointers are 32-bit hence the use of .long
-      for (intptr_t TargetOffset : JT.getTargetOffsets())
-        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
-      Str << "\n";
-    }
-  } break;
-  }
-}
-
 void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
                                    const IceString &SectionSuffix) {
   switch (Ctx->getFlags().getOutFileType()) {
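Both lowerCall implementations emit the same sandboxing sequence for indirect calls: the target register is masked so it can only point at the start of a NaCl bundle before the bundle-locked call. A minimal self-checking sketch of that arithmetic, assuming 32-byte bundles (BundleAlignLog2Bytes == 5); maskCallTarget is an illustrative helper, not Subzero code.

#include <cassert>
#include <cstdint>

// Matches the lowered `and CallTargetVar, ~(BundleSize - 1)`.
uint32_t maskCallTarget(uint32_t Target, uint32_t BundleAlignLog2Bytes) {
  const uint32_t BundleSize = uint32_t(1) << BundleAlignLog2Bytes;
  return Target & ~(BundleSize - 1);
}

int main() {
  // Any misaligned target is clamped back to its bundle's start...
  assert(maskCallTarget(0x1234567f, 5) == 0x12345660);
  // ...and an already-aligned target is left unchanged.
  assert(maskCallTarget(0x12345660, 5) == 0x12345660);
  return 0;
}

Direct calls to constants skip the masking and only need the bundle lock, which is why the lowering branches on llvm::isa<Constant>(CallTarget).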
diff --git a/src/IceTargetLoweringX8664.h b/src/IceTargetLoweringX8664.h
index 8c4329d..1e012b5 100644
--- a/src/IceTargetLoweringX8664.h
+++ b/src/IceTargetLoweringX8664.h
@@ -38,6 +38,9 @@ public:
 
   static TargetX8664 *create(Cfg *Func) { return new TargetX8664(Func); }
 
+protected:
+  void lowerCall(const InstCall *Instr) override;
+
 private:
   friend class ::Ice::X86Internal::TargetX86Base<TargetX8664>;
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index cc22171..89fc203 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -439,7 +439,9 @@
   }
 
   /// The maximum number of arguments to pass in XMM registers
-  static const uint32_t X86_MAX_XMM_ARGS = 4;
+  static const uint32_t X86_MAX_XMM_ARGS = 8;
+  /// The maximum number of arguments to pass in GPR registers
+  static const uint32_t X86_MAX_GPR_ARGS = 6;
   /// The number of bits in a byte
   static const uint32_t X86_CHAR_BIT = 8;
   /// Stack alignment. This is defined in IceTargetLoweringX8664.cpp because it
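These two constants drive the first-fit register assignment in TargetX8664::lowerCall: the first eight vector/FP arguments land in %xmm0-%xmm7, the first six integer arguments in %rdi, %rsi, %rdx, %rcx, %r8, %r9, and everything else goes to the stack. A simplified sketch of that classification; the types and names here are illustrative, not Subzero's.

#include <cstddef>
#include <vector>

enum class ArgLoc { Xmm, Gpr, Stack };

// Kinds: 'v' = vector, 'f' = scalar float/double, 'i' = scalar integer.
std::vector<ArgLoc> classifyArgs(const std::vector<char> &Kinds) {
  constexpr std::size_t MaxXmmArgs = 8; // X86_MAX_XMM_ARGS
  constexpr std::size_t MaxGprArgs = 6; // X86_MAX_GPR_ARGS
  std::size_t NumXmm = 0, NumGpr = 0;
  std::vector<ArgLoc> Locs;
  for (char K : Kinds) {
    if ((K == 'v' || K == 'f') && NumXmm < MaxXmmArgs) {
      Locs.push_back(ArgLoc::Xmm); // xmm0 + NumXmm
      ++NumXmm;
    } else if (K == 'i' && NumGpr < MaxGprArgs) {
      Locs.push_back(ArgLoc::Gpr); // rdi, rsi, rdx, rcx, r8, r9 in order
      ++NumGpr;
    } else {
      Locs.push_back(ArgLoc::Stack);
    }
  }
  return Locs;
}

Note the position-independence the lowerCall comments describe: an integer argument appearing after eight FP arguments still takes %rdi if the GPR slots are free.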
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index d89d747..1e33a96 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -83,6 +83,7 @@
   size_t typeWidthInBytesOnStack(Type Ty) const override {
     // Round up to the next multiple of 4 bytes. In particular, i1,
     // i8, and i16 are rounded up to 4 bytes.
+    // TODO(jpp): this needs to round to multiples of 8 bytes in x86-64.
    return (typeWidthInBytes(Ty) + 3) & ~3;
   }
 
@@ -127,7 +128,6 @@
   void lowerArithmetic(const InstArithmetic *Inst) override;
   void lowerAssign(const InstAssign *Inst) override;
   void lowerBr(const InstBr *Inst) override;
-  void lowerCall(const InstCall *Inst) override;
   void lowerCast(const InstCast *Inst) override;
   void lowerExtractElement(const InstExtractElement *Inst) override;
   void lowerFcmp(const InstFcmp *Inst) override;
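The rounding expression above, and the 8-byte variant the TODO asks for on x86-64, are both instances of rounding up to a power-of-two multiple with (W + A - 1) & ~(A - 1). A small self-checking sketch; the helper names are illustrative.

#include <cassert>
#include <cstddef>

// Round W up to the next multiple of 4 (current behavior) or 8 (the TODO).
std::size_t roundUpTo4(std::size_t W) { return (W + 3) & ~std::size_t(3); }
std::size_t roundUpTo8(std::size_t W) { return (W + 7) & ~std::size_t(7); }

int main() {
  assert(roundUpTo4(1) == 4 && roundUpTo4(4) == 4 && roundUpTo4(5) == 8);
  assert(roundUpTo8(4) == 8 && roundUpTo8(8) == 8 && roundUpTo8(9) == 16);
  return 0;
}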
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 3ec094e..245861c 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -2126,209 +2126,6 @@
 }
 
 template <class Machine>
-void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
-  // x86-32 calling convention:
-  //
-  // * At the point before the call, the stack must be aligned to 16
-  // bytes.
-  //
-  // * The first four arguments of vector type, regardless of their
-  // position relative to the other arguments in the argument list, are
-  // placed in registers xmm0 - xmm3.
-  //
-  // * Other arguments are pushed onto the stack in right-to-left order,
-  // such that the left-most argument ends up on the top of the stack at
-  // the lowest memory address.
-  //
-  // * Stack arguments of vector type are aligned to start at the next
-  // highest multiple of 16 bytes. Other stack arguments are aligned to
-  // 4 bytes.
-  //
-  // This intends to match the section "IA-32 Function Calling
-  // Convention" of the document "OS X ABI Function Call Guide" by
-  // Apple.
-  NeedsStackAlignment = true;
-
-  typedef std::vector<Operand *> OperandList;
-  OperandList XmmArgs;
-  OperandList StackArgs, StackArgLocations;
-  uint32_t ParameterAreaSizeBytes = 0;
-
-  // Classify each argument operand according to the location where the
-  // argument is passed.
-  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
-    Operand *Arg = Instr->getArg(i);
-    Type Ty = Arg->getType();
-    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
-    assert(typeWidthInBytes(Ty) >= 4);
-    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
-      XmmArgs.push_back(Arg);
-    } else {
-      StackArgs.push_back(Arg);
-      if (isVectorType(Arg->getType())) {
-        ParameterAreaSizeBytes =
-            Traits::applyStackAlignment(ParameterAreaSizeBytes);
-      }
-      Variable *esp =
-          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
-      StackArgLocations.push_back(
-          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
-      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
-    }
-  }
-
-  // Adjust the parameter area so that the stack is aligned. It is
-  // assumed that the stack is already aligned at the start of the
-  // calling sequence.
-  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
-
-  // Subtract the appropriate amount for the argument area. This also
-  // takes care of setting the stack adjustment during emission.
-  //
-  // TODO: If for some reason the call instruction gets dead-code
-  // eliminated after lowering, we would need to ensure that the
-  // pre-call and the post-call esp adjustment get eliminated as well.
-  if (ParameterAreaSizeBytes) {
-    _adjust_stack(ParameterAreaSizeBytes);
-  }
-
-  // Copy arguments that are passed on the stack to the appropriate
-  // stack locations.
-  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
-    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
-  }
-
-  // Copy arguments to be passed in registers to the appropriate
-  // registers.
-  // TODO: Investigate the impact of lowering arguments passed in
-  // registers after lowering stack arguments as opposed to the other
-  // way around. Lowering register arguments after stack arguments may
-  // reduce register pressure. On the other hand, lowering register
-  // arguments first (before stack arguments) may result in more compact
-  // code, as the memory operand displacements may end up being smaller
-  // before any stack adjustment is done.
-  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
-    Variable *Reg =
-        legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
-    // Generate a FakeUse of register arguments so that they do not get
-    // dead code eliminated as a result of the FakeKill of scratch
-    // registers after the call.
-    Context.insert(InstFakeUse::create(Func, Reg));
-  }
-  // Generate the call instruction. Assign its result to a temporary
-  // with high register allocation weight.
-  Variable *Dest = Instr->getDest();
-  // ReturnReg doubles as ReturnRegLo as necessary.
-  Variable *ReturnReg = nullptr;
-  Variable *ReturnRegHi = nullptr;
-  if (Dest) {
-    switch (Dest->getType()) {
-    case IceType_NUM:
-      llvm_unreachable("Invalid Call dest type");
-      break;
-    case IceType_void:
-      break;
-    case IceType_i1:
-    case IceType_i8:
-    case IceType_i16:
-    case IceType_i32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
-      break;
-    case IceType_i64:
-      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
-      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
-      break;
-    case IceType_f32:
-    case IceType_f64:
-      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
-      // the fstp instruction.
-      break;
-    case IceType_v4i1:
-    case IceType_v8i1:
-    case IceType_v16i1:
-    case IceType_v16i8:
-    case IceType_v8i16:
-    case IceType_v4i32:
-    case IceType_v4f32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
-      break;
-    }
-  }
-  Operand *CallTarget = legalize(Instr->getCallTarget());
-  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
-  if (NeedSandboxing) {
-    if (llvm::isa<Constant>(CallTarget)) {
-      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
-    } else {
-      Variable *CallTargetVar = nullptr;
-      _mov(CallTargetVar, CallTarget);
-      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
-      const SizeT BundleSize =
-          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
-      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
-      CallTarget = CallTargetVar;
-    }
-  }
-  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
-  Context.insert(NewCall);
-  if (NeedSandboxing)
-    _bundle_unlock();
-  if (ReturnRegHi)
-    Context.insert(InstFakeDef::create(Func, ReturnRegHi));
-
-  // Add the appropriate offset to esp. The call instruction takes care
-  // of resetting the stack offset during emission.
-  if (ParameterAreaSizeBytes) {
-    Variable *esp =
-        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
-  }
-
-  // Insert a register-kill pseudo instruction.
-  Context.insert(InstFakeKill::create(Func, NewCall));
-
-  // Generate a FakeUse to keep the call live if necessary.
-  if (Instr->hasSideEffects() && ReturnReg) {
-    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
-    Context.insert(FakeUse);
-  }
-
-  if (!Dest)
-    return;
-
-  // Assign the result of the call to Dest.
-  if (ReturnReg) {
-    if (ReturnRegHi) {
-      assert(Dest->getType() == IceType_i64);
-      split64(Dest);
-      Variable *DestLo = Dest->getLo();
-      Variable *DestHi = Dest->getHi();
-      _mov(DestLo, ReturnReg);
-      _mov(DestHi, ReturnRegHi);
-    } else {
-      assert(Dest->getType() == IceType_i32 ||
-             Dest->getType() == IceType_i16 ||
-             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
-             isVectorType(Dest->getType()));
-      if (isVectorType(Dest->getType())) {
-        _movp(Dest, ReturnReg);
-      } else {
-        _mov(Dest, ReturnReg);
-      }
-    }
-  } else if (isScalarFloatingType(Dest->getType())) {
-    // Special treatment for an FP function which returns its result in
-    // st(0).
-    // If Dest ends up being a physical xmm register, the fstp emit code
-    // will route st(0) through a temporary stack slot.
-    _fstp(Dest);
-    // Create a fake use of Dest in case it actually isn't used,
-    // because st(0) still needs to be popped.
-    Context.insert(InstFakeUse::create(Func, Dest));
-  }
-}
-
-template <class Machine>
 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
   // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
   InstCast::OpKind CastKind = Inst->getCastKind();