| //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// |
| // |
| // The Subzero Code Generator |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// This file implements the TargetLoweringX8664 class, which |
| /// consists almost entirely of the lowering sequence for each |
| /// high-level instruction. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "IceTargetLoweringX8664.h" |
| |
| #include "IceTargetLoweringX8664Traits.h" |
| #include "IceTargetLoweringX86Base.h" |
| |
| namespace Ice { |
| |
| //------------------------------------------------------------------------------ |
| // ______ ______ ______ __ ______ ______ |
| // /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\ |
| // \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \ |
| // \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\ |
| // \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/ |
| // |
| //------------------------------------------------------------------------------ |
| namespace X86Internal { |
| const MachineTraits<TargetX8664>::TableFcmpType |
| MachineTraits<TargetX8664>::TableFcmp[] = { |
| #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| { \ |
| dflt, swapS, X8664::Traits::Cond::C1, X8664::Traits::Cond::C2, swapV, \ |
| X8664::Traits::Cond::pred \ |
| } \ |
| , |
| FCMPX8664_TABLE |
| #undef X |
| }; |
| |
| const size_t MachineTraits<TargetX8664>::TableFcmpSize = |
| llvm::array_lengthof(TableFcmp); |
| |
| const MachineTraits<TargetX8664>::TableIcmp32Type |
| MachineTraits<TargetX8664>::TableIcmp32[] = { |
| #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| { X8664::Traits::Cond::C_32 } \ |
| , |
| ICMPX8664_TABLE |
| #undef X |
| }; |
| |
| const size_t MachineTraits<TargetX8664>::TableIcmp32Size = |
| llvm::array_lengthof(TableIcmp32); |
| |
| const MachineTraits<TargetX8664>::TableIcmp64Type |
| MachineTraits<TargetX8664>::TableIcmp64[] = { |
| #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| { \ |
| X8664::Traits::Cond::C1_64, X8664::Traits::Cond::C2_64, \ |
| X8664::Traits::Cond::C3_64 \ |
| } \ |
| , |
| ICMPX8664_TABLE |
| #undef X |
| }; |
| |
| const size_t MachineTraits<TargetX8664>::TableIcmp64Size = |
| llvm::array_lengthof(TableIcmp64); |
| |
| const MachineTraits<TargetX8664>::TableTypeX8664AttributesType |
| MachineTraits<TargetX8664>::TableTypeX8664Attributes[] = { |
| #define X(tag, elementty, cvt, sdss, pack, width, fld) \ |
| { elementty } \ |
| , |
| ICETYPEX8664_TABLE |
| #undef X |
| }; |
| |
| const size_t MachineTraits<TargetX8664>::TableTypeX8664AttributesSize = |
| llvm::array_lengthof(TableTypeX8664Attributes); |
| |
| const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16; |
| const char *MachineTraits<TargetX8664>::TargetName = "X8664"; |
| |
| } // end of namespace X86Internal |
| |
| //------------------------------------------------------------------------------ |
| // __ ______ __ __ ______ ______ __ __ __ ______ |
| // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ |
| // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ |
| // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ |
| // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ |
| // |
| //------------------------------------------------------------------------------ |
| namespace { |
| static inline TargetX8664::Traits::RegisterSet::AllRegisters |
| getRegisterForXmmArgNum(uint32_t ArgNum) { |
| assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS); |
| return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>( |
| TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum); |
| } |
| |
| static inline TargetX8664::Traits::RegisterSet::AllRegisters |
| getRegisterForGprArgNum(uint32_t ArgNum) { |
| assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS); |
| static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = { |
| TargetX8664::Traits::RegisterSet::Reg_edi, |
| TargetX8664::Traits::RegisterSet::Reg_esi, |
| TargetX8664::Traits::RegisterSet::Reg_edx, |
| TargetX8664::Traits::RegisterSet::Reg_ecx, |
| TargetX8664::Traits::RegisterSet::Reg_r8d, |
| TargetX8664::Traits::RegisterSet::Reg_r9d, |
| }; |
  static_assert(llvm::array_lengthof(GprForArgNum) ==
                    TargetX8664::Traits::X86_MAX_GPR_ARGS,
                "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
| return GprForArgNum[ArgNum]; |
| } |
| |
// constexprMax returns a (constexpr) max(S0, S1); it is used for defining
// OperandList in lowerCall. std::max() is not constexpr until C++14, so it
// cannot be used here.
| constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } |
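
// A quick compile-time sanity check of constexprMax; the operand values are
// arbitrary and purely illustrative.
static_assert(constexprMax(2, 3) == 3 && constexprMax(3, 2) == 3,
              "constexprMax should return the larger of its arguments");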
| |
| } // end of anonymous namespace |
| |
| void TargetX8664::lowerCall(const InstCall *Instr) { |
| // x86-64 calling convention: |
| // |
| // * At the point before the call, the stack must be aligned to 16 |
| // bytes. |
| // |
| // * The first eight arguments of vector/fp type, regardless of their |
| // position relative to the other arguments in the argument list, are |
| // placed in registers %xmm0 - %xmm7. |
| // |
| // * The first six arguments of integer types, regardless of their |
| // position relative to the other arguments in the argument list, are |
| // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. |
| // |
| // * Other arguments are pushed onto the stack in right-to-left order, |
| // such that the left-most argument ends up on the top of the stack at |
| // the lowest memory address. |
| // |
| // * Stack arguments of vector type are aligned to start at the next |
| // highest multiple of 16 bytes. Other stack arguments are aligned to |
| // 8 bytes. |
| // |
  // This is intended to match the section "Function Calling Sequence" of the
  // document "System V Application Binary Interface."
| NeedsStackAlignment = true; |
| |
| using OperandList = |
| llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, |
| Traits::X86_MAX_GPR_ARGS)>; |
| OperandList XmmArgs; |
| OperandList GprArgs; |
| OperandList StackArgs, StackArgLocations; |
| uint32_t ParameterAreaSizeBytes = 0; |
| |
| // Classify each argument operand according to the location where the |
| // argument is passed. |
| for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| Operand *Arg = Instr->getArg(i); |
| Type Ty = Arg->getType(); |
| // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| assert(typeWidthInBytes(Ty) >= 4); |
| if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| XmmArgs.push_back(Arg); |
| } else if (isScalarFloatingType(Ty) && |
| XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| XmmArgs.push_back(Arg); |
| } else if (isScalarIntegerType(Ty) && |
| GprArgs.size() < Traits::X86_MAX_GPR_ARGS) { |
| GprArgs.push_back(Arg); |
| } else { |
| StackArgs.push_back(Arg); |
| if (isVectorType(Arg->getType())) { |
| ParameterAreaSizeBytes = |
| Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| } |
| Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| StackArgLocations.push_back( |
| Traits::X86OperandMem::create(Func, Ty, esp, Loc)); |
| ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| } |
| } |
| |
| // Adjust the parameter area so that the stack is aligned. It is |
| // assumed that the stack is already aligned at the start of the |
| // calling sequence. |
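  // For example, 20 bytes of outgoing arguments get rounded up to 32 bytes
  // under the 16-byte stack alignment.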
| ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| |
| // Subtract the appropriate amount for the argument area. This also |
| // takes care of setting the stack adjustment during emission. |
| // |
| // TODO: If for some reason the call instruction gets dead-code |
| // eliminated after lowering, we would need to ensure that the |
| // pre-call and the post-call esp adjustment get eliminated as well. |
| if (ParameterAreaSizeBytes) { |
| _adjust_stack(ParameterAreaSizeBytes); |
| } |
| |
| // Copy arguments that are passed on the stack to the appropriate |
| // stack locations. |
| for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| } |
| |
| // Copy arguments to be passed in registers to the appropriate |
| // registers. |
| // TODO: Investigate the impact of lowering arguments passed in |
| // registers after lowering stack arguments as opposed to the other |
| // way around. Lowering register arguments after stack arguments may |
| // reduce register pressure. On the other hand, lowering register |
| // arguments first (before stack arguments) may result in more compact |
| // code, as the memory operand displacements may end up being smaller |
| // before any stack adjustment is done. |
| for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); |
| // Generate a FakeUse of register arguments so that they do not get |
| // dead code eliminated as a result of the FakeKill of scratch |
| // registers after the call. |
| Context.insert(InstFakeUse::create(Func, Reg)); |
| } |
| |
| for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { |
| Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i)); |
| Context.insert(InstFakeUse::create(Func, Reg)); |
| } |
| |
| // Generate the call instruction. Assign its result to a temporary |
| // with high register allocation weight. |
| Variable *Dest = Instr->getDest(); |
| // ReturnReg doubles as ReturnRegLo as necessary. |
| Variable *ReturnReg = nullptr; |
| Variable *ReturnRegHi = nullptr; |
| if (Dest) { |
| switch (Dest->getType()) { |
| case IceType_NUM: |
| case IceType_void: |
| llvm::report_fatal_error("Invalid Call dest type"); |
| break; |
| case IceType_i1: |
| case IceType_i8: |
| case IceType_i16: |
| case IceType_i32: |
| ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); |
| break; |
| case IceType_i64: |
| // TODO(jpp): return i64 in a GPR. |
| ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| break; |
| case IceType_f32: |
| case IceType_f64: |
| case IceType_v4i1: |
| case IceType_v8i1: |
| case IceType_v16i1: |
| case IceType_v16i8: |
| case IceType_v8i16: |
| case IceType_v4i32: |
| case IceType_v4f32: |
| ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); |
| break; |
| } |
| } |
| |
| Operand *CallTarget = legalize(Instr->getCallTarget()); |
| const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| if (NeedSandboxing) { |
| if (llvm::isa<Constant>(CallTarget)) { |
| _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| } else { |
| Variable *CallTargetVar = nullptr; |
| _mov(CallTargetVar, CallTarget); |
| _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| const SizeT BundleSize = |
| 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
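      // For example, with a 32-byte bundle the mask is ~31 (0xffffffe0),
      // clearing the low five bits so the computed call target lands on a
      // bundle boundary.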
| CallTarget = CallTargetVar; |
| } |
| } |
| Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
| Context.insert(NewCall); |
| if (NeedSandboxing) |
| _bundle_unlock(); |
| if (ReturnRegHi) |
| Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| |
| // Add the appropriate offset to esp. The call instruction takes care |
| // of resetting the stack offset during emission. |
| if (ParameterAreaSizeBytes) { |
| Variable *Esp = |
| Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
| } |
| |
| // Insert a register-kill pseudo instruction. |
| Context.insert(InstFakeKill::create(Func, NewCall)); |
| |
| // Generate a FakeUse to keep the call live if necessary. |
| if (Instr->hasSideEffects() && ReturnReg) { |
| Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| Context.insert(FakeUse); |
| } |
| |
| if (!Dest) |
| return; |
| |
  assert(ReturnReg && "x86-64 always returns values in registers.");
| |
| // Assign the result of the call to Dest. |
| if (ReturnRegHi) { |
| assert(Dest->getType() == IceType_i64); |
| split64(Dest); |
| Variable *DestLo = Dest->getLo(); |
| Variable *DestHi = Dest->getHi(); |
| _mov(DestLo, ReturnReg); |
| _mov(DestHi, ReturnRegHi); |
| return; |
| } |
| |
| assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 || |
| Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| isVectorType(Dest->getType())); |
| |
| if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { |
| _movp(Dest, ReturnReg); |
| } else { |
| _mov(Dest, ReturnReg); |
| } |
| } |
| |
| void TargetX8664::lowerArguments() { |
| VarList &Args = Func->getArgs(); |
  // The first eight vector-typed and scalar floating-point arguments are
  // passed in %xmm0 through %xmm7 regardless of their position in the
  // argument list.
| unsigned NumXmmArgs = 0; |
  // The first six integer-typed arguments are passed in %rdi, %rsi, %rdx,
  // %rcx, %r8, and %r9 regardless of their position in the argument list.
| unsigned NumGprArgs = 0; |
| |
| Context.init(Func->getEntryNode()); |
| Context.setInsertPoint(Context.getCur()); |
| |
  for (SizeT i = 0, End = Args.size();
       i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS ||
                   NumGprArgs < Traits::X86_MAX_GPR_ARGS);
| ++i) { |
| Variable *Arg = Args[i]; |
| Type Ty = Arg->getType(); |
| if ((isVectorType(Ty) || isScalarFloatingType(Ty)) && |
| NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { |
| // Replace Arg in the argument list with the home register. Then |
| // generate an instruction in the prolog to copy the home register |
| // to the assigned location of Arg. |
| int32_t RegNum = getRegisterForXmmArgNum(NumXmmArgs); |
| ++NumXmmArgs; |
| Variable *RegisterArg = Func->makeVariable(Ty); |
| if (BuildDefs::dump()) |
| RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| RegisterArg->setRegNum(RegNum); |
| RegisterArg->setIsArg(); |
| Arg->setIsArg(false); |
| |
| Args[i] = RegisterArg; |
| Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| } else if (isScalarIntegerType(Ty) && |
| NumGprArgs < Traits::X86_MAX_GPR_ARGS) { |
| int32_t RegNum = getRegisterForGprArgNum(NumGprArgs); |
| ++NumGprArgs; |
| Variable *RegisterArg = Func->makeVariable(Ty); |
| if (BuildDefs::dump()) |
| RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| RegisterArg->setRegNum(RegNum); |
| RegisterArg->setIsArg(); |
| Arg->setIsArg(false); |
| |
| Args[i] = RegisterArg; |
| Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| } |
| } |
| } |
| |
| void TargetX8664::lowerRet(const InstRet *Inst) { |
| Variable *Reg = nullptr; |
| if (Inst->hasRetValue()) { |
| Operand *Src0 = legalize(Inst->getRetValue()); |
| // TODO(jpp): this is not needed. |
| if (Src0->getType() == IceType_i64) { |
| Variable *eax = |
| legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax); |
| Variable *edx = |
| legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx); |
| Reg = eax; |
| Context.insert(InstFakeUse::create(Func, edx)); |
| } else if (isScalarFloatingType(Src0->getType())) { |
| _fld(Src0); |
| } else if (isVectorType(Src0->getType())) { |
| Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); |
| } else { |
| _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); |
| } |
| } |
| // Add a ret instruction even if sandboxing is enabled, because |
| // addEpilog explicitly looks for a ret instruction as a marker for |
| // where to insert the frame removal instructions. |
| _ret(Reg); |
| // Add a fake use of esp to make sure esp stays alive for the entire |
| // function. Otherwise post-call esp adjustments get dead-code |
| // eliminated. TODO: Are there more places where the fake use |
| // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
| // have a ret instruction. |
| Variable *esp = |
| Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| Context.insert(InstFakeUse::create(Func, esp)); |
| } |
| |
| void TargetX8664::addProlog(CfgNode *Node) { |
| // Stack frame layout: |
| // |
| // +------------------------+ |
| // | 1. return address | |
| // +------------------------+ |
| // | 2. preserved registers | |
| // +------------------------+ |
| // | 3. padding | |
| // +------------------------+ |
| // | 4. global spill area | |
| // +------------------------+ |
| // | 5. padding | |
| // +------------------------+ |
| // | 6. local spill area | |
| // +------------------------+ |
| // | 7. padding | |
| // +------------------------+ |
| // | 8. allocas | |
| // +------------------------+ |
| // |
| // The following variables record the size in bytes of the given areas: |
| // * X86_RET_IP_SIZE_BYTES: area 1 |
| // * PreservedRegsSizeBytes: area 2 |
| // * SpillAreaPaddingBytes: area 3 |
| // * GlobalsSize: area 4 |
| // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| // * LocalsSpillAreaSize: area 6 |
| // * SpillAreaSizeBytes: areas 3 - 7 |
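  //
  // An illustrative sizing (assuming an 8-byte return address and one
  // preserved 8-byte register): areas 1 and 2 together occupy 16 bytes, so
  // a spill area requiring 16-byte alignment needs no padding in area 3; if
  // only the return address were present (8 bytes), area 3 would need 8
  // bytes of padding instead.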
| |
  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done with one variable per stack
  // slot, or by coalescing: run the register allocator again with an
  // infinite set of registers (as a side effect, this gives variables a
  // second chance at physical register assignment).
| // |
| // A middle ground approach is to leverage sparsity and allocate one |
| // block of space on the frame for globals (variables with |
| // multi-block lifetime), and one block to share for locals |
| // (single-block lifetime). |
| |
| Context.init(Node); |
| Context.setInsertPoint(Context.getCur()); |
| |
| llvm::SmallBitVector CalleeSaves = |
| getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; |
| size_t GlobalsSize = 0; |
| // If there is a separate locals area, this represents that area. |
| // Otherwise it counts any variable not counted by GlobalsSize. |
| SpillAreaSizeBytes = 0; |
| // If there is a separate locals area, this specifies the alignment |
| // for it. |
| uint32_t LocalsSlotsAlignmentBytes = 0; |
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
| uint32_t SpillAreaAlignmentBytes = 0; |
| // A spill slot linked to a variable with a stack slot should reuse |
| // that stack slot. |
| std::function<bool(Variable *)> TargetVarHook = |
| [&VariablesLinkedToSpillSlots](Variable *Var) { |
| if (auto *SpillVar = |
| llvm::dyn_cast<typename Traits::SpillVariable>(Var)) { |
| assert(Var->getWeight().isZero()); |
| if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { |
| VariablesLinkedToSpillSlots.push_back(Var); |
| return true; |
| } |
| } |
| return false; |
| }; |
| |
| // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| &LocalsSlotsAlignmentBytes, TargetVarHook); |
| uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| SpillAreaSizeBytes += GlobalsSize; |
| |
| // Add push instructions for preserved registers. |
| uint32_t NumCallee = 0; |
| size_t PreservedRegsSizeBytes = 0; |
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| if (CalleeSaves[i] && RegsUsed[i]) { |
| ++NumCallee; |
| PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64); |
| _push(getPhysicalRegister(i)); |
| } |
| } |
| Ctx->statsUpdateRegistersSaved(NumCallee); |
| |
| // Generate "push ebp; mov ebp, esp" |
| if (IsEbpBasedFrame) { |
| assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
| .count() == 0); |
| PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64); |
| Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| _push(ebp); |
| _mov(ebp, esp); |
| // Keep ebp live for late-stage liveness analysis |
| // (e.g. asm-verbose mode). |
| Context.insert(InstFakeUse::create(Func, ebp)); |
| } |
| |
| // Align the variables area. SpillAreaPaddingBytes is the size of |
| // the region after the preserved registers and before the spill areas. |
| // LocalsSlotsPaddingBytes is the amount of padding between the globals |
| // and locals area if they are separate. |
| assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); |
| assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| uint32_t SpillAreaPaddingBytes = 0; |
| uint32_t LocalsSlotsPaddingBytes = 0; |
| alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, |
| SpillAreaAlignmentBytes, GlobalsSize, |
| LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, |
| &LocalsSlotsPaddingBytes); |
| SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| uint32_t GlobalsAndSubsequentPaddingSize = |
| GlobalsSize + LocalsSlotsPaddingBytes; |
| |
| // Align esp if necessary. |
| if (NeedsStackAlignment) { |
| uint32_t StackOffset = |
| Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| uint32_t StackSize = |
| Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| SpillAreaSizeBytes = StackSize - StackOffset; |
| } |
| |
| // Generate "sub esp, SpillAreaSizeBytes" |
| if (SpillAreaSizeBytes) |
| _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| |
| resetStackAdjustment(); |
| |
| // Fill in stack offsets for stack args, and copy args into registers |
| // for those that were register-allocated. Args are pushed right to |
| // left, so Arg[0] is closest to the stack/frame pointer. |
| Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| size_t BasicFrameOffset = |
| PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
| if (!IsEbpBasedFrame) |
| BasicFrameOffset += SpillAreaSizeBytes; |
| |
| const VarList &Args = Func->getArgs(); |
| size_t InArgsSizeBytes = 0; |
| unsigned NumXmmArgs = 0; |
  unsigned NumGprArgs = 0;
| for (Variable *Arg : Args) { |
| // Skip arguments passed in registers. |
| if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { |
| ++NumXmmArgs; |
| continue; |
| } |
| if (isScalarFloatingType(Arg->getType()) && |
| NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { |
| ++NumXmmArgs; |
| continue; |
| } |
    if (isScalarIntegerType(Arg->getType()) &&
        NumGprArgs < Traits::X86_MAX_GPR_ARGS) {
      ++NumGprArgs;
| continue; |
| } |
| finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| } |
| |
| // Fill in stack offsets for locals. |
| assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| IsEbpBasedFrame); |
| // Assign stack offsets to variables that have been linked to spilled |
| // variables. |
| for (Variable *Var : VariablesLinkedToSpillSlots) { |
| Variable *Linked = |
| (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo(); |
| Var->setStackOffset(Linked->getStackOffset()); |
| } |
| this->HasComputedFrame = true; |
| |
| if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
| OstreamLocker L(Func->getContext()); |
| Ostream &Str = Func->getContext()->getStrDump(); |
| |
| Str << "Stack layout:\n"; |
| uint32_t EspAdjustmentPaddingSize = |
| SpillAreaSizeBytes - LocalsSpillAreaSize - |
| GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
| Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| << " globals spill area = " << GlobalsSize << " bytes\n" |
| << " globals-locals spill areas intermediate padding = " |
| << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| << " esp alignment padding = " << EspAdjustmentPaddingSize |
| << " bytes\n"; |
| |
| Str << "Stack details:\n" |
| << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| << " bytes\n" |
| << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| } |
| } |
| |
| void TargetX8664::addEpilog(CfgNode *Node) { |
| InstList &Insts = Node->getInsts(); |
| InstList::reverse_iterator RI, E; |
| for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| if (llvm::isa<typename Traits::Insts::Ret>(*RI)) |
| break; |
| } |
| if (RI == E) |
| return; |
| |
| // Convert the reverse_iterator position into its corresponding |
| // (forward) iterator position. |
| InstList::iterator InsertPoint = RI.base(); |
| --InsertPoint; |
| Context.init(Node); |
| Context.setInsertPoint(InsertPoint); |
| |
| Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| if (IsEbpBasedFrame) { |
| Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| // For late-stage liveness analysis (e.g. asm-verbose mode), |
| // adding a fake use of esp before the assignment of esp=ebp keeps |
| // previous esp adjustments from being dead-code eliminated. |
| Context.insert(InstFakeUse::create(Func, esp)); |
| _mov(esp, ebp); |
| _pop(ebp); |
| } else { |
| // add esp, SpillAreaSizeBytes |
| if (SpillAreaSizeBytes) |
| _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| } |
| |
| // Add pop instructions for preserved registers. |
| llvm::SmallBitVector CalleeSaves = |
| getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| SizeT j = CalleeSaves.size() - i - 1; |
| if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame) |
| continue; |
| if (CalleeSaves[j] && RegsUsed[j]) { |
| _pop(getPhysicalRegister(j)); |
| } |
| } |
| |
| if (!Ctx->getFlags().getUseSandboxing()) |
| return; |
| // Change the original ret instruction into a sandboxed return sequence. |
| // t:ecx = pop |
| // bundle_lock |
| // and t, ~31 |
| // jmp *t |
| // bundle_unlock |
| // FakeUse <original_ret_operand> |
| Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| _pop(T_ecx); |
| lowerIndirectJump(T_ecx); |
| if (RI->getSrcSize()) { |
| Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| Context.insert(InstFakeUse::create(Func, RetValue)); |
| } |
| RI->setDeleted(); |
| } |
| |
| void TargetX8664::emitJumpTable(const Cfg *Func, |
| const InstJumpTable *JumpTable) const { |
| if (!BuildDefs::dump()) |
| return; |
| Ostream &Str = Ctx->getStrEmit(); |
| IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| Str << "\t.section\t.rodata." << MangledName |
| << "$jumptable,\"a\",@progbits\n"; |
| Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; |
| |
  // On x86-64 with the ILP32 model, pointers are 32 bits wide; hence the
  // use of .long rather than .quad.
| for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) |
| Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); |
| Str << "\n"; |
| } |
| |
| namespace { |
| template <typename T> struct PoolTypeConverter {}; |
| |
| template <> struct PoolTypeConverter<float> { |
| typedef uint32_t PrimitiveIntType; |
| typedef ConstantFloat IceType; |
| static const Type Ty = IceType_f32; |
| static const char *TypeName; |
| static const char *AsmTag; |
| static const char *PrintfString; |
| }; |
| const char *PoolTypeConverter<float>::TypeName = "float"; |
| const char *PoolTypeConverter<float>::AsmTag = ".long"; |
| const char *PoolTypeConverter<float>::PrintfString = "0x%x"; |
| |
| template <> struct PoolTypeConverter<double> { |
| typedef uint64_t PrimitiveIntType; |
| typedef ConstantDouble IceType; |
| static const Type Ty = IceType_f64; |
| static const char *TypeName; |
| static const char *AsmTag; |
| static const char *PrintfString; |
| }; |
| const char *PoolTypeConverter<double>::TypeName = "double"; |
| const char *PoolTypeConverter<double>::AsmTag = ".quad"; |
| const char *PoolTypeConverter<double>::PrintfString = "0x%llx"; |
| |
// Converter for i32 constant pooling.
| template <> struct PoolTypeConverter<uint32_t> { |
| typedef uint32_t PrimitiveIntType; |
| typedef ConstantInteger32 IceType; |
| static const Type Ty = IceType_i32; |
| static const char *TypeName; |
| static const char *AsmTag; |
| static const char *PrintfString; |
| }; |
| const char *PoolTypeConverter<uint32_t>::TypeName = "i32"; |
| const char *PoolTypeConverter<uint32_t>::AsmTag = ".long"; |
| const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x"; |
| |
// Converter for i16 constant pooling.
| template <> struct PoolTypeConverter<uint16_t> { |
| typedef uint32_t PrimitiveIntType; |
| typedef ConstantInteger32 IceType; |
| static const Type Ty = IceType_i16; |
| static const char *TypeName; |
| static const char *AsmTag; |
| static const char *PrintfString; |
| }; |
| const char *PoolTypeConverter<uint16_t>::TypeName = "i16"; |
| const char *PoolTypeConverter<uint16_t>::AsmTag = ".short"; |
| const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x"; |
| |
// Converter for i8 constant pooling.
| template <> struct PoolTypeConverter<uint8_t> { |
| typedef uint32_t PrimitiveIntType; |
| typedef ConstantInteger32 IceType; |
| static const Type Ty = IceType_i8; |
| static const char *TypeName; |
| static const char *AsmTag; |
| static const char *PrintfString; |
| }; |
| const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; |
| const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; |
| const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; |
| } // end of anonymous namespace |
| |
| template <typename T> |
| void TargetDataX8664::emitConstantPool(GlobalContext *Ctx) { |
| if (!BuildDefs::dump()) |
| return; |
| Ostream &Str = Ctx->getStrEmit(); |
| Type Ty = T::Ty; |
| SizeT Align = typeAlignInBytes(Ty); |
| ConstantList Pool = Ctx->getConstantPool(Ty); |
| |
| Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align |
| << "\n"; |
| Str << "\t.align\t" << Align << "\n"; |
| |
  // If the reorder-pooled-constants option is set, shuffle the constant
  // pool before emitting it.
| if (Ctx->getFlags().shouldReorderPooledConstants()) |
| RandomShuffle(Pool.begin(), Pool.end(), [Ctx](uint64_t N) { |
| return (uint32_t)Ctx->getRNG().next(N); |
| }); |
| |
| for (Constant *C : Pool) { |
| if (!C->getShouldBePooled()) |
| continue; |
| typename T::IceType *Const = llvm::cast<typename T::IceType>(C); |
| typename T::IceType::PrimType Value = Const->getValue(); |
| // Use memcpy() to copy bits from Value into RawValue in a way |
| // that avoids breaking strict-aliasing rules. |
| typename T::PrimitiveIntType RawValue; |
| memcpy(&RawValue, &Value, sizeof(Value)); |
| char buf[30]; |
| int CharsPrinted = |
| snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue); |
| assert(CharsPrinted >= 0 && |
| (size_t)CharsPrinted < llvm::array_lengthof(buf)); |
| (void)CharsPrinted; // avoid warnings if asserts are disabled |
| Const->emitPoolLabel(Str); |
| Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " " |
| << Value << "\n"; |
| } |
| } |
| |
| void TargetDataX8664::lowerConstants() { |
| if (Ctx->getFlags().getDisableTranslation()) |
| return; |
  // Emit the integer constant pools in addition to the float/double pools.
  // Although x86 can normally embed integer constants as immediates, pooled
  // integer constants (see getShouldBePooled) still need to be emitted.
| switch (Ctx->getFlags().getOutFileType()) { |
| case FT_Elf: { |
| ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| |
| Writer->writeConstantPool<ConstantInteger32>(IceType_i8); |
| Writer->writeConstantPool<ConstantInteger32>(IceType_i16); |
| Writer->writeConstantPool<ConstantInteger32>(IceType_i32); |
| |
| Writer->writeConstantPool<ConstantFloat>(IceType_f32); |
| Writer->writeConstantPool<ConstantDouble>(IceType_f64); |
| } break; |
| case FT_Asm: |
| case FT_Iasm: { |
| OstreamLocker L(Ctx); |
| |
| emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx); |
| emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx); |
| emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx); |
| |
| emitConstantPool<PoolTypeConverter<float>>(Ctx); |
| emitConstantPool<PoolTypeConverter<double>>(Ctx); |
| } break; |
| } |
| } |
| |
| void TargetDataX8664::lowerJumpTables() { |
| switch (Ctx->getFlags().getOutFileType()) { |
| case FT_Elf: { |
| ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| for (const JumpTableData &JumpTable : Ctx->getJumpTables()) |
      // TODO(jpp): R_386_32 is an x86-32 relocation; use the appropriate
      // x86-64 relocation instead.
| Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32); |
| } break; |
| case FT_Asm: |
| // Already emitted from Cfg |
| break; |
| case FT_Iasm: { |
| if (!BuildDefs::dump()) |
| return; |
| Ostream &Str = Ctx->getStrEmit(); |
| for (const JumpTableData &JT : Ctx->getJumpTables()) { |
| Str << "\t.section\t.rodata." << JT.getFunctionName() |
| << "$jumptable,\"a\",@progbits\n"; |
| Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":"; |
| |
      // On x86-64 with the ILP32 model, pointers are 32 bits wide; hence
      // the use of .long rather than .quad.
| for (intptr_t TargetOffset : JT.getTargetOffsets()) |
| Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset; |
| Str << "\n"; |
| } |
| } break; |
| } |
| } |
| |
| void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars, |
| const IceString &SectionSuffix) { |
| switch (Ctx->getFlags().getOutFileType()) { |
| case FT_Elf: { |
| ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
    // TODO(jpp): R_386_32 is an x86-32 relocation; use the appropriate
    // x86-64 relocation instead.
| Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); |
| } break; |
| case FT_Asm: |
| case FT_Iasm: { |
| const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly(); |
| OstreamLocker L(Ctx); |
| for (const VariableDeclaration *Var : Vars) { |
| if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { |
| emitGlobal(*Var, SectionSuffix); |
| } |
| } |
| } break; |
| } |
| } |
| |
| // In some cases, there are x-macros tables for both high-level and |
| // low-level instructions/operands that use the same enum key value. |
| // The tables are kept separate to maintain a proper separation |
| // between abstraction layers. There is a risk that the tables could |
| // get out of sync if enum values are reordered or if entries are |
| // added or deleted. The following dummy namespaces use |
| // static_asserts to ensure everything is kept in sync. |
| |
| namespace { |
| // Validate the enum values in FCMPX8664_TABLE. |
| namespace dummy1 { |
| // Define a temporary set of enum values based on low-level table |
| // entries. |
| enum _tmp_enum { |
| #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, |
| FCMPX8664_TABLE |
| #undef X |
| _num |
| }; |
| // Define a set of constants based on high-level table entries. |
| #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
| ICEINSTFCMP_TABLE |
| #undef X |
| // Define a set of constants based on low-level table entries, and |
| // ensure the table entry keys are consistent. |
| #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| static const int _table2_##val = _tmp_##val; \ |
| static_assert( \ |
| _table1_##val == _table2_##val, \ |
| "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE"); |
| FCMPX8664_TABLE |
| #undef X |
| // Repeat the static asserts with respect to the high-level table |
| // entries in case the high-level table has extra entries. |
| #define X(tag, str) \ |
| static_assert( \ |
| _table1_##tag == _table2_##tag, \ |
| "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE"); |
| ICEINSTFCMP_TABLE |
| #undef X |
| } // end of namespace dummy1 |
| |
| // Validate the enum values in ICMPX8664_TABLE. |
| namespace dummy2 { |
| // Define a temporary set of enum values based on low-level table |
| // entries. |
| enum _tmp_enum { |
| #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, |
| ICMPX8664_TABLE |
| #undef X |
| _num |
| }; |
| // Define a set of constants based on high-level table entries. |
| #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
| ICEINSTICMP_TABLE |
| #undef X |
| // Define a set of constants based on low-level table entries, and |
| // ensure the table entry keys are consistent. |
| #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| static const int _table2_##val = _tmp_##val; \ |
| static_assert( \ |
| _table1_##val == _table2_##val, \ |
| "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE"); |
| ICMPX8664_TABLE |
| #undef X |
| // Repeat the static asserts with respect to the high-level table |
| // entries in case the high-level table has extra entries. |
| #define X(tag, str) \ |
| static_assert( \ |
| _table1_##tag == _table2_##tag, \ |
| "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE"); |
| ICEINSTICMP_TABLE |
| #undef X |
| } // end of namespace dummy2 |
| |
| // Validate the enum values in ICETYPEX8664_TABLE. |
| namespace dummy3 { |
| // Define a temporary set of enum values based on low-level table |
| // entries. |
| enum _tmp_enum { |
| #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, |
| ICETYPEX8664_TABLE |
| #undef X |
| _num |
| }; |
| // Define a set of constants based on high-level table entries. |
| #define X(tag, sizeLog2, align, elts, elty, str) \ |
| static const int _table1_##tag = tag; |
| ICETYPE_TABLE |
| #undef X |
| // Define a set of constants based on low-level table entries, and |
| // ensure the table entry keys are consistent. |
| #define X(tag, elementty, cvt, sdss, pack, width, fld) \ |
| static const int _table2_##tag = _tmp_##tag; \ |
| static_assert(_table1_##tag == _table2_##tag, \ |
| "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); |
| ICETYPEX8664_TABLE |
| #undef X |
| // Repeat the static asserts with respect to the high-level table |
| // entries in case the high-level table has extra entries. |
| #define X(tag, sizeLog2, align, elts, elty, str) \ |
| static_assert(_table1_##tag == _table2_##tag, \ |
| "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); |
| ICETYPE_TABLE |
| #undef X |
| } // end of namespace dummy3 |
| } // end of anonymous namespace |
| |
| } // end of namespace Ice |