| //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// |
| // |
| // The Subzero Code Generator |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// \brief Declares the TargetLoweringARM32 class, which implements the |
| /// TargetLowering interface for the ARM 32-bit architecture. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H |
| #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H |
| |
| #include "IceAssemblerARM32.h" |
| #include "IceDefs.h" |
| #include "IceInstARM32.h" |
| #include "IceRegistersARM32.h" |
| #include "IceTargetLowering.h" |
| |
| #include <utility> |
| |
| namespace Ice { |
| namespace ARM32 { |
| |
// Class encapsulating ARM CPU features / instruction set.
| class TargetARM32Features { |
| TargetARM32Features() = delete; |
| TargetARM32Features(const TargetARM32Features &) = delete; |
| TargetARM32Features &operator=(const TargetARM32Features &) = delete; |
| |
| public: |
| explicit TargetARM32Features(const ClFlags &Flags); |
| |
| enum ARM32InstructionSet { |
| Begin, |
| // Neon is the PNaCl baseline instruction set. |
| Neon = Begin, |
| HWDivArm, // HW divide in ARM mode (not just Thumb mode). |
| End |
| }; |
| |
| bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; } |
| |
| private: |
| ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin; |
| }; |
| |
| // The target lowering logic for ARM32. |
| class TargetARM32 : public TargetLowering { |
| TargetARM32() = delete; |
| TargetARM32(const TargetARM32 &) = delete; |
| TargetARM32 &operator=(const TargetARM32 &) = delete; |
| |
| public: |
| static void staticInit(GlobalContext *Ctx); |
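
  // Floats are always pooled. Doubles are pooled unless they are +0.0, since
  // a positive zero can be materialized in a register directly (e.g., with
  // veor) rather than loaded from the constant pool.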
| |
| static bool shouldBePooled(const Constant *C) { |
| if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) { |
| return !Utils::isPositiveZero(ConstDouble->getValue()); |
| } |
| if (llvm::isa<ConstantFloat>(C)) |
| return true; |
| return false; |
| } |
| |
| static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; } |
| |
| static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) { |
| return makeUnique<TargetARM32>(Func); |
| } |
| |
| std::unique_ptr<::Ice::Assembler> createAssembler() const override { |
| const bool IsNonsfi = SandboxingType == ST_Nonsfi; |
| return makeUnique<ARM32::AssemblerARM32>(IsNonsfi); |
| } |
| |
| void initNodeForLowering(CfgNode *Node) override { |
| Computations.forgetProducers(); |
| Computations.recordProducers(Node); |
| Computations.dump(Func); |
| } |
| |
| void translateOm1() override; |
| void translateO2() override; |
| bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; |
| |
| SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; } |
| Variable *getPhysicalRegister(RegNumT RegNum, |
| Type Ty = IceType_void) override; |
| const char *getRegName(RegNumT RegNum, Type Ty) const override; |
| SmallBitVector getRegisterSet(RegSetMask Include, |
| RegSetMask Exclude) const override; |
| const SmallBitVector & |
| getRegistersForVariable(const Variable *Var) const override { |
| RegClass RC = Var->getRegClass(); |
| switch (RC) { |
| default: |
| assert(RC < RC_Target); |
| return TypeToRegisterSet[RC]; |
| case RegARM32::RCARM32_QtoS: |
| return TypeToRegisterSet[RC]; |
| } |
| } |
| const SmallBitVector & |
| getAllRegistersForVariable(const Variable *Var) const override { |
| RegClass RC = Var->getRegClass(); |
| assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM); |
| return TypeToRegisterSetUnfiltered[RC]; |
| } |
| const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override { |
| return RegisterAliases[Reg]; |
| } |
| bool hasFramePointer() const override { return UsesFramePointer; } |
| void setHasFramePointer() override { UsesFramePointer = true; } |
| RegNumT getStackReg() const override { return RegARM32::Reg_sp; } |
| RegNumT getFrameReg() const override { return RegARM32::Reg_fp; } |
| RegNumT getFrameOrStackReg() const override { |
| return UsesFramePointer ? getFrameReg() : getStackReg(); |
| } |
| RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; } |
| |
| size_t typeWidthInBytesOnStack(Type Ty) const override { |
| // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16 |
| // are rounded up to 4 bytes. |
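    // For example, (1 + 3) & ~3 == 4 for i8, while (8 + 3) & ~3 == 8 leaves
    // i64 unchanged.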
| return (typeWidthInBytes(Ty) + 3) & ~3; |
| } |
| uint32_t getStackAlignment() const override; |
| void reserveFixedAllocaArea(size_t Size, size_t Align) override { |
| FixedAllocaSizeBytes = Size; |
| assert(llvm::isPowerOf2_32(Align)); |
| FixedAllocaAlignBytes = Align; |
| PrologEmitsFixedAllocas = true; |
| } |
| int32_t getFrameFixedAllocaOffset() const override { |
| return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes); |
| } |
| uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; } |
| |
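  /// On ARM32, an i64 Variable is split into a Variable64On32 pair of i32
  /// halves that are lowered with 32-bit instructions; see loOperand(),
  /// hiOperand(), and lowerInt64Arithmetic().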
| bool shouldSplitToVariable64On32(Type Ty) const override { |
| return Ty == IceType_i64; |
| } |
| |
| // TODO(ascull): what size is best for ARM? |
| SizeT getMinJumpTableSize() const override { return 3; } |
| void emitJumpTable(const Cfg *Func, |
| const InstJumpTable *JumpTable) const override; |
| |
| void emitVariable(const Variable *Var) const override; |
| |
| void emit(const ConstantUndef *C) const final; |
| void emit(const ConstantInteger32 *C) const final; |
| void emit(const ConstantInteger64 *C) const final; |
| void emit(const ConstantFloat *C) const final; |
| void emit(const ConstantDouble *C) const final; |
| void emit(const ConstantRelocatable *C) const final; |
| |
| void lowerArguments() override; |
| void addProlog(CfgNode *Node) override; |
| void addEpilog(CfgNode *Node) override; |
| |
| Operand *loOperand(Operand *Operand); |
| Operand *hiOperand(Operand *Operand); |
| void finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| size_t BasicFrameOffset, size_t *InArgsSizeBytes); |
| |
| bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const { |
| return CPUFeatures.hasFeature(I); |
| } |
| |
| enum OperandLegalization { |
| Legal_Reg = 1 << 0, /// physical register, not stack location |
| Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small |
| /// immediates, shifted registers, or modified fp imm. |
| Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12] |
| Legal_Rematerializable = 1 << 3, |
| Legal_Default = ~Legal_Rematerializable, |
| }; |
| |
| using LegalMask = uint32_t; |
| Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT()); |
| Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default, |
| RegNumT RegNum = RegNumT()); |
| Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT()); |
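
  // A typical lowering sequence constrains each source to what the target
  // instruction can encode. An illustrative sketch (the real call sites live
  // in IceTargetLoweringARM32.cpp):
  //
  //   Variable *Src0R = legalizeToReg(Src0);
  //   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  //   _add(Dest, Src0R, Src1RF);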
| |
| OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const { |
| assert(ShAmtImm < 32); |
| return OperandARM32ShAmtImm::create( |
| Func, |
| llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F))); |
| } |
| |
| GlobalContext *getCtx() const { return Ctx; } |
| |
| protected: |
| explicit TargetARM32(Cfg *Func); |
| |
| void postLower() override; |
| |
| enum SafeBoolChain { |
| SBC_No, |
| SBC_Yes, |
| }; |
| |
| void lowerAlloca(const InstAlloca *Instr) override; |
| SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr); |
| void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest, |
| Operand *Src0, Operand *Src1); |
| void lowerArithmetic(const InstArithmetic *Instr) override; |
| void lowerAssign(const InstAssign *Instr) override; |
| void lowerBr(const InstBr *Instr) override; |
| void lowerCall(const InstCall *Instr) override; |
| void lowerCast(const InstCast *Instr) override; |
| void lowerExtractElement(const InstExtractElement *Instr) override; |
| |
| /// CondWhenTrue is a helper type returned by every method in the lowering |
| /// that emits code to set the condition codes. |
| class CondWhenTrue { |
| public: |
| explicit CondWhenTrue(CondARM32::Cond T0, |
| CondARM32::Cond T1 = CondARM32::kNone) |
| : WhenTrue0(T0), WhenTrue1(T1) { |
| assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone); |
| assert(T1 != T0 || T0 == CondARM32::kNone); |
| } |
| CondARM32::Cond WhenTrue0; |
| CondARM32::Cond WhenTrue1; |
| |
| /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted. |
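    /// For example, inverting EQ yields NE; AL and kNone invert to each
    /// other, which is why they get explicit cases in the switch below.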
| CondWhenTrue invert() const { |
| switch (WhenTrue0) { |
| default: |
| if (WhenTrue1 == CondARM32::kNone) |
| return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0)); |
| return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0), |
| InstARM32::getOppositeCondition(WhenTrue1)); |
| case CondARM32::AL: |
| return CondWhenTrue(CondARM32::kNone); |
| case CondARM32::kNone: |
| return CondWhenTrue(CondARM32::AL); |
| } |
| } |
| }; |
| |
| CondWhenTrue lowerFcmpCond(const InstFcmp *Instr); |
| void lowerFcmp(const InstFcmp *Instr) override; |
| CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, |
| Operand *Src0, Operand *Src1); |
| CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
| Operand *Src1); |
| CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
| Operand *Src1); |
| CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
| Operand *Src1); |
| CondWhenTrue lowerIcmpCond(const InstIcmp *Instr); |
| void lowerIcmp(const InstIcmp *Instr) override; |
  /// Emits the basic sequence for load-linked/store-exclusive loops:
| /// |
| /// retry: |
| /// ldrex tmp, [Addr] |
| /// StoreValue = Operation(tmp) |
| /// strexCond success, StoreValue, [Addr] |
| /// cmpCond success, #0 |
| /// bne retry |
| /// |
  /// Operation must return the value to be strex'd to Addr; it must not
  /// modify the flags if Cond is not AL, and it must not emit any instruction
  /// that could end up writing to memory. For i64, Operation is also
  /// responsible for emitting the required fake-defs.
| void |
| lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr, |
| std::function<Variable *(Variable *)> Operation, |
| CondARM32::Cond Cond = CondARM32::AL); |
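  // An illustrative sketch of an Operation implementing a 32-bit atomic add
  // (ValR stands for the already-legalized value to add; the real callers
  // live in IceTargetLoweringARM32.cpp):
  //
  //   lowerLoadLinkedStoreExclusive(IceType_i32, Addr, [&](Variable *Tmp) {
  //     Variable *StoreValue = makeReg(IceType_i32);
  //     _add(StoreValue, Tmp, ValR);
  //     return StoreValue;
  //   });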
| void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, |
| Operand *Val); |
| void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, |
| Operand *Val); |
| void lowerBreakpoint(const InstBreakpoint *Instr) override; |
| void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override; |
| void lowerInsertElement(const InstInsertElement *Instr) override; |
| void lowerLoad(const InstLoad *Instr) override; |
| void lowerPhi(const InstPhi *Instr) override; |
| void lowerRet(const InstRet *Instr) override; |
| void lowerSelect(const InstSelect *Instr) override; |
| void lowerShuffleVector(const InstShuffleVector *Instr) override; |
| void lowerStore(const InstStore *Instr) override; |
| void lowerSwitch(const InstSwitch *Instr) override; |
| void lowerUnreachable(const InstUnreachable *Instr) override; |
| void prelowerPhis() override; |
| uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override; |
| void genTargetHelperCallFor(Inst *Instr) override; |
| void doAddressOptLoad() override; |
| void doAddressOptStore() override; |
| void randomlyInsertNop(float Probability, |
| RandomNumberGenerator &RNG) override; |
| |
| OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty); |
| |
| Variable64On32 *makeI64RegPair(); |
| Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT()); |
| static Type stackSlotType(); |
| Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT()); |
| void alignRegisterPow2(Variable *Reg, uint32_t Align, |
| RegNumT TmpRegNum = RegNumT()); |
| |
  /// Returns a vector register initialized to all zeros.
| Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT()); |
| |
| void |
| makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation, |
| const SmallBitVector &ExcludeRegisters, |
| uint64_t Salt) const override; |
| |
  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP;
  // trap; .LSKIP: <continuation>. If no check is needed, nothing is inserted.
| void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi); |
| using ExtInstr = void (TargetARM32::*)(Variable *, Variable *, |
| CondARM32::Cond); |
| using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *, |
| CondARM32::Cond); |
| void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1, |
| ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder); |
| |
| void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi); |
| |
| // The following are helpers that insert lowered ARM32 instructions with |
| // minimal syntactic overhead, so that the lowering code can look as close to |
| // assembly as practical. |
| void _add(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred); |
| } |
| void _adds(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| constexpr bool SetFlags = true; |
| Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags); |
| if (SetFlags) { |
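      // Fake-using Dest keeps the flag-setting instruction alive even when
      // Dest itself is otherwise unused, since the flags side effect is not
      // modeled explicitly.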
| Context.insert<InstFakeUse>(Dest); |
| } |
| } |
| void _adc(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred); |
| } |
| void _and(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32And>(Dest, Src0, Src1, Pred); |
| } |
| void _asr(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred); |
| } |
| void _bic(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred); |
| } |
| void _br(CfgNode *TargetTrue, CfgNode *TargetFalse, |
| CondARM32::Cond Condition) { |
| Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition); |
| } |
| void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); } |
| void _br(CfgNode *Target, CondARM32::Cond Condition) { |
| Context.insert<InstARM32Br>(Target, Condition); |
| } |
| void _br(InstARM32Label *Label, CondARM32::Cond Condition) { |
| Context.insert<InstARM32Br>(Label, Condition); |
| } |
| void _cmn(Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Cmn>(Src0, Src1, Pred); |
| } |
| void _cmp(Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Cmp>(Src0, Src1, Pred); |
| } |
| void _clz(Variable *Dest, Variable *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Clz>(Dest, Src0, Pred); |
| } |
| void _dmb() { Context.insert<InstARM32Dmb>(); } |
| void _eor(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred); |
| } |
| /// _ldr, for all your memory to Variable data moves. It handles all types |
| /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's |
| /// type (e.g., no immediates for vector loads, and no index registers for fp |
| /// loads.) |
| void _ldr(Variable *Dest, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Ldr>(Dest, Addr, Pred); |
| } |
| InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred); |
| if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { |
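      // An i64 ldrex defines the whole Variable64On32; fake-def the lo/hi
      // halves so liveness sees them as defined here too.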
| Context.insert<InstFakeDef>(Dest64->getLo(), Dest); |
| Context.insert<InstFakeDef>(Dest64->getHi(), Dest); |
| } |
| return Ldrex; |
| } |
| void _lsl(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred); |
| } |
| void _lsls(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| constexpr bool SetFlags = true; |
| Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags); |
| if (SetFlags) { |
| Context.insert<InstFakeUse>(Dest); |
| } |
| } |
| void _lsr(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred); |
| } |
| void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred); |
| } |
| void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred); |
| } |
| /// _mov, for all your Variable to Variable data movement needs. It handles |
| /// all types (integer, floating point, and vectors), as well as moves between |
| /// Core and VFP registers. This is not a panacea: you must obey the (weird, |
| /// confusing, non-uniform) rules for data moves in ARM. |
| void _mov(Variable *Dest, Operand *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| // _mov used to be unique in the sense that it would create a temporary |
| // automagically if Dest was nullptr. It won't do that anymore, so we keep |
| // an assert around just in case there is some untested code path where Dest |
| // is nullptr. |
| assert(Dest != nullptr); |
| assert(!llvm::isa<OperandARM32Mem>(Src0)); |
| auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred); |
| |
| if (Instr->isMultiDest()) { |
| // If Instr is multi-dest, then Dest must be a Variable64On32. We add a |
| // fake-def for Instr.DestHi here. |
| assert(llvm::isa<Variable64On32>(Dest)); |
| Context.insert<InstFakeDef>(Instr->getDestHi()); |
| } |
| } |
| |
| void _mov_redefined(Variable *Dest, Operand *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred); |
| Instr->setDestRedefined(); |
| if (Instr->isMultiDest()) { |
| // If Instr is multi-dest, then Dest must be a Variable64On32. We add a |
| // fake-def for Instr.DestHi here. |
| assert(llvm::isa<Variable64On32>(Dest)); |
| Context.insert<InstFakeDef>(Instr->getDestHi()); |
| } |
| } |
| |
| void _nop() { Context.insert<InstARM32Nop>(); } |
| |
  // Generates a vmov instruction to extract the element at the given index
  // from a vector register.
| void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred); |
| } |
| |
| // Generates a vmov instruction to insert a value into the given index of a |
| // vector register. |
| void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred); |
| } |
| |
| // -------------------------------------------------------------------------- |
| // Begin bool folding machinery. |
| // |
| // There are three types of boolean lowerings handled by this target: |
| // |
| // 1) Boolean expressions leading to a boolean Variable definition |
| // --------------------------------------------------------------- |
| // |
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
| // the defining basic block) we do not fold the operation. We instead |
| // materialize (i.e., compute) the variable normally, so that it can be used |
| // when needed. We also materialize i1 values that are not single use to |
| // avoid code duplication. These expressions are not short circuited. |
| // |
| // 2) Boolean expressions leading to a select |
| // ------------------------------------------ |
| // |
| // These include boolean chains leading to a select instruction, as well as |
| // i1 Sexts. These boolean expressions are lowered to: |
| // |
| // mov T, <false value> |
| // CC <- eval(Boolean Expression) |
| // movCC T, <true value> |
| // |
| // For Sexts, <false value> is 0, and <true value> is -1. |
| // |
| // 3) Boolean expressions leading to a br i1 |
| // ----------------------------------------- |
| // |
| // These are the boolean chains leading to a branch. These chains are |
| // short-circuited, i.e.: |
| // |
| // A = or i1 B, C |
| // br i1 A, label %T, label %F |
| // |
| // becomes |
| // |
| // tst B |
| // jne %T |
  //   tst C
| // jne %T |
| // j %F |
| // |
| // and |
| // |
| // A = and i1 B, C |
| // br i1 A, label %T, label %F |
| // |
| // becomes |
| // |
| // tst B |
| // jeq %F |
  //   tst C
| // jeq %F |
| // j %T |
| // |
  // Arbitrarily long chains are short-circuited, e.g.:
| // |
| // A = or i1 B, C |
| // D = and i1 A, E |
| // F = and i1 G, H |
| // I = or i1 D, F |
| // br i1 I, label %True, label %False |
| // |
| // becomes |
| // |
| // Label[A]: |
| // tst B, 1 |
| // bne Label[D] |
| // tst C, 1 |
| // beq Label[I] |
| // Label[D]: |
| // tst E, 1 |
| // bne %True |
  // Label[I]:
| // tst G, 1 |
| // beq %False |
| // tst H, 1 |
| // beq %False (bne %True) |
| |
| /// lowerInt1 materializes Boolean to a Variable. |
| SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean); |
| |
| /// lowerInt1ForSelect generates the following instruction sequence: |
| /// |
| /// mov T, FalseValue |
| /// CC <- eval(Boolean) |
| /// movCC T, TrueValue |
| /// mov Dest, T |
| /// |
| /// It is used for lowering select i1, as well as i1 Sext. |
| void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue, |
| Operand *FalseValue); |
| |
  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode, or
| /// an InstARM32Label (but never both) so that, during br i1 lowering, we can |
| /// create auxiliary labels for short circuiting the condition evaluation. |
| class LowerInt1BranchTarget { |
| public: |
| explicit LowerInt1BranchTarget(CfgNode *const Target) |
| : NodeTarget(Target) {} |
| explicit LowerInt1BranchTarget(InstARM32Label *const Target) |
| : LabelTarget(Target) {} |
| |
    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is an exact copy of this one if Label is nullptr; otherwise, the
    /// returned object will wrap Label instead.
| LowerInt1BranchTarget |
| createForLabelOrDuplicate(InstARM32Label *Label) const { |
| if (Label != nullptr) |
| return LowerInt1BranchTarget(Label); |
| if (NodeTarget) |
| return LowerInt1BranchTarget(NodeTarget); |
| return LowerInt1BranchTarget(LabelTarget); |
| } |
| |
| CfgNode *const NodeTarget = nullptr; |
| InstARM32Label *const LabelTarget = nullptr; |
| }; |
| |
  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which kinds of arithmetic are allowed to be
  /// short-circuited. This is useful for lowering
| /// |
| /// t1 = and i1 A, B |
| /// t2 = and i1 t1, C |
  ///   br i1 t2, label %True, label %False
| /// |
| /// to |
| /// |
| /// tst A, 1 |
| /// beq %False |
| /// tst B, 1 |
| /// beq %False |
| /// tst C, 1 |
| /// bne %True |
| /// b %False |
| /// |
  /// Without this information, short-circuiting could only be applied to a
  /// single high-level instruction. For example:
| /// |
| /// t1 = or i1 A, B |
| /// t2 = and i1 t1, C |
  ///   br i1 t2, label %True, label %False
| /// |
| /// cannot be lowered to |
| /// |
| /// tst A, 1 |
| /// bne %True |
| /// tst B, 1 |
| /// bne %True |
| /// tst C, 1 |
  ///   bne %True
| /// b %False |
| /// |
| /// It needs to be lowered to |
| /// |
| /// tst A, 1 |
| /// bne Aux |
| /// tst B, 1 |
| /// beq %False |
| /// Aux: |
| /// tst C, 1 |
| /// bne %True |
| /// b %False |
| /// |
| /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it |
| /// might.) |
| enum LowerInt1AllowShortCircuit { |
| SC_And = 1, |
| SC_Or = 2, |
| SC_All = SC_And | SC_Or, |
| }; |
| |
| /// ShortCircuitCondAndLabel wraps the condition codes that should be used |
| /// after a lowerInt1ForBranch returns to branch to the |
  /// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then the
  /// call to lowerInt1ForBranch created an internal (i.e., short-circuit)
  /// label used for short-circuiting.
| class ShortCircuitCondAndLabel { |
| public: |
| explicit ShortCircuitCondAndLabel(CondWhenTrue &&C, |
| InstARM32Label *L = nullptr) |
| : Cond(std::move(C)), ShortCircuitTarget(L) {} |
| const CondWhenTrue Cond; |
| InstARM32Label *const ShortCircuitTarget; |
| |
| CondWhenTrue assertNoLabelAndReturnCond() const { |
| assert(ShortCircuitTarget == nullptr); |
| return Cond; |
| } |
| }; |
| |
| /// lowerInt1ForBranch expands Boolean, and returns the condition codes that |
| /// are to be used for branching to the branch's TrueTarget. It may return a |
| /// label that the expansion of Boolean used to short circuit the chain's |
| /// evaluation. |
| ShortCircuitCondAndLabel |
| lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, |
| const LowerInt1BranchTarget &TargetFalse, |
| uint32_t ShortCircuitable); |
| |
| // _br is a convenience wrapper that emits br instructions to Target. |
| void _br(const LowerInt1BranchTarget &BrTarget, |
| CondARM32::Cond Cond = CondARM32::AL) { |
| assert((BrTarget.NodeTarget == nullptr) != |
| (BrTarget.LabelTarget == nullptr)); |
| if (BrTarget.NodeTarget != nullptr) |
| _br(BrTarget.NodeTarget, Cond); |
| else |
| _br(BrTarget.LabelTarget, Cond); |
| } |
| |
| // _br_short_circuit is used when lowering InstArithmetic::And and |
| // InstArithmetic::Or and a short circuit branch is needed. |
| void _br_short_circuit(const LowerInt1BranchTarget &Target, |
| const CondWhenTrue &Cond) { |
| if (Cond.WhenTrue1 != CondARM32::kNone) { |
| _br(Target, Cond.WhenTrue1); |
| } |
| if (Cond.WhenTrue0 != CondARM32::kNone) { |
| _br(Target, Cond.WhenTrue0); |
| } |
| } |
| // End of bool folding machinery |
| // -------------------------------------------------------------------------- |
| |
| /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with |
| /// an upper16 relocation). |
| void _movt(Variable *Dest, Operand *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Movt>(Dest, Src0, Pred); |
| } |
| void _movw(Variable *Dest, Operand *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Movw>(Dest, Src0, Pred); |
| } |
| void _mul(Variable *Dest, Variable *Src0, Variable *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred); |
| } |
| void _mvn(Variable *Dest, Operand *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Mvn>(Dest, Src0, Pred); |
| } |
| void _orr(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred); |
| } |
| void _orrs(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| constexpr bool SetFlags = true; |
| Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags); |
| if (SetFlags) { |
| Context.insert<InstFakeUse>(Dest); |
| } |
| } |
| void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); } |
| void _pop(const VarList &Dests) { |
| Context.insert<InstARM32Pop>(Dests); |
| // Mark dests as modified. |
| for (Variable *Dest : Dests) |
| Context.insert<InstFakeDef>(Dest); |
| } |
| void _rbit(Variable *Dest, Variable *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Rbit>(Dest, Src0, Pred); |
| } |
| void _rev(Variable *Dest, Variable *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Rev>(Dest, Src0, Pred); |
| } |
| void _ret(Variable *LR, Variable *Src0 = nullptr) { |
| Context.insert<InstARM32Ret>(LR, Src0); |
| } |
| void _rscs(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| constexpr bool SetFlags = true; |
| Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags); |
| if (SetFlags) { |
| Context.insert<InstFakeUse>(Dest); |
| } |
| } |
| void _rsc(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred); |
| } |
| void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| constexpr bool SetFlags = true; |
| Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags); |
| if (SetFlags) { |
| Context.insert<InstFakeUse>(Dest); |
| } |
| } |
| void _rsb(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred); |
| } |
| void _sbc(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred); |
| } |
| void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| constexpr bool SetFlags = true; |
| Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags); |
| if (SetFlags) { |
| Context.insert<InstFakeUse>(Dest); |
| } |
| } |
| void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred); |
| } |
| /// _str, for all your Variable to memory transfers. Addr has the same |
| /// restrictions that it does in _ldr. |
| void _str(Variable *Value, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Str>(Value, Addr, Pred); |
| } |
| InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { |
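      // An i64 strex reads the whole Variable64On32; fake-use the lo/hi
      // halves so they stay live up to this instruction.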
| Context.insert<InstFakeUse>(Value64->getLo()); |
| Context.insert<InstFakeUse>(Value64->getHi()); |
| } |
| return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred); |
| } |
| void _sub(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred); |
| } |
| void _subs(Variable *Dest, Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| constexpr bool SetFlags = true; |
| Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags); |
| if (SetFlags) { |
| Context.insert<InstFakeUse>(Dest); |
| } |
| } |
| void _sxt(Variable *Dest, Variable *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Sxt>(Dest, Src0, Pred); |
| } |
| void _tst(Variable *Src0, Operand *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Tst>(Src0, Src1, Pred); |
| } |
| void _trap() { Context.insert<InstARM32Trap>(); } |
| void _udiv(Variable *Dest, Variable *Src0, Variable *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred); |
| } |
| void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0, |
| Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) { |
| // umull requires DestLo and DestHi to be assigned to different GPRs. The |
| // following lines create overlapping liveness ranges for both variables. If |
| // either one of them is live, then they are both going to be live, and thus |
| // assigned to different registers; if they are both dead, then DCE will |
| // kick in and delete the following three instructions. |
| Context.insert<InstFakeDef>(DestHi); |
| Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred); |
| Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined(); |
| Context.insert<InstFakeUse>(DestHi); |
| } |
| void _uxt(Variable *Dest, Variable *Src0, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Uxt>(Dest, Src0, Pred); |
| } |
| void _vabs(Variable *Dest, Variable *Src, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vabs>(Dest, Src, Pred); |
| } |
| void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vadd>(Dest, Src0, Src1); |
| } |
| void _vand(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vand>(Dest, Src0, Src1); |
| } |
| InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) { |
| return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1); |
| } |
| void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vceq>(Dest, Src0, Src1); |
| } |
| InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) { |
| return Context.insert<InstARM32Vcge>(Dest, Src0, Src1); |
| } |
| InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) { |
| return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1); |
| } |
| void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred); |
| } |
| void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vdiv>(Dest, Src0, Src1); |
| } |
| void _vcmp(Variable *Src0, Variable *Src1, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vcmp>(Src0, Src1, Pred); |
| } |
| void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred); |
| } |
| void _vdup(Variable *Dest, Variable *Src, int Idx) { |
| Context.insert<InstARM32Vdup>(Dest, Src, Idx); |
| } |
| void _veor(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Veor>(Dest, Src0, Src1); |
| } |
| void _vldr1d(Variable *Dest, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred); |
| } |
| void _vldr1q(Variable *Dest, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred); |
| } |
| void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vmrs>(Pred); |
| } |
| void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmla>(Dest, Src0, Src1); |
| } |
| void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmlap>(Dest, Src0, Src1); |
| } |
| void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmls>(Dest, Src0, Src1); |
| } |
| void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmovl>(Dest, Src0, Src1); |
| } |
| void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmovh>(Dest, Src0, Src1); |
| } |
| void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1); |
| } |
| void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1); |
| } |
| void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vmul>(Dest, Src0, Src1); |
| } |
| void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) { |
| Context.insert<InstARM32Vmulh>(Dest, Src0, Src1) |
| ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed); |
| } |
| void _vmvn(Variable *Dest, Variable *Src0) { |
| Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL); |
| } |
| void _vneg(Variable *Dest, Variable *Src0) { |
| Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL) |
| ->setSignType(InstARM32::FS_Signed); |
| } |
| void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vorr>(Dest, Src0, Src1); |
| } |
| void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) { |
| Context.insert<InstARM32Vqadd>(Dest, Src0, Src1) |
| ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed); |
| } |
| void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned, |
| bool Saturating) { |
| Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1) |
| ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned |
| : InstARM32::FS_Signed) |
| : InstARM32::FS_None); |
| } |
| void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) { |
| Context.insert<InstARM32Vqsub>(Dest, Src0, Src1) |
| ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed); |
| } |
| InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) { |
| return Context.insert<InstARM32Vshl>(Dest, Src0, Src1); |
| } |
| void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) { |
| Context.insert<InstARM32Vshl>(Dest, Src0, Src1) |
| ->setSignType(InstARM32::FS_Unsigned); |
| } |
| InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0, |
| ConstantInteger32 *Src1) { |
| return Context.insert<InstARM32Vshr>(Dest, Src0, Src1); |
| } |
| void _vsqrt(Variable *Dest, Variable *Src, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vsqrt>(Dest, Src, Pred); |
| } |
| void _vstr1d(Variable *Value, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32); |
| } |
| void _vstr1q(Variable *Value, OperandARM32Mem *Addr, |
| CondARM32::Cond Pred = CondARM32::AL) { |
| Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64); |
| } |
| void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vsub>(Dest, Src0, Src1); |
| } |
| void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) { |
| Context.insert<InstARM32Vzip>(Dest, Src0, Src1); |
| } |
| |
  // Iterates over the CFG and determines the maximum size, in bytes, of the
  // outgoing stack arguments. This information is later used during
  // addProlog() to pre-allocate the outargs area.
| // TODO(jpp): This could live in the Parser, if we provided a Target-specific |
| // method that the Parser could call. |
| void findMaxStackOutArgsSize(); |
| |
| /// Returns true if the given Offset can be represented in a Load/Store Mem |
| /// Operand. |
| bool isLegalMemOffset(Type Ty, int32_t Offset) const; |
| |
| void postLowerLegalization(); |
| |
| /// Manages the GotPtr variable, which is used for Nonsfi sandboxing. |
| /// @{ |
| void createGotPtr(); |
| void insertGotPtrInitPlaceholder(); |
| VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc); |
| void materializeGotAddr(CfgNode *Node); |
| Variable *GotPtr = nullptr; |
| // TODO(jpp): use CfgLocalAllocator. |
| /// @} |
| |
| /// Manages the Gotoff relocations created during the function lowering. A |
| /// single Gotoff relocation is created for each global variable used by the |
| /// function being lowered. |
| /// @{ |
| // TODO(jpp): if the same global G is used in different functions, then this |
| // method will emit one G(gotoff) relocation per function. |
| GlobalString createGotoffRelocation(const ConstantRelocatable *CR); |
| CfgUnorderedSet<GlobalString> KnownGotoffs; |
| /// @} |
| |
  /// Loads the constant relocatable Name into Register, then invokes Finish
  /// to complete the relocatable lowering. Finish **must** use PC in its
  /// first emitted instruction, or the relocatable in Register will contain
  /// the wrong value.
| // |
| // Lowered sequence: |
| // |
| // Movw: |
| // movw Register, #:lower16:Name - (End - Movw) - 8 . |
| // Movt: |
| // movt Register, #:upper16:Name - (End - Movt) - 8 . |
| // PC = fake-def |
| // End: |
| // Finish(PC) |
| // |
| // The -8 in movw/movt above is to account for the PC value that the first |
| // instruction emitted by Finish(PC) will read. |
| void |
| loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register, |
| std::function<void(Variable *PC)> Finish); |
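
  // An illustrative Finish callback that consumes PC in its first emitted
  // instruction (a sketch; see the actual uses in IceTargetLoweringARM32.cpp):
  //
  //   loadNamedConstantRelocatablePIC(
  //       Name, Register,
  //       [this, Register](Variable *PC) { _add(Register, PC, Register); });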
| |
| /// Sandboxer defines methods for ensuring that "dangerous" operations are |
| /// masked during sandboxed code emission. For regular, non-sandboxed code |
| /// emission, its methods are simple pass-through methods. |
| /// |
| /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions |
| /// in the constructor/destructor during sandboxed code emission. Therefore, |
| /// it is a bad idea to create an object of this type and "keep it around." |
| /// The recommended usage is: |
| /// |
  ///   Sandboxer(this).<<operation>>(...);
| /// |
| /// This usage ensures that no other instructions are inadvertently added to |
| /// the bundle. |
| class Sandboxer { |
| Sandboxer() = delete; |
| Sandboxer(const Sandboxer &) = delete; |
| Sandboxer &operator=(const Sandboxer &) = delete; |
| |
| public: |
| explicit Sandboxer( |
| TargetARM32 *Target, |
| InstBundleLock::Option BundleOption = InstBundleLock::Opt_None); |
| ~Sandboxer(); |
| |
| /// Increments sp: |
| /// |
| /// add sp, sp, AddAmount |
| /// bic sp, sp, 0xc0000000 |
| /// |
| /// (for the rationale, see the ARM 32-bit Sandbox Specification.) |
| void add_sp(Operand *AddAmount); |
| |
| /// Emits code to align sp to the specified alignment: |
| /// |
| /// bic/and sp, sp, Alignment |
    ///   bic sp, sp, 0xc0000000
| void align_sp(size_t Alignment); |
| |
| /// Emits a call instruction. If CallTarget is a Variable, it emits |
| /// |
| /// bic CallTarget, CallTarget, 0xc000000f |
| /// bl CallTarget |
| /// |
| /// Otherwise, it emits |
| /// |
| /// bl CallTarget |
| /// |
    /// Note: in sandboxed code, calls are always emitted at addresses that
    /// are 12 mod 16.
| InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget); |
| |
| /// Emits a load: |
| /// |
| /// bic rBase, rBase, 0xc0000000 |
| /// ldr rDest, [rBase, #Offset] |
| /// |
| /// Exception: if rBase is r9 or sp, then the load is emitted as: |
| /// |
| /// ldr rDest, [rBase, #Offset] |
| /// |
| /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are |
| /// always valid. |
| void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred); |
| |
| /// Emits a load exclusive: |
| /// |
| /// bic rBase, rBase, 0xc0000000 |
| /// ldrex rDest, [rBase] |
| /// |
| /// Exception: if rBase is r9 or sp, then the load is emitted as: |
| /// |
| /// ldrex rDest, [rBase] |
| /// |
| /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are |
| /// always valid. |
| void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred); |
| |
| /// Resets sp to Src: |
| /// |
| /// mov sp, Src |
| /// bic sp, sp, 0xc0000000 |
| void reset_sp(Variable *Src); |
| |
| /// Emits code to return from a function: |
| /// |
| /// bic lr, lr, 0xc000000f |
| /// bx lr |
| void ret(Variable *RetAddr, Variable *RetValue); |
| |
| /// Emits a store: |
| /// |
| /// bic rBase, rBase, 0xc0000000 |
| /// str rSrc, [rBase, #Offset] |
| /// |
| /// Exception: if rBase is r9 or sp, then the store is emitted as: |
| /// |
    ///   str rSrc, [rBase, #Offset]
| /// |
| /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are |
| /// always valid. |
| void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred); |
| |
| /// Emits a store exclusive: |
| /// |
| /// bic rBase, rBase, 0xc0000000 |
| /// strex rDest, rSrc, [rBase] |
| /// |
| /// Exception: if rBase is r9 or sp, then the store is emitted as: |
| /// |
| /// strex rDest, rSrc, [rBase] |
| /// |
| /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are |
| /// always valid. |
| void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem, |
| CondARM32::Cond Pred); |
| |
| /// Decrements sp: |
| /// |
| /// sub sp, sp, SubAmount |
| /// bic sp, sp, 0xc0000000 |
| void sub_sp(Operand *SubAmount); |
| |
| private: |
| TargetARM32 *const Target; |
| const InstBundleLock::Option BundleOption; |
| std::unique_ptr<AutoBundle> Bundler; |
| |
| void createAutoBundle(); |
| }; |
| |
| class PostLoweringLegalizer { |
| PostLoweringLegalizer() = delete; |
| PostLoweringLegalizer(const PostLoweringLegalizer &) = delete; |
| PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete; |
| |
| public: |
| explicit PostLoweringLegalizer(TargetARM32 *Target) |
| : Target(Target), StackOrFrameReg(Target->getPhysicalRegister( |
| Target->getFrameOrStackReg())) {} |
| |
| void resetTempBaseIfClobberedBy(const Inst *Instr); |
| |
    // Asserts that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
| void assertNoTempOrAssignedToIP() const { |
| assert(TempBaseReg == nullptr || |
| TempBaseReg->getRegNum() == Target->getReservedTmpReg()); |
| } |
| |
    // Legalizes Mem. If Mem.Base is a Rematerializable variable, Mem.Offset
    // is fixed up.
| OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem, |
| bool AllowOffsets = true); |
| |
| /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or |
| /// if its Source is a Rematerializable variable (this form is used in lieu |
| /// of lea, which is not available in ARM.) |
| /// |
| /// Moves to memory become store instructions, and moves from memory, loads. |
| void legalizeMov(InstARM32Mov *Mov); |
| |
| private: |
| /// Creates a new Base register centered around [Base, +/- Offset]. |
| Variable *newBaseRegister(Variable *Base, int32_t Offset, |
| RegNumT ScratchRegNum); |
| |
| /// Creates a new, legal OperandARM32Mem for accessing Base + Offset. |
| /// The returned mem operand is a legal operand for accessing memory that is |
| /// of type Ty. |
| /// |
| /// If [Base, #Offset] is encodable, then the method returns a Mem operand |
| /// expressing it. Otherwise, |
| /// |
| /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the |
| /// method will return that. Otherwise, |
| /// |
| /// a new base register ip=Base+Offset is created, and the method returns a |
| /// memory operand expressing [ip, #0]. |
| OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset, |
| bool AllowOffsets = true); |
| TargetARM32 *const Target; |
| Variable *const StackOrFrameReg; |
| Variable *TempBaseReg = nullptr; |
| int32_t TempBaseOffset = 0; |
| }; |
| |
| const bool NeedSandboxing; |
| TargetARM32Features CPUFeatures; |
| bool UsesFramePointer = false; |
| bool NeedsStackAlignment = false; |
| bool MaybeLeafFunc = true; |
| size_t SpillAreaSizeBytes = 0; |
| size_t FixedAllocaSizeBytes = 0; |
| size_t FixedAllocaAlignBytes = 0; |
| bool PrologEmitsFixedAllocas = false; |
| uint32_t MaxOutArgsSizeBytes = 0; |
| // TODO(jpp): std::array instead of array. |
| static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
| static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
| static SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; |
| SmallBitVector RegsUsed; |
| VarList PhysicalRegisters[IceType_NUM]; |
| VarList PreservedGPRs; |
| VarList PreservedSRegs; |
| |
| /// Helper class that understands the Calling Convention and register |
| /// assignments. The first few integer type parameters can use r0-r3, |
| /// regardless of their position relative to the floating-point/vector |
| /// arguments in the argument list. Floating-point and vector arguments |
| /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic, |
| /// see the ARM Architecture Procedure Calling Standards (AAPCS). |
| /// |
  /// Technically, arguments that start in registers but extend beyond the
  /// available registers can be split between the registers and the stack.
| /// However, this is typically for passing GPR structs by value, and PNaCl |
| /// transforms expand this out. |
| /// |
| /// At (public) function entry, the stack must be 8-byte aligned. |
| class CallingConv { |
| CallingConv(const CallingConv &) = delete; |
| CallingConv &operator=(const CallingConv &) = delete; |
| |
| public: |
| CallingConv(); |
| ~CallingConv() = default; |
| |
| /// argInGPR returns true if there is a GPR available for the requested |
| /// type, and false otherwise. If it returns true, Reg is set to the |
| /// appropriate register number. Note that, when Ty == IceType_i64, Reg will |
| /// be an I64 register pair. |
| bool argInGPR(Type Ty, RegNumT *Reg); |
| |
| /// argInVFP is to floating-point/vector types what argInGPR is for integer |
| /// types. |
| bool argInVFP(Type Ty, RegNumT *Reg); |
| |
| private: |
| void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs); |
| SmallBitVector GPRegsUsed; |
| CfgVector<RegNumT> GPRArgs; |
| CfgVector<RegNumT> I64Args; |
| |
| void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs); |
| SmallBitVector VFPRegsUsed; |
| CfgVector<RegNumT> FP32Args; |
| CfgVector<RegNumT> FP64Args; |
| CfgVector<RegNumT> Vec128Args; |
| }; |
| |
| private: |
| ENABLE_MAKE_UNIQUE; |
| |
| OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, |
| Operand *Base); |
| |
| void postambleCtpop64(const InstCall *Instr); |
| void preambleDivRem(const InstCall *Instr); |
| CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)> |
| ARM32HelpersPreamble; |
| CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)> |
| ARM32HelpersPostamble; |
| |
| class ComputationTracker { |
| public: |
| ComputationTracker() = default; |
| ~ComputationTracker() = default; |
| |
| void forgetProducers() { KnownComputations.clear(); } |
| void recordProducers(CfgNode *Node); |
| |
| const Inst *getProducerOf(const Operand *Opnd) const { |
| auto *Var = llvm::dyn_cast<Variable>(Opnd); |
| if (Var == nullptr) { |
| return nullptr; |
| } |
| |
| auto Iter = KnownComputations.find(Var->getIndex()); |
| if (Iter == KnownComputations.end()) { |
| return nullptr; |
| } |
| |
| return Iter->second.Instr; |
| } |
| |
| void dump(const Cfg *Func) const { |
| if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding)) |
| return; |
| OstreamLocker L(Func->getContext()); |
| Ostream &Str = Func->getContext()->getStrDump(); |
| Str << "foldable producer:\n"; |
| for (const auto &Computation : KnownComputations) { |
| Str << " "; |
| Computation.second.Instr->dump(Func); |
| Str << "\n"; |
| } |
| Str << "\n"; |
| } |
| |
| private: |
| class ComputationEntry { |
| public: |
| ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {} |
| Inst *const Instr; |
      // Boolean folding is disabled for variables whose live range is
      // multi-block. We conservatively initialize IsLiveOut to true, and set
      // it to false once we find the end of the live range for the variable
      // defined by this instruction. If liveness analysis is not performed
      // (e.g., in Om1 mode) IsLiveOut will never be set to false, and folding
      // will be disabled.
| bool IsLiveOut = true; |
| int32_t NumUses = 0; |
| Type ComputationType; |
| }; |
| |
| // ComputationMap maps a Variable number to a payload identifying which |
| // instruction defined it. |
| using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>; |
| ComputationMap KnownComputations; |
| }; |
| |
| ComputationTracker Computations; |
| |
| // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked |
| // without specifying a physical register. This is needed for creating unbound |
| // temporaries during Ice -> ARM lowering, but before register allocation. |
  // This is a safeguard ensuring that no unbound temporaries are created
  // during the legalization post-passes.
| bool AllowTemporaryWithNoReg = true; |
| // ForbidTemporaryWithoutReg is a RAII class that manages |
| // AllowTemporaryWithNoReg. |
| class ForbidTemporaryWithoutReg { |
| ForbidTemporaryWithoutReg() = delete; |
| ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete; |
| ForbidTemporaryWithoutReg & |
| operator=(const ForbidTemporaryWithoutReg &) = delete; |
| |
| public: |
| explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) { |
| Target->AllowTemporaryWithNoReg = false; |
| } |
| ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; } |
| |
| private: |
| TargetARM32 *const Target; |
| }; |
| }; |
| |
| class TargetDataARM32 final : public TargetDataLowering { |
| TargetDataARM32() = delete; |
| TargetDataARM32(const TargetDataARM32 &) = delete; |
| TargetDataARM32 &operator=(const TargetDataARM32 &) = delete; |
| |
| public: |
| static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) { |
| return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx)); |
| } |
| |
| void lowerGlobals(const VariableDeclarationList &Vars, |
| const std::string &SectionSuffix) override; |
| void lowerConstants() override; |
| void lowerJumpTables() override; |
| |
| protected: |
| explicit TargetDataARM32(GlobalContext *Ctx); |
| |
| private: |
| ~TargetDataARM32() override = default; |
| }; |
| |
| class TargetHeaderARM32 final : public TargetHeaderLowering { |
| TargetHeaderARM32() = delete; |
| TargetHeaderARM32(const TargetHeaderARM32 &) = delete; |
| TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete; |
| |
| public: |
| static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) { |
| return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx)); |
| } |
| |
| void lower() override; |
| |
| protected: |
| explicit TargetHeaderARM32(GlobalContext *Ctx); |
| |
| private: |
| ~TargetHeaderARM32() = default; |
| |
| TargetARM32Features CPUFeatures; |
| }; |
| |
| } // end of namespace ARM32 |
| } // end of namespace Ice |
| |
| #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H |