Subzero. ARM32. Address mode formation. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=kschimpf@google.com, stichnot@chromium.org Review URL: https://codereview.chromium.org/1422753010 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp index 35dbef6..7312d84 100644 --- a/src/IceInstARM32.cpp +++ b/src/IceInstARM32.cpp
@@ -33,7 +33,7 @@ int8_t SExtAddrOffsetBits; int8_t ZExtAddrOffsetBits; } TypeARM32Attributes[] = { -#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr) \ +#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ { int_width, vec_width, sbits, ubits } \ , ICETYPEARM32_TABLE
diff --git a/src/IceInstARM32.def b/src/IceInstARM32.def index 291a64a..b3d55b2 100644 --- a/src/IceInstARM32.def +++ b/src/IceInstARM32.def
@@ -289,23 +289,23 @@ // extending load/stores). #define ICETYPEARM32_TABLE \ /* tag, element type, int_width, vec_width, addr bits sext, zext, \ - reg-reg addr allowed */ \ - X(IceType_void, IceType_void, "" , "" , 0 , 0 , 0) \ - X(IceType_i1, IceType_void, "b", "" , 8 , 12, 1) \ - X(IceType_i8, IceType_void, "b", "" , 8 , 12, 1) \ - X(IceType_i16, IceType_void, "h", "" , 8 , 8 , 1) \ - X(IceType_i32, IceType_void, "" , "" , 12, 12, 1) \ - X(IceType_i64, IceType_void, "d", "" , 0 , 0 , 0) \ - X(IceType_f32, IceType_void, "" , ".f32", 8, 8 , 0) \ - X(IceType_f64, IceType_void, "" , ".f64", 8, 8 , 0) \ - X(IceType_v4i1, IceType_i32 , "" , ".i32", 0 , 0 , 1) \ - X(IceType_v8i1, IceType_i16 , "" , ".i16", 0 , 0 , 1) \ - X(IceType_v16i1, IceType_i8 , "" , ".i8" , 0 , 0 , 1) \ - X(IceType_v16i8, IceType_i8 , "" , ".i8" , 0 , 0 , 1) \ - X(IceType_v8i16, IceType_i16 , "" , ".i16", 0 , 0 , 1) \ - X(IceType_v4i32, IceType_i32 , "" , ".i32", 0 , 0 , 1) \ - X(IceType_v4f32, IceType_f32 , "" , ".f32", 0 , 0 , 1) -//#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr) + reg-reg addr allowed, shift allowed, */ \ + X(IceType_void, IceType_void, "" , "" , 0 , 0 , 0, 0) \ + X(IceType_i1, IceType_void, "b", "" , 8 , 12, 1, 1) \ + X(IceType_i8, IceType_void, "b", "" , 8 , 12, 1, 1) \ + X(IceType_i16, IceType_void, "h", "" , 8 , 8 , 1, 0) \ + X(IceType_i32, IceType_void, "" , "" , 12, 12, 1, 1) \ + X(IceType_i64, IceType_void, "d", "" , 12, 12, 1, 1) \ + X(IceType_f32, IceType_void, "" , ".f32", 8, 8 , 0, 0) \ + X(IceType_f64, IceType_void, "" , ".f64", 8, 8 , 0, 0) \ + X(IceType_v4i1, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \ + X(IceType_v8i1, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \ + X(IceType_v16i1, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \ + X(IceType_v16i8, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \ + X(IceType_v8i16, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \ + X(IceType_v4i32, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \ + X(IceType_v4f32, IceType_f32 , "" , ".f32", 0 , 0 , 1, 0) +//#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) // Shifter types for Data-processing operands as defined in section A5.1.2. #define ICEINSTARM32SHIFT_TABLE \
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp index d2aa74e..17bd1b5 100644 --- a/src/IceTargetLoweringARM32.cpp +++ b/src/IceTargetLoweringARM32.cpp
@@ -465,7 +465,7 @@ Offset += getStackAdjustment(); } const Type VarTy = Var->getType(); - if (!isLegalVariableStackOffset(VarTy, Offset)) { + if (!isLegalMemOffset(VarTy, Offset)) { llvm::report_fatal_error("Illegal stack offset"); } Str << "[" << getRegName(BaseRegNum, VarTy); @@ -837,7 +837,7 @@ this->HasComputedFrame = true; if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { - OstreamLocker L(Func->getContext()); + OstreamLocker _(Func->getContext()); Ostream &Str = Func->getContext()->getStrDump(); Str << "Stack layout:\n"; @@ -947,15 +947,15 @@ RI->setDeleted(); } -bool TargetARM32::isLegalVariableStackOffset(Type Ty, int32_t Offset) const { - constexpr bool SignExt = false; - return OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset); +bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const { + constexpr bool ZeroExt = false; + return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset); } -StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, - int32_t StackAdjust, - Variable *OrigBaseReg) { - int32_t Offset = Var->getStackOffset() + StackAdjust; +Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset, + int32_t StackAdjust, + Variable *OrigBaseReg) { + int32_t Offset = OriginalOffset + StackAdjust; // Legalize will likely need a movw/movt combination, but if the top bits are // all 0 from negating the offset and subtracting, we could use that instead. bool ShouldSub = (-Offset & 0xFFFF0000) == 0; @@ -968,12 +968,85 @@ _sub(ScratchReg, OrigBaseReg, OffsetVal); else _add(ScratchReg, OrigBaseReg, OffsetVal); - StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType()); - NewVar->setMustNotHaveReg(); - NewVar->setBaseRegNum(ScratchReg->getRegNum()); - constexpr int32_t NewOffset = 0; - NewVar->setStackOffset(NewOffset); - return NewVar; + return ScratchReg; +} + +StackVariable *TargetARM32::legalizeStackSlot(Type Ty, int32_t Offset, + int32_t StackAdjust, + Variable *OrigBaseReg, + Variable **NewBaseReg, + int32_t *NewBaseOffset) { + if (*NewBaseReg == nullptr) { + *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); + *NewBaseOffset = Offset + StackAdjust; + } + + int32_t OffsetDiff = Offset + StackAdjust - *NewBaseOffset; + if (!isLegalMemOffset(Ty, OffsetDiff)) { + *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); + *NewBaseOffset = Offset + StackAdjust; + OffsetDiff = 0; + } + + StackVariable *NewDest = Func->makeVariable<StackVariable>(Ty); + NewDest->setMustNotHaveReg(); + NewDest->setBaseRegNum((*NewBaseReg)->getRegNum()); + NewDest->setStackOffset(OffsetDiff); + return NewDest; +} + +void TargetARM32::legalizeMovStackAddrImm(InstARM32Mov *MovInstr, + int32_t StackAdjust, + Variable *OrigBaseReg, + Variable **NewBaseReg, + int32_t *NewBaseOffset) { + Variable *Dest = MovInstr->getDest(); + assert(Dest != nullptr); + Type DestTy = Dest->getType(); + assert(DestTy != IceType_i64); + + Operand *Src = MovInstr->getSrc(0); + Type SrcTy = Src->getType(); + assert(SrcTy != IceType_i64); + + if (MovInstr->isMultiDest() || MovInstr->isMultiSource()) + return; + + bool Legalized = false; + if (!Dest->hasReg()) { + assert(llvm::cast<Variable>(Src)->hasReg()); + const int32_t Offset = Dest->getStackOffset(); + if (!isLegalMemOffset(DestTy, Offset + StackAdjust)) { + Legalized = true; + Dest = legalizeStackSlot(DestTy, Offset, StackAdjust, OrigBaseReg, + NewBaseReg, NewBaseOffset); + } + } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { + if (!Var->hasReg()) { + const int32_t Offset = Var->getStackOffset(); + if (!isLegalMemOffset(SrcTy, Offset + StackAdjust)) { + Legalized = true; + Src = legalizeStackSlot(SrcTy, Offset, StackAdjust, OrigBaseReg, + NewBaseReg, NewBaseOffset); + } + } + } else if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { + if (ConstantInteger32 *OffsetOp = Mem->getOffset()) { + const int32_t Offset = OffsetOp->getValue(); + if (!isLegalMemOffset(SrcTy, Offset + StackAdjust)) { + assert(Mem->getBase()->hasReg()); + assert(Mem->getBase()->getRegNum() == (int32_t)getFrameOrStackReg()); + Legalized = true; + Src = legalizeStackSlot(SrcTy, Offset, StackAdjust, OrigBaseReg, + NewBaseReg, NewBaseOffset); + } + } + } + + if (Legalized) { + _mov(Dest, Src); + MovInstr->setDeleted(); + } } void TargetARM32::legalizeStackSlots() { @@ -991,7 +1064,7 @@ // Early exit, if SpillAreaSizeBytes is really small. // TODO(jpp): this is not safe -- loads and stores of q registers can't have // offsets. - if (isLegalVariableStackOffset(IceType_v4i32, SpillAreaSizeBytes)) + if (isLegalMemOffset(IceType_v4i32, SpillAreaSizeBytes)) return; Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); int32_t StackAdjust = 0; @@ -1006,12 +1079,13 @@ // don't depend on this legalization. for (CfgNode *Node : Func->getNodes()) { Context.init(Node); - StackVariable *NewBaseReg = nullptr; + Variable *NewBaseReg = nullptr; int32_t NewBaseOffset = 0; while (!Context.atEnd()) { PostIncrLoweringContext PostIncrement(Context); Inst *CurInstr = Context.getCur(); Variable *Dest = CurInstr->getDest(); + // Check if the previous NewBaseReg is clobbered, and reset if needed. if ((Dest && NewBaseReg && Dest->hasReg() && Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || @@ -1019,6 +1093,7 @@ NewBaseReg = nullptr; NewBaseOffset = 0; } + // The stack adjustment only matters if we are using SP instead of FP. if (!hasFramePointer()) { if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { @@ -1033,80 +1108,11 @@ } } - // For now, only Mov instructions can have stack variables. We need to - // know the type of instruction because we currently create a fresh one - // to replace Dest/Source, rather than mutate in place. - bool MayNeedOffsetRewrite = false; + // The Lowering ensures that ldr and str always have legal Mem operands. + // The only other instruction that may access memory is mov. if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { - MayNeedOffsetRewrite = - !MovInstr->isMultiDest() && !MovInstr->isMultiSource(); - } - - if (!MayNeedOffsetRewrite) { - continue; - } - - assert(Dest != nullptr); - Type DestTy = Dest->getType(); - assert(DestTy != IceType_i64); - if (!Dest->hasReg()) { - int32_t Offset = Dest->getStackOffset(); - Offset += StackAdjust; - if (!isLegalVariableStackOffset(DestTy, Offset)) { - if (NewBaseReg) { - int32_t OffsetDiff = Offset - NewBaseOffset; - if (isLegalVariableStackOffset(DestTy, OffsetDiff)) { - StackVariable *NewDest = - Func->makeVariable<StackVariable>(stackSlotType()); - NewDest->setMustNotHaveReg(); - NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); - NewDest->setStackOffset(OffsetDiff); - Variable *NewDestVar = NewDest; - _mov(NewDestVar, CurInstr->getSrc(0)); - CurInstr->setDeleted(); - continue; - } - } - StackVariable *LegalDest = - legalizeVariableSlot(Dest, StackAdjust, OrigBaseReg); - assert(LegalDest != Dest); - Variable *LegalDestVar = LegalDest; - _mov(LegalDestVar, CurInstr->getSrc(0)); - CurInstr->setDeleted(); - NewBaseReg = LegalDest; - NewBaseOffset = Offset; - continue; - } - } - assert(CurInstr->getSrcSize() == 1); - Variable *Var = llvm::dyn_cast<Variable>(CurInstr->getSrc(0)); - if (Var && !Var->hasReg()) { - Type VarTy = Var->getType(); - int32_t Offset = Var->getStackOffset(); - Offset += StackAdjust; - if (!isLegalVariableStackOffset(VarTy, Offset)) { - if (NewBaseReg) { - int32_t OffsetDiff = Offset - NewBaseOffset; - if (isLegalVariableStackOffset(VarTy, OffsetDiff)) { - StackVariable *NewVar = - Func->makeVariable<StackVariable>(stackSlotType()); - NewVar->setMustNotHaveReg(); - NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); - NewVar->setStackOffset(OffsetDiff); - _mov(Dest, NewVar); - CurInstr->setDeleted(); - continue; - } - } - StackVariable *LegalVar = - legalizeVariableSlot(Var, StackAdjust, OrigBaseReg); - assert(LegalVar != Var); - _mov(Dest, LegalVar); - CurInstr->setDeleted(); - NewBaseReg = LegalVar; - NewBaseOffset = Offset; - continue; - } + legalizeMovStackAddrImm(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg, + &NewBaseOffset); } } } @@ -1171,8 +1177,8 @@ ConstantInteger32 *Offset = Mem->getOffset(); assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); int32_t NextOffsetVal = Offset->getValue() + 4; - const bool SignExt = false; - if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { + constexpr bool ZeroExt = false; + if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) { // We have to make a temp variable and add 4 to either Base or Offset. // If we add 4 to Offset, this will convert a non-RegReg addressing // mode into a RegReg addressing mode. Since NaCl sandboxing disallows @@ -1819,7 +1825,7 @@ CondARM32::Cond BrCondTrue1 = CondARM32::kNone; CondARM32::Cond BrCondFalse = CondARM32::kNone; if (!_mov_i1_to_flags(Cond, &BrCondTrue0, &BrCondTrue1, &BrCondFalse)) { - // "Cond" was not fold. + // "Cond" was not folded. Type Ty = Cond->getType(); Variable *Src0R = legalizeToReg(Cond); assert(Ty == IceType_i1); @@ -3375,7 +3381,461 @@ lowerAssign(Assign); } -void TargetARM32::doAddressOptLoad() {} +namespace { +void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset, + const Variable *OffsetReg, int16_t OffsetRegShAmt, + const Inst *Reason) { + if (!BuildDefs::dump()) + return; + if (!Func->isVerbose(IceV_AddrOpt)) + return; + OstreamLocker _(Func->getContext()); + Ostream &Str = Func->getContext()->getStrDump(); + Str << "Instruction: "; + Reason->dumpDecorated(Func); + Str << " results in Base="; + if (Base) + Base->dump(Func); + else + Str << "<null>"; + Str << ", OffsetReg="; + if (OffsetReg) + OffsetReg->dump(Func); + else + Str << "<null>"; + Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n"; +} + +bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var, + int32_t *Offset, const Inst **Reason) { + // Var originates from Var=SrcVar ==> set Var:=SrcVar + if (*Var == nullptr) + return false; + const Inst *VarAssign = VMetadata->getSingleDefinition(*Var); + if (!VarAssign) + return false; + assert(!VMetadata->isMultiDef(*Var)); + if (!llvm::isa<InstAssign>(VarAssign)) + return false; + + Operand *SrcOp = VarAssign->getSrc(0); + if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) { + if (!VMetadata->isMultiDef(SrcVar) || + // TODO: ensure SrcVar stays single-BB + false) { + *Var = SrcVar; + } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) { + int32_t MoreOffset = Const->getValue(); + int32_t NewOffset = MoreOffset + *Offset; + if (Utils::WouldOverflowAdd(*Offset, MoreOffset)) + return false; + *Var = nullptr; + *Offset += NewOffset; + } + + *Reason = VarAssign; + return true; + } + + return false; +} + +bool isAddOrSub(const Inst *Inst, InstArithmetic::OpKind *Kind) { + if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Inst)) { + switch (Arith->getOp()) { + default: + return false; + case InstArithmetic::Add: + case InstArithmetic::Sub: + *Kind = Arith->getOp(); + return true; + } + } + return false; +} + +bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base, + Variable **OffsetReg, int32_t OffsetRegShamt, + const Inst **Reason) { + // OffsetReg==nullptr && Base is Base=Var1+Var2 ==> + // set Base=Var1, OffsetReg=Var2, Shift=0 + if (*Base == nullptr) + return false; + if (*OffsetReg != nullptr) + return false; + (void)OffsetRegShamt; + assert(OffsetRegShamt == 0); + const Inst *BaseInst = VMetadata->getSingleDefinition(*Base); + if (BaseInst == nullptr) + return false; + assert(!VMetadata->isMultiDef(*Base)); + if (BaseInst->getSrcSize() < 2) + return false; + auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0)); + if (!Var1) + return false; + if (VMetadata->isMultiDef(Var1)) + return false; + auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1)); + if (!Var2) + return false; + if (VMetadata->isMultiDef(Var2)) + return false; + InstArithmetic::OpKind _; + if (!isAddOrSub(BaseInst, &_) || + // TODO: ensure Var1 and Var2 stay single-BB + false) + return false; + *Base = Var1; + *OffsetReg = Var2; + // OffsetRegShamt is already 0. + *Reason = BaseInst; + return true; +} + +bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata, + Variable **OffsetReg, OperandARM32::ShiftKind *Kind, + int32_t *OffsetRegShamt, const Inst **Reason) { + // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==> + // OffsetReg=Var, Shift+=log2(Const) + // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==> + // OffsetReg=Var, Shift+=Const + // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==> + // OffsetReg=Var, Shift-=Const + OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift; + if (*OffsetReg == nullptr) + return false; + auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg); + if (IndexInst == nullptr) + return false; + assert(!VMetadata->isMultiDef(*OffsetReg)); + if (IndexInst->getSrcSize() < 2) + return false; + auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst); + if (ArithInst == nullptr) + return false; + auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0)); + if (Var == nullptr) + return false; + auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1)); + if (Const == nullptr) { + assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0))); + return false; + } + if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32) + return false; + + uint32_t NewShamt = -1; + switch (ArithInst->getOp()) { + default: + return false; + case InstArithmetic::Shl: { + NewShiftKind = OperandARM32::LSL; + NewShamt = Const->getValue(); + if (NewShamt > 31) + return false; + } break; + case InstArithmetic::Lshr: { + NewShiftKind = OperandARM32::LSR; + NewShamt = Const->getValue(); + if (NewShamt > 31) + return false; + } break; + case InstArithmetic::Ashr: { + NewShiftKind = OperandARM32::ASR; + NewShamt = Const->getValue(); + if (NewShamt > 31) + return false; + } break; + case InstArithmetic::Udiv: + case InstArithmetic::Mul: { + const uint32_t UnsignedConst = Const->getValue(); + NewShamt = llvm::findFirstSet(UnsignedConst); + if (NewShamt != llvm::findLastSet(UnsignedConst)) { + // First bit set is not the same as the last bit set, so Const is not + // a power of 2. + return false; + } + NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv + ? OperandARM32::LSR + : OperandARM32::LSL; + } break; + } + // Allowed "transitions": + // kNoShift -> * iff NewShamt < 31 + // LSL -> LSL iff NewShamt + OffsetRegShamt < 31 + // LSR -> LSR iff NewShamt + OffsetRegShamt < 31 + // ASR -> ASR iff NewShamt + OffsetRegShamt < 31 + if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) { + return false; + } + const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt; + if (NewOffsetRegShamt > 31) + return false; + *OffsetReg = Var; + *OffsetRegShamt = NewOffsetRegShamt; + *Kind = NewShiftKind; + *Reason = IndexInst; + return true; +} + +bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base, + int32_t *Offset, const Inst **Reason) { + // Base is Base=Var+Const || Base is Base=Const+Var ==> + // set Base=Var, Offset+=Const + // Base is Base=Var-Const ==> + // set Base=Var, Offset-=Const + if (*Base == nullptr) + return false; + const Inst *BaseInst = VMetadata->getSingleDefinition(*Base); + if (BaseInst == nullptr) { + return false; + } + assert(!VMetadata->isMultiDef(*Base)); + + auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst); + if (ArithInst == nullptr) + return false; + InstArithmetic::OpKind Kind; + if (!isAddOrSub(ArithInst, &Kind)) + return false; + bool IsAdd = Kind == InstArithmetic::Add; + Operand *Src0 = ArithInst->getSrc(0); + Operand *Src1 = ArithInst->getSrc(1); + auto *Var0 = llvm::dyn_cast<Variable>(Src0); + auto *Var1 = llvm::dyn_cast<Variable>(Src1); + auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0); + auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1); + Variable *NewBase = nullptr; + int32_t NewOffset = *Offset; + + if (Var0 == nullptr && Const0 == nullptr) { + assert(llvm::isa<ConstantRelocatable>(Src0)); + return false; + } + + if (Var1 == nullptr && Const1 == nullptr) { + assert(llvm::isa<ConstantRelocatable>(Src1)); + return false; + } + + if (Var0 && Var1) + // TODO(jpp): merge base/index splitting into here. + return false; + if (!IsAdd && Var1) + return false; + if (Var0) + NewBase = Var0; + else if (Var1) + NewBase = Var1; + // Compute the updated constant offset. + if (Const0) { + int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue(); + if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) + return false; + NewOffset += MoreOffset; + } + if (Const1) { + int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue(); + if (Utils::WouldOverflowAdd(NewOffset, MoreOffset)) + return false; + NewOffset += MoreOffset; + } + + // Update the computed address parameters once we are sure optimization + // is valid. + *Base = NewBase; + *Offset = NewOffset; + *Reason = BaseInst; + return true; +} +} // end of anonymous namespace + +// ARM32 address modes: +// ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12], +// [reg +/- reg << shamt5] +// ld/st f[32|64] : [reg], [reg +/- imm8] , [pc +/- imm8] +// ld/st vectors : [reg] +// +// For now, we don't handle address modes with Relocatables. +namespace { +// MemTraits contains per-type valid address mode information. +#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ + static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag); +ICETYPEARM32_TABLE +#undef X + +static const struct { + int32_t ValidImmMask; + bool CanHaveImm; + bool CanHaveIndex; + bool CanHaveShiftedIndex; +} MemTraits[] = { +#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ + { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, } \ + , + ICETYPEARM32_TABLE +#undef X +}; +static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits); +} // end of anonymous namespace + +OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func, + const Inst *LdSt, + Operand *Base) { + assert(Base != nullptr); + int32_t OffsetImm = 0; + Variable *OffsetReg = nullptr; + int32_t OffsetRegShamt = 0; + OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift; + + Func->resetCurrentNode(); + if (Func->isVerbose(IceV_AddrOpt)) { + OstreamLocker _(Func->getContext()); + Ostream &Str = Func->getContext()->getStrDump(); + Str << "\nAddress mode formation:\t"; + LdSt->dumpDecorated(Func); + } + + if (isVectorType(Ty)) + // vector loads and stores do not allow offsets, and only support the + // "[reg]" addressing mode (the other supported modes are write back.) + return nullptr; + + auto *BaseVar = llvm::dyn_cast<Variable>(Base); + if (BaseVar == nullptr) + return nullptr; + + (void)MemTraitsSize; + assert(Ty < MemTraitsSize); + auto *TypeTraits = &MemTraits[Ty]; + const bool CanHaveIndex = TypeTraits->CanHaveIndex; + const bool CanHaveShiftedIndex = TypeTraits->CanHaveShiftedIndex; + const bool CanHaveImm = TypeTraits->CanHaveImm; + const int32_t ValidImmMask = TypeTraits->ValidImmMask; + (void)ValidImmMask; + assert(!CanHaveImm || ValidImmMask >= 0); + + const VariablesMetadata *VMetadata = Func->getVMetadata(); + const Inst *Reason = nullptr; + + do { + if (Reason != nullptr) { + dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt, + Reason); + Reason = nullptr; + } + + if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) { + continue; + } + + if (CanHaveIndex && + matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) { + continue; + } + + if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg, + OffsetRegShamt, &Reason)) { + continue; + } + + if (CanHaveShiftedIndex) { + if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind, + &OffsetRegShamt, &Reason)) { + continue; + } + + if ((OffsetRegShamt == 0) && + matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind, + &OffsetRegShamt, &Reason)) { + std::swap(BaseVar, OffsetReg); + continue; + } + } + + if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) { + continue; + } + } while (Reason); + + if (BaseVar == nullptr) { + // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to + // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}]. + // Instead of a zeroed BaseReg, we initialize it with OffsetImm: + // + // [OffsetReg{, LSL Shamt}{, #OffsetImm}] -> + // mov BaseReg, #OffsetImm + // use of [BaseReg, OffsetReg{, LSL Shamt}] + // + const Type PointerType = getPointerType(); + BaseVar = makeReg(PointerType); + Context.insert( + InstAssign::create(Func, BaseVar, Ctx->getConstantInt32(OffsetImm))); + OffsetImm = 0; + } else if (OffsetImm != 0) { + // ARM Ldr/Str instructions have limited range immediates. The formation + // loop above materialized an Immediate carelessly, so we ensure the + // generated offset is sane. + const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm; + const InstArithmetic::OpKind Op = + OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub; + + if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) || + OffsetReg != nullptr) { + if (OffsetReg == nullptr) { + // We formed a [Base, #const] addressing mode which is not encodable in + // ARM. There is little point in forming an address mode now if we don't + // have an offset. Effectively, we would end up with something like + // + // [Base, #const] -> add T, Base, #const + // use of [T] + // + // Which is exactly what we already have. So we just bite the bullet + // here and don't form any address mode. + return nullptr; + } + // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to + // + // [Base, Offset, {LSL amount}, #const] -> + // add T, Base, #const + // use of [T, Offset {, LSL amount}] + const Type PointerType = getPointerType(); + Variable *T = makeReg(PointerType); + Context.insert(InstArithmetic::create( + Func, Op, T, BaseVar, Ctx->getConstantInt32(PositiveOffset))); + BaseVar = T; + OffsetImm = 0; + } + } + + assert(BaseVar != nullptr); + assert(OffsetImm == 0 || OffsetReg == nullptr); + assert(OffsetReg == nullptr || CanHaveIndex); + assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm + : (ValidImmMask & OffsetImm) == OffsetImm); + + if (OffsetReg != nullptr) { + return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetReg, ShiftKind, + OffsetRegShamt); + } + + return OperandARM32Mem::create( + Func, Ty, BaseVar, + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm))); +} + +void TargetARM32::doAddressOptLoad() { + Inst *Instr = Context.getCur(); + assert(llvm::isa<InstLoad>(Instr)); + Variable *Dest = Instr->getDest(); + Operand *Addr = Instr->getSrc(0); + if (OperandARM32Mem *Mem = + formAddressingMode(Dest->getType(), Func, Instr, Addr)) { + Instr->setDeleted(); + Context.insert(InstLoad::create(Func, Dest, Mem)); + } +} void TargetARM32::randomlyInsertNop(float Probability, RandomNumberGenerator &RNG) { @@ -3576,7 +4036,17 @@ } } -void TargetARM32::doAddressOptStore() {} +void TargetARM32::doAddressOptStore() { + Inst *Instr = Context.getCur(); + assert(llvm::isa<InstStore>(Instr)); + Operand *Src = Instr->getSrc(0); + Operand *Addr = Instr->getSrc(1); + if (OperandARM32Mem *Mem = + formAddressingMode(Src->getType(), Func, Instr, Addr)) { + Instr->setDeleted(); + Context.insert(InstStore::create(Func, Src, Mem)); + } +} void TargetARM32::lowerSwitch(const InstSwitch *Inst) { // This implements the most naive possible lowering. @@ -3673,16 +4143,6 @@ // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we // can always copy to a register. if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { - static const struct { - bool CanHaveOffset; - bool CanHaveIndex; - } MemTraits[] = { -#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr) \ - { (ubits) > 0, rraddr } \ - , - ICETYPEARM32_TABLE -#undef X - }; // Before doing anything with a Mem operand, we need to ensure that the // Base and Index components are in physical registers. Variable *Base = Mem->getBase(); @@ -3691,26 +4151,26 @@ assert(Index == nullptr || Offset == nullptr); Variable *RegBase = nullptr; Variable *RegIndex = nullptr; - if (Base) { - RegBase = legalizeToReg(Base); - } + assert(Base); + RegBase = legalizeToReg(Base); + bool InvalidImm = false; + assert(Ty < MemTraitsSize); if (Index) { + assert(Offset == nullptr); + assert(MemTraits[Ty].CanHaveIndex); RegIndex = legalizeToReg(Index); - if (!MemTraits[Ty].CanHaveIndex) { - Variable *T = makeReg(IceType_i32, getReservedTmpReg()); - _add(T, RegBase, RegIndex); - RegBase = T; - RegIndex = nullptr; - } } if (Offset && Offset->getValue() != 0) { - static constexpr bool SignExt = false; - if (!MemTraits[Ty].CanHaveOffset || - !OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset->getValue())) { - Variable *T = legalizeToReg(Offset, getReservedTmpReg()); - _add(T, T, RegBase); - RegBase = T; - Offset = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)); + assert(Index == nullptr); + static constexpr bool ZeroExt = false; + assert(MemTraits[Ty].CanHaveImm); + if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) { + assert(RegBase->hasReg()); + assert(RegBase->getRegNum() == (int32_t)getFrameOrStackReg()); + // We are a bit more lenient with invalid immediate when accessing the + // stack here, and then rely on legalizeStackSlots() to fix things as + // appropriate. + InvalidImm = true; } } @@ -3718,7 +4178,7 @@ if (Base != RegBase || Index != RegIndex) { // There is only a reg +/- reg or reg + imm form. // Figure out which to re-create. - if (RegBase && RegIndex) { + if (RegIndex) { Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex, Mem->getShiftOp(), Mem->getShiftAmt(), Mem->getAddrMode()); @@ -3731,7 +4191,15 @@ From = Mem; } else { Variable *Reg = makeReg(Ty, RegNum); - _ldr(Reg, Mem); + if (InvalidImm) { + // If Mem has an invalid immediate, we legalize it to a Reg using mov + // instead of ldr because legalizeStackSlots() will later kick in and + // fix the immediate for us. + _mov(Reg, Mem); + } else { + _ldr(Reg, Mem); + } + From = Reg; } return From; @@ -3804,7 +4272,7 @@ // TODO(jvoung): Allow certain immediates to be encoded directly in an // operand. See Table A7-18 of the ARM manual: "Floating-point modified // immediate constants". Or, for 32-bit floating point numbers, just - // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG, + // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG // instead of using a movw/movt pair to get the const-pool address then // loading to SREG. std::string Buffer; @@ -3874,9 +4342,9 @@ if (Mem) { return llvm::cast<OperandARM32Mem>(legalize(Mem)); } - // If we didn't do address mode optimization, then we only have a base/offset - // to work with. ARM always requires a base register, so just use that to - // hold the operand. + // If we didn't do address mode optimization, then we only have a + // base/offset to work with. ARM always requires a base register, so + // just use that to hold the operand. Variable *Base = legalizeToReg(Operand); return OperandARM32Mem::create( Func, Ty, Base, @@ -4095,7 +4563,7 @@ case FT_Asm: case FT_Iasm: { const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly(); - OstreamLocker L(Ctx); + OstreamLocker _(Ctx); for (const VariableDeclaration *Var : Vars) { if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { emitGlobal(*Var, SectionSuffix); @@ -4198,7 +4666,7 @@ break; case FT_Asm: case FT_Iasm: { - OstreamLocker L(Ctx); + OstreamLocker _(Ctx); emitConstantPool<float>(Ctx); emitConstantPool<double>(Ctx); break; @@ -4227,7 +4695,7 @@ : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} void TargetHeaderARM32::lower() { - OstreamLocker L(Ctx); + OstreamLocker _(Ctx); Ostream &Str = Ctx->getStrEmit(); Str << ".syntax unified\n"; // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h index f6029c4..268ab8c 100644 --- a/src/IceTargetLoweringARM32.h +++ b/src/IceTargetLoweringARM32.h
@@ -544,12 +544,23 @@ /// If the offset is not legal, use a new base register that accounts for the /// offset, such that the addressing mode offset bits are now legal. void legalizeStackSlots(); - /// Returns true if the given Offset can be represented in a stack ldr/str. - bool isLegalVariableStackOffset(Type Ty, int32_t Offset) const; - /// Assuming Var needs its offset legalized, define a new base register - /// centered on the given Var's offset plus StackAdjust, and use it. - StackVariable *legalizeVariableSlot(Variable *Var, int32_t StackAdjust, - Variable *OrigBaseReg); + /// Returns true if the given Offset can be represented in a ldr/str. + bool isLegalMemOffset(Type Ty, int32_t Offset) const; + // Creates a new Base register centered around + // [OrigBaseReg, +/- Offset+StackAdjust]. + Variable *newBaseRegister(int32_t Offset, int32_t StackAdjust, + Variable *OrigBaseReg); + /// Creates a new, legal StackVariable w.r.t. ARM's Immediate requirements. + /// This method is not very smart: it will always create and return a new + /// StackVariable, even if Offset + StackAdjust is encodable. + StackVariable *legalizeStackSlot(Type Ty, int32_t Offset, int32_t StackAdjust, + Variable *OrigBaseReg, Variable **NewBaseReg, + int32_t *NewBaseOffset); + /// Legalizes Mov if its Source (or Destination) contains an invalid + /// immediate. + void legalizeMovStackAddrImm(InstARM32Mov *Mov, int32_t StackAdjust, + Variable *OrigBaseReg, Variable **NewBaseReg, + int32_t *NewBaseOffset); TargetARM32Features CPUFeatures; bool UsesFramePointer = false; @@ -614,8 +625,12 @@ private: ~TargetARM32() override = default; + OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, + Operand *Base); + void lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue, CondARM32::Cond *CondIfFalse); + class BoolComputationTracker { public: BoolComputationTracker() = default;