Subzero. ARM32. No more SP frobbing. Pre-computes the max stack size for outgoing arguments, and pre-allocates it during the prolog, deallocating it during the epilog. With this CL, there are no more StackAdjustments needed for the ARM32, which will simplify rematerializing alloca'd variables. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=sehr@chromium.org Review URL: https://codereview.chromium.org/1467473003 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp index 76fceb1..0ca2d6a 100644 --- a/src/IceInstARM32.cpp +++ b/src/IceInstARM32.cpp
@@ -382,13 +382,6 @@ Vars[1] = ShiftVar; } -InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP, - SizeT Amount, Operand *SrcAmount) - : InstARM32(Func, InstARM32::Adjuststack, 2, SP), Amount(Amount) { - addSource(SP); - addSource(SrcAmount); -} - InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse, const InstARM32Label *Label, CondARM32::Cond Pred) @@ -1272,33 +1265,6 @@ } } -void InstARM32AdjustStack::emit(const Cfg *Func) const { - if (!BuildDefs::dump()) - return; - Ostream &Str = Func->getContext()->getStrEmit(); - assert(getSrcSize() == 2); - Str << "\t" - << "sub" - << "\t"; - getDest()->emit(Func); - Str << ", "; - getSrc(0)->emit(Func); - Str << ", "; - getSrc(1)->emit(Func); - Func->getTarget()->updateStackAdjustment(Amount); -} - -void InstARM32AdjustStack::dump(const Cfg *Func) const { - if (!BuildDefs::dump()) - return; - Ostream &Str = Func->getContext()->getStrDump(); - getDest()->dump(Func); - Str << " = sub.i32 "; - getSrc(0)->dump(Func); - Str << ", " << Amount << " ; "; - getSrc(1)->dump(Func); -} - void InstARM32Push::emit(const Cfg *Func) const { // TODO(jpp): Improve FP register save/restore. if (!BuildDefs::dump())
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h index 0ae21c5..58c0c5b 100644 --- a/src/IceInstARM32.h +++ b/src/IceInstARM32.h
@@ -980,34 +980,6 @@ const InstARM32Label *Label; // Intra-block branch target }; -/// AdjustStack instruction - subtracts SP by the given amount and updates the -/// stack offset during code emission. -class InstARM32AdjustStack : public InstARM32 { - InstARM32AdjustStack() = delete; - InstARM32AdjustStack(const InstARM32AdjustStack &) = delete; - InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete; - -public: - /// Note: We need both Amount and SrcAmount. If Amount is too large then it - /// needs to be copied to a register (so SrcAmount could be a register). - /// However, we also need the numeric Amount for bookkeeping, and it's hard to - /// pull that from the generic SrcAmount operand. - static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount, - Operand *SrcAmount) { - return new (Func->allocate<InstARM32AdjustStack>()) - InstARM32AdjustStack(Func, SP, Amount, SrcAmount); - } - void emit(const Cfg *Func) const override; - void dump(const Cfg *Func) const override; - static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); } - SizeT getAmount() const { return Amount; } - -private: - InstARM32AdjustStack(Cfg *Func, Variable *SP, SizeT Amount, - Operand *SrcAmount); - const SizeT Amount; -}; - /// Call instruction (bl/blx). Arguments should have already been pushed. /// Technically bl and the register form of blx can be predicated, but we'll /// leave that out until needed.
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h index 5489b8b..1350afa 100644 --- a/src/IceTargetLowering.h +++ b/src/IceTargetLowering.h
@@ -311,6 +311,7 @@ virtual void lowerOther(const Inst *Instr); virtual void genTargetHelperCallFor(Inst *Instr) = 0; + virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0; virtual void doAddressOptLoad() {} virtual void doAddressOptStore() {}
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp index 60d3a37..12810f6 100644 --- a/src/IceTargetLoweringARM32.cpp +++ b/src/IceTargetLoweringARM32.cpp
@@ -233,12 +233,62 @@ } } // end of anonymous namespace +uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) { + TargetARM32::CallingConv CC; + size_t OutArgsSizeBytes = 0; + for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) { + Operand *Arg = legalizeUndef(Call->getArg(i)); + Type Ty = Arg->getType(); + if (Ty == IceType_i64) { + std::pair<int32_t, int32_t> Regs; + if (CC.I64InRegs(&Regs)) { + continue; + } + } else if (isVectorType(Ty) || isFloatingType(Ty)) { + int32_t Reg; + if (CC.FPInReg(Ty, &Reg)) { + continue; + } + } else { + assert(Ty == IceType_i32); + int32_t Reg; + if (CC.I32InReg(&Reg)) { + continue; + } + } + + OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty); + OutArgsSizeBytes += typeWidthInBytesOnStack(Ty); + } + + return applyStackAlignment(OutArgsSizeBytes); +} + +void TargetARM32::findMaxStackOutArgsSize() { + // MinNeededOutArgsBytes should be updated if the Target ever creates a + // high-level InstCall that requires more stack bytes. + constexpr size_t MinNeededOutArgsBytes = 0; + MaxOutArgsSizeBytes = MinNeededOutArgsBytes; + for (CfgNode *Node : Func->getNodes()) { + Context.init(Node); + while (!Context.atEnd()) { + PostIncrLoweringContext PostIncrement(Context); + Inst *CurInstr = Context.getCur(); + if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { + SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); + MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); + } + } + } +} + void TargetARM32::translateO2() { TimerMarker T(TimerStack::TT_O2, Func); // TODO(stichnot): share passes with X86? // https://code.google.com/p/nativeclient/issues/detail?id=4094 genTargetHelperCalls(); + findMaxStackOutArgsSize(); // Do not merge Alloca instructions, and lay out the stack. static constexpr bool SortAndCombineAllocas = false; @@ -346,6 +396,7 @@ // TODO: share passes with X86? 
genTargetHelperCalls(); + findMaxStackOutArgsSize(); // Do not merge Alloca instructions, and lay out the stack. static constexpr bool SortAndCombineAllocas = false; @@ -473,8 +524,6 @@ int32_t BaseRegNum = Var->getBaseRegNum(); if (BaseRegNum == Variable::NoRegister) { BaseRegNum = getFrameOrStackReg(); - if (!hasFramePointer()) - Offset += getStackAdjustment(); } const Type VarTy = Var->getType(); Str << "[" << getRegName(BaseRegNum, VarTy); @@ -670,7 +719,11 @@ // +------------------------+ // | 6. padding | // +------------------------+ - // | 7. allocas | + // | 7. allocas (variable) | + // +------------------------+ + // | 8. padding | + // +------------------------+ + // | 9. out args | // +------------------------+ <--- StackPointer // // The following variables record the size in bytes of the given areas: @@ -679,7 +732,9 @@ // * GlobalsSize: area 3 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 // * LocalsSpillAreaSize: area 5 - // * SpillAreaSizeBytes: areas 2 - 6 + // * SpillAreaSizeBytes: areas 2 - 6, and 9 + // * MaxOutArgsSizeBytes: area 9 + // // Determine stack frame offsets for each Variable without a register // assignment. This can be done as one variable per stack slot. Or, do // coalescing by running the register allocator again with an infinite set of @@ -785,10 +840,13 @@ uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize + LocalsSlotsPaddingBytes; - // Align SP if necessary. - if (NeedsStackAlignment) { + // Adds the out args space to the stack, and align SP if necessary. 
+ if (!NeedsStackAlignment) { + SpillAreaSizeBytes += MaxOutArgsSizeBytes; + } else { uint32_t StackOffset = PreservedRegsSizeBytes; uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); + StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); SpillAreaSizeBytes = StackSize - StackOffset; } @@ -802,8 +860,6 @@ } Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); - resetStackAdjustment(); - // Fill in stack offsets for stack args, and copy args into registers for // those that were register-allocated. Args are pushed right to left, so // Arg[0] is closest to the stack/frame pointer. @@ -847,7 +903,8 @@ Str << "Stack layout:\n"; uint32_t SPAdjustmentPaddingSize = SpillAreaSizeBytes - LocalsSpillAreaSize - - GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; + GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - + MaxOutArgsSizeBytes; Str << " in-args = " << InArgsSizeBytes << " bytes\n" << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" @@ -860,6 +917,7 @@ Str << "Stack details:\n" << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" + << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n" << " locals spill area alignment = " << LocalsSlotsAlignmentBytes << " bytes\n" << " is FP based = " << UsesFramePointer << "\n"; @@ -956,10 +1014,7 @@ return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset); } -Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset, - int32_t StackAdjust, - Variable *OrigBaseReg) { - int32_t Offset = OriginalOffset + StackAdjust; +Variable *TargetARM32::newBaseRegister(int32_t Offset, Variable *OrigBaseReg) { // Legalize will likely need a movw/movt combination, but if the top bits are // all 0 from negating the offset and subtracting, we could use that instead. 
bool ShouldSub = (-Offset & 0xFFFF0000) == 0; @@ -976,26 +1031,25 @@ } OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, - int32_t StackAdjust, Variable *OrigBaseReg, Variable **NewBaseReg, int32_t *NewBaseOffset) { - if (isLegalMemOffset(Ty, Offset + StackAdjust)) { + if (isLegalMemOffset(Ty, Offset)) { return OperandARM32Mem::create( - Func, Ty, OrigBaseReg, llvm::cast<ConstantInteger32>( - Ctx->getConstantInt32(Offset + StackAdjust)), + Func, Ty, OrigBaseReg, + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)), OperandARM32Mem::Offset); } if (*NewBaseReg == nullptr) { - *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); - *NewBaseOffset = Offset + StackAdjust; + *NewBaseReg = newBaseRegister(Offset, OrigBaseReg); + *NewBaseOffset = Offset; } - int32_t OffsetDiff = Offset + StackAdjust - *NewBaseOffset; + int32_t OffsetDiff = Offset - *NewBaseOffset; if (!isLegalMemOffset(Ty, OffsetDiff)) { - *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); - *NewBaseOffset = Offset + StackAdjust; + *NewBaseReg = newBaseRegister(Offset, OrigBaseReg); + *NewBaseOffset = Offset; OffsetDiff = 0; } @@ -1005,9 +1059,8 @@ OperandARM32Mem::Offset); } -void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, - Variable *OrigBaseReg, Variable **NewBaseReg, - int32_t *NewBaseOffset) { +void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg, + Variable **NewBaseReg, int32_t *NewBaseOffset) { Variable *Dest = MovInstr->getDest(); assert(Dest != nullptr); Type DestTy = Dest->getType(); @@ -1027,8 +1080,8 @@ assert(SrcR->hasReg()); const int32_t Offset = Dest->getStackOffset(); // This is a _mov(Mem(), Variable), i.e., a store. 
- _str(SrcR, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, - NewBaseReg, NewBaseOffset), + _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, + NewBaseOffset), MovInstr->getPredicate()); // _str() does not have a Dest, so we add a fake-def(Dest). Context.insert(InstFakeDef::create(Func, Dest)); @@ -1036,8 +1089,8 @@ } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { if (!Var->hasReg()) { const int32_t Offset = Var->getStackOffset(); - _ldr(Dest, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, - NewBaseReg, NewBaseOffset), + _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, + NewBaseOffset), MovInstr->getPredicate()); Legalized = true; } @@ -1064,7 +1117,6 @@ Func->dump("Before legalizeStackSlots"); assert(hasComputedFrame()); Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); - int32_t StackAdjust = 0; // Do a fairly naive greedy clustering for now. Pick the first stack slot // that's out of bounds and make a new base reg using the architecture's temp // register. If that works for the next slot, then great. Otherwise, create a @@ -1091,23 +1143,8 @@ NewBaseOffset = 0; } - // The stack adjustment only matters if we are using SP instead of FP. 
- if (!hasFramePointer()) { - if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { - StackAdjust += AdjInst->getAmount(); - NewBaseOffset += AdjInst->getAmount(); - continue; - } - if (llvm::isa<InstARM32Call>(CurInstr)) { - NewBaseOffset -= StackAdjust; - StackAdjust = 0; - continue; - } - } - if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { - legalizeMov(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg, - &NewBaseOffset); + legalizeMov(MovInstr, OrigBaseReg, &NewBaseReg, &NewBaseOffset); } } } @@ -1269,7 +1306,14 @@ alignRegisterPow2(T, Alignment); _sub(SP, SP, T); } - _mov(Dest, SP); + Variable *T = SP; + if (MaxOutArgsSizeBytes != 0) { + T = makeReg(getPointerType()); + Operand *OutArgsSizeRF = legalize( + Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex); + _add(T, SP, OutArgsSizeRF); + } + _mov(Dest, T); } void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { @@ -2093,6 +2137,8 @@ } case InstArithmetic::Sub: { if (Srcs.hasConstOperand()) { + // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed + // to be used. Variable *Src0R = Srcs.src0R(this); if (Srcs.immediateIsFlexEncodable()) { Operand *Src1RF = Srcs.src1RF(this); @@ -2346,7 +2392,7 @@ TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs; // Pair of Arg Operand -> stack offset. llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs; - int32_t ParameterAreaSizeBytes = 0; + size_t ParameterAreaSizeBytes = 0; // Classify each argument operand according to the location where the // argument is passed. @@ -2390,16 +2436,8 @@ // the stack is already aligned at the start of the calling sequence. ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); - // Subtract the appropriate amount for the argument area. This also takes - // care of setting the stack adjustment during emission. 
- // - // TODO: If for some reason the call instruction gets dead-code eliminated - // after lowering, we would need to ensure that the pre-call and the - // post-call esp adjustment get eliminated as well. - if (ParameterAreaSizeBytes) { - Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), - Legal_Reg | Legal_Flex); - _adjust_stack(ParameterAreaSizeBytes, SubAmount); + if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) { + llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max."); } // Copy arguments that are passed on the stack to the appropriate stack @@ -2492,15 +2530,6 @@ if (ReturnRegHi) Context.insert(InstFakeDef::create(Func, ReturnRegHi)); - // Add the appropriate offset to SP. The call instruction takes care of - // resetting the stack offset during emission. - if (ParameterAreaSizeBytes) { - Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), - Legal_Reg | Legal_Flex); - Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); - _add(SP, SP, AddAmount); - } - // Insert a register-kill pseudo instruction. Context.insert(InstFakeKill::create(Func, NewCall));
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h index c0266eb..cf38fe7 100644 --- a/src/IceTargetLoweringARM32.h +++ b/src/IceTargetLoweringARM32.h
@@ -237,6 +237,7 @@ void lowerSwitch(const InstSwitch *Inst) override; void lowerUnreachable(const InstUnreachable *Inst) override; void prelowerPhis() override; + uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override; void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; } void doAddressOptLoad() override; void doAddressOptStore() override; @@ -289,10 +290,6 @@ CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Adc::create(Func, Dest, Src0, Src1, Pred)); } - void _adjust_stack(int32_t Amount, Operand *SrcAmount) { - Context.insert(InstARM32AdjustStack::create( - Func, getPhysicalRegister(RegARM32::Reg_sp), Amount, SrcAmount)); - } void _and(Variable *Dest, Variable *Src0, Operand *Src1, CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred)); } @@ -813,6 +810,13 @@ Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1)); } + // Iterates over the CFG and determines the maximum outgoing stack arguments + // bytes. This information is later used during addProlog() to pre-allocate + // the outargs area. + // TODO(jpp): This could live in the Parser, if we provided a Target-specific + // method that the Parser could call. + void findMaxStackOutArgsSize(); + /// Run a pass through stack variables and ensure that the offsets are legal. /// If the offset is not legal, use a new base register that accounts for the /// offset, such that the addressing mode offset bits are now legal. @@ -820,36 +824,35 @@ /// Returns true if the given Offset can be represented in a ldr/str. bool isLegalMemOffset(Type Ty, int32_t Offset) const; // Creates a new Base register centered around - // [OrigBaseReg, +/- Offset+StackAdjust]. - Variable *newBaseRegister(int32_t Offset, int32_t StackAdjust, - Variable *OrigBaseReg); - /// Creates a new, legal OperandARM32Mem for accessing OrigBase + Offset + - /// StackAdjust. 
The returned mem operand is a legal operand for accessing - /// memory that is of type Ty. + // [OrigBaseReg, +/- Offset]. + Variable *newBaseRegister(int32_t Offset, Variable *OrigBaseReg); + /// Creates a new, legal OperandARM32Mem for accessing OrigBase + Offset. The + /// returned mem operand is a legal operand for accessing memory that is of + /// type Ty. /// - /// If [OrigBaseReg, #Offset+StackAdjust] is encodable, then the method - /// returns a Mem operand expressing it. Otherwise, + /// If [OrigBaseReg, #Offset] is encodable, then the method returns a Mem + /// operand expressing it. Otherwise, /// - /// if [*NewBaseReg, #Offset+StackAdjust-*NewBaseOffset] is encodable, the - /// method will return that. Otherwise, + /// if [*NewBaseReg, #Offset-*NewBaseOffset] is encodable, the method will + /// return that. Otherwise, /// - /// a new base register ip=OrigBaseReg+Offset+StackAdjust is created, and the - /// method returns [ip, #0]. + /// a new base register ip=OrigBaseReg+Offset is created, and the method + /// returns [ip, #0]. OperandARM32Mem *createMemOperand(Type Ty, int32_t Offset, - int32_t StackAdjust, Variable *OrigBaseReg, + Variable *OrigBaseReg, Variable **NewBaseReg, int32_t *NewBaseOffset); /// Legalizes Mov if its Source (or Destination) is a spilled Variable. Moves /// to memory become store instructions, and moves from memory, loads. - void legalizeMov(InstARM32Mov *Mov, int32_t StackAdjust, - Variable *OrigBaseReg, Variable **NewBaseReg, - int32_t *NewBaseOffset); + void legalizeMov(InstARM32Mov *Mov, Variable *OrigBaseReg, + Variable **NewBaseReg, int32_t *NewBaseOffset); TargetARM32Features CPUFeatures; bool UsesFramePointer = false; bool NeedsStackAlignment = false; bool MaybeLeafFunc = true; size_t SpillAreaSizeBytes = 0; + uint32_t MaxOutArgsSizeBytes = 0; // TODO(jpp): std::array instead of array. static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h index 5523360..387ec2e 100644 --- a/src/IceTargetLoweringMIPS32.h +++ b/src/IceTargetLoweringMIPS32.h
@@ -235,6 +235,10 @@ void lowerSwitch(const InstSwitch *Inst) override; void lowerUnreachable(const InstUnreachable *Inst) override; void prelowerPhis() override; + uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override { + (void)Instr; + return 0; + } void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; } void doAddressOptLoad() override; void doAddressOptStore() override;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h index 3127151..36d3f9b 100644 --- a/src/IceTargetLoweringX86Base.h +++ b/src/IceTargetLoweringX86Base.h
@@ -182,6 +182,10 @@ void lowerOther(const Inst *Instr) override; void lowerRMW(const typename Traits::Insts::FakeRMW *RMW); void prelowerPhis() override; + uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override { + (void)Instr; + return 0; + } void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; } void doAddressOptLoad() override; void doAddressOptStore() override;
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll index b0110bf..88d4a3d 100644 --- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll +++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -92,25 +92,19 @@ ; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline ; ARM32-LABEL: pass64BitArg -; ARM32: sub sp, {{.*}} #16 ; ARM32: str {{.*}}, [sp] ; ARM32: movw r2, #123 ; ARM32: bl {{.*}} ignore64BitArgNoInline -; ARM32: add sp, {{.*}} #16 -; ARM32: sub sp, {{.*}} #16 ; ARM32: str {{.*}}, [sp] ; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r1 ; ARM32: movw r2, #123 ; ARM32: bl {{.*}} ignore64BitArgNoInline -; ARM32: add sp, {{.*}} #16 -; ARM32: sub sp, {{.*}} #16 ; ARM32: str {{.*}}, [sp] ; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r1 ; ARM32: movw r2, #123 ; ARM32: bl {{.*}} ignore64BitArgNoInline -; ARM32: add sp, {{.*}} #16 declare i32 @ignore64BitArgNoInline(i64, i32, i64) @@ -144,7 +138,6 @@ ; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline ; ARM32-LABEL: pass64BitConstArg -; ARM32: sub sp, {{.*}} #16 ; ARM32: movw [[REG1:r.*]], {{.*}} ; 0xbeef ; ARM32: movt [[REG1]], {{.*}} ; 0xdead ; ARM32: movw [[REG2:r.*]], {{.*}} ; 0x5678 @@ -155,7 +148,6 @@ ; ARM32: {{mov|ldr}} r1 ; ARM32: movw r2, #123 ; ARM32: bl {{.*}} ignore64BitArgNoInline -; ARM32: add sp, {{.*}} #16 define internal i32 @pass64BitUndefArg() { entry:
diff --git a/tests_lit/llvm2ice_tests/large_stack_offs.ll b/tests_lit/llvm2ice_tests/large_stack_offs.ll index 7818336..8449df7 100644 --- a/tests_lit/llvm2ice_tests/large_stack_offs.ll +++ b/tests_lit/llvm2ice_tests/large_stack_offs.ll
@@ -49,7 +49,7 @@ ; ARM32-NOT: mov fp, sp ; ARM32: movw ip, #4{{.*}} ; ARM32-NEXT: sub sp, sp, ip -; ARM32: movw ip, #4232 +; ARM32: movw ip, #4248 ; ARM32-NEXT: add ip, sp, ip ; ARM32-NOT: movw ip ; %t2 is the result of the "or", and %t2 will be passed via r1 to the call. @@ -61,14 +61,10 @@ ; ARM32: str [[REG]], [ip, #-20] ; ARM32: b {{[a-f0-9]+}} ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used. -; ARM32: movw ip, #4216 +; ARM32: movw ip, #4232 ; ARM32-NEXT: add ip, sp, ip -; ARM32: sub sp, sp, #16 -; Now sp1 = sp0 - 16, but ip is still in terms of sp0. -; So, sp0 + 4212 == ip - 4. ; ARM32: ldr r2, [ip, #-4] ; ARM32: bl {{.*}} dummy -; ARM32: add sp, sp ; The call clobbers ip, so we need to re-create the base register. ; ARM32: movw ip, #4{{.*}} ; ARM32: b {{[a-f0-9]+}} @@ -122,12 +118,8 @@ ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used. ; ARM32: movw ip, #4120 ; ARM32-NEXT: sub ip, fp, ip -; ARM32: sub sp, sp, #16 -; Now sp1 = sp0 - 16, but ip is still in terms of fp0. -; So, fp0 - 4124 == ip - 4. ; ARM32: ldr r2, [ip, #-4] ; ARM32: bl {{.*}} dummy -; ARM32: add sp, sp ; The call clobbers ip, so we need to re-create the base register. ; ARM32: movw ip, #4{{.*}} ; ARM32: b {{[a-f0-9]+}}