Subzero. ARM32. Removes memory legalization warts. This CL removes two warts from the ARM32 backend: 1) during argument lowering, if a stack parameter is assigned a register, the backend creates a new Variable that references the stack location with the incoming argument, and _mov() it to the parameter. 2) During stack slot legalization, all _mov(Mem(), Reg) are converted to stores; and all _mov(Reg, Mem()) are converted to loads. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=kschimpf@google.com Review URL: https://codereview.chromium.org/1457683004 .
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp index 9657c1d..24cced8 100644 --- a/src/IceTargetLoweringARM32.cpp +++ b/src/IceTargetLoweringARM32.cpp
@@ -475,9 +475,6 @@ Offset += getStackAdjustment(); } const Type VarTy = Var->getType(); - if (!isLegalMemOffset(VarTy, Offset)) { - llvm::report_fatal_error("Illegal stack offset"); - } Str << "[" << getRegName(BaseRegNum, VarTy); if (Offset != 0) { Str << ", " << getConstantPrefix() << Offset; @@ -625,7 +622,7 @@ // to copy Arg into its assigned register if applicable. void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset, - size_t &InArgsSizeBytes) { + size_t *InArgsSizeBytes) { if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { Variable *Lo = Arg64On32->getLo(); Variable *Hi = Arg64On32->getHi(); @@ -634,26 +631,23 @@ return; } Type Ty = Arg->getType(); - InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); - Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); - InArgsSizeBytes += typeWidthInBytesOnStack(Ty); - // If the argument variable has been assigned a register, we need to load the - // value from the stack slot. - if (Arg->hasReg()) { - assert(Ty != IceType_i64); - // This should be simple, just load the parameter off the stack using a nice - // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for - // fp types, cannot have an index register), so we legalize the memory - // operand instead. - auto *Mem = OperandARM32Mem::create( - Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( - Ctx->getConstantInt32(Arg->getStackOffset()))); - _mov(Arg, legalizeToReg(Mem, Arg->getRegNum())); - // This argument-copying instruction uses an explicit OperandARM32Mem - // operand instead of a Variable, so its fill-from-stack operation has to - // be tracked separately for statistics. - Ctx->statsUpdateFills(); + assert(Ty != IceType_i64); + + *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty); + const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes; + *InArgsSizeBytes += typeWidthInBytesOnStack(Ty); + + if (!Arg->hasReg()) { + Arg->setStackOffset(ArgStackOffset); + return; } + + // If the argument variable has been assigned a register, we need to copy the + // value from the stack slot. + Variable *Parameter = Func->makeVariable(Ty); + Parameter->setMustNotHaveReg(); + Parameter->setStackOffset(ArgStackOffset); + _mov(Arg, Parameter); } Type TargetARM32::stackSlotType() { return IceType_i32; } @@ -754,8 +748,6 @@ continue; } if (CalleeSaves[i] && RegsUsed[i]) { - // TODO(jvoung): do separate vpush for each floating point register - // segment and += 4, or 8 depending on type. ++NumCallee; Variable *PhysicalRegister = getPhysicalRegister(i); PreservedRegsSizeBytes += @@ -837,7 +829,7 @@ InRegs = CC.I32InReg(&DummyReg); } if (!InRegs) - finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); + finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes); } // Fill in stack offsets for locals. @@ -981,11 +973,18 @@ return ScratchReg; } -StackVariable *TargetARM32::legalizeStackSlot(Type Ty, int32_t Offset, - int32_t StackAdjust, - Variable *OrigBaseReg, - Variable **NewBaseReg, - int32_t *NewBaseOffset) { +OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, + int32_t StackAdjust, + Variable *OrigBaseReg, + Variable **NewBaseReg, + int32_t *NewBaseOffset) { + if (isLegalMemOffset(Ty, Offset + StackAdjust)) { + return OperandARM32Mem::create( + Func, Ty, OrigBaseReg, llvm::cast<ConstantInteger32>( + Ctx->getConstantInt32(Offset + StackAdjust)), + OperandARM32Mem::Offset); + } + if (*NewBaseReg == nullptr) { *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); *NewBaseOffset = Offset + StackAdjust; @@ -998,18 +997,15 @@ OffsetDiff = 0; } - StackVariable *NewDest = Func->makeVariable<StackVariable>(Ty); - NewDest->setMustNotHaveReg(); - NewDest->setBaseRegNum((*NewBaseReg)->getRegNum()); - NewDest->setStackOffset(OffsetDiff); - return NewDest; + return OperandARM32Mem::create( + Func, Ty, *NewBaseReg, + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)), + OperandARM32Mem::Offset); } -void TargetARM32::legalizeMovStackAddrImm(InstARM32Mov *MovInstr, - int32_t StackAdjust, - Variable *OrigBaseReg, - Variable **NewBaseReg, - int32_t *NewBaseOffset) { +void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, + Variable *OrigBaseReg, Variable **NewBaseReg, + int32_t *NewBaseOffset) { Variable *Dest = MovInstr->getDest(); assert(Dest != nullptr); Type DestTy = Dest->getType(); @@ -1017,6 +1013,7 @@ Operand *Src = MovInstr->getSrc(0); Type SrcTy = Src->getType(); + (void)SrcTy; assert(SrcTy != IceType_i64); if (MovInstr->isMultiDest() || MovInstr->isMultiSource()) @@ -1024,40 +1021,29 @@ bool Legalized = false; if (!Dest->hasReg()) { - assert(llvm::cast<Variable>(Src)->hasReg()); + auto *const SrcR = llvm::cast<Variable>(Src); + assert(SrcR->hasReg()); const int32_t Offset = Dest->getStackOffset(); - if (!isLegalMemOffset(DestTy, Offset + StackAdjust)) { - Legalized = true; - Dest = legalizeStackSlot(DestTy, Offset, StackAdjust, OrigBaseReg, - NewBaseReg, NewBaseOffset); - } + // This is a _mov(Mem(), Variable), i.e., a store. + _str(SrcR, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, + NewBaseReg, NewBaseOffset), + MovInstr->getPredicate()); + // _str() does not have a Dest, so we add a fake-def(Dest). + Context.insert(InstFakeDef::create(Func, Dest)); + Legalized = true; } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { if (!Var->hasReg()) { const int32_t Offset = Var->getStackOffset(); - if (!isLegalMemOffset(SrcTy, Offset + StackAdjust)) { - Legalized = true; - Src = legalizeStackSlot(SrcTy, Offset, StackAdjust, OrigBaseReg, - NewBaseReg, NewBaseOffset); - } - } - } else if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { - if (ConstantInteger32 *OffsetOp = Mem->getOffset()) { - const int32_t Offset = OffsetOp->getValue(); - if (!isLegalMemOffset(SrcTy, Offset + StackAdjust)) { - assert(Mem->getBase()->hasReg()); - assert(Mem->getBase()->getRegNum() == (int32_t)getFrameOrStackReg()); - Legalized = true; - Src = legalizeStackSlot(SrcTy, Offset, StackAdjust, OrigBaseReg, - NewBaseReg, NewBaseOffset); - } + _ldr(Dest, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, + NewBaseReg, NewBaseOffset), + MovInstr->getPredicate()); + Legalized = true; } } if (Legalized) { if (MovInstr->isDestRedefined()) { - _mov_redefined(Dest, Src, MovInstr->getPredicate()); - } else { - _mov(Dest, Src, MovInstr->getPredicate()); + _set_dest_redefined(); } MovInstr->setDeleted(); } @@ -1075,11 +1061,6 @@ // clobber the flags register. Func->dump("Before legalizeStackSlots"); assert(hasComputedFrame()); - // Early exit, if SpillAreaSizeBytes is really small. - // TODO(jpp): this is not safe -- loads and stores of q registers can't have - // offsets. - if (isLegalMemOffset(IceType_v4i32, SpillAreaSizeBytes)) - return; Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); int32_t StackAdjust = 0; // Do a fairly naive greedy clustering for now. Pick the first stack slot @@ -1122,11 +1103,9 @@ } } - // The Lowering ensures that ldr and str always have legal Mem operands. - // The only other instruction that may access memory is mov. if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { - legalizeMovStackAddrImm(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg, - &NewBaseOffset); + legalizeMov(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg, + &NewBaseOffset); } } } @@ -4600,7 +4579,11 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { Type Ty = Src->getType(); Variable *Reg = makeReg(Ty, RegNum); - _mov(Reg, Src); + if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { + _ldr(Reg, Mem); + } else { + _mov(Reg, Src); + } return Reg; } @@ -4641,7 +4624,6 @@ Variable *RegIndex = nullptr; assert(Base); RegBase = legalizeToReg(Base); - bool InvalidImm = false; assert(Ty < MemTraitsSize); if (Index) { assert(Offset == nullptr); @@ -4653,12 +4635,7 @@ static constexpr bool ZeroExt = false; assert(MemTraits[Ty].CanHaveImm); if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) { - assert(RegBase->hasReg()); - assert(RegBase->getRegNum() == (int32_t)getFrameOrStackReg()); - // We are a bit more lenient with invalid immediate when accessing the - // stack here, and then rely on legalizeStackSlots() to fix things as - // appropriate. - InvalidImm = true; + llvm::report_fatal_error("Invalid memory offset."); } } @@ -4679,15 +4656,7 @@ From = Mem; } else { Variable *Reg = makeReg(Ty, RegNum); - if (InvalidImm) { - // If Mem has an invalid immediate, we legalize it to a Reg using mov - // instead of ldr because legalizeStackSlots() will later kick in and - // fix the immediate for us. - _mov(Reg, Mem); - } else { - _ldr(Reg, Mem); - } - + _ldr(Reg, Mem); From = Reg; } return From;