[Subzero][MIPS32] Fix alloca alignment and offset for Om1 and O2 optimization
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/2417233002 .
Patch from Sagar Thakur <sagar.thakur@imgtec.com>.
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 7f501ef..624c4a5 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -216,6 +216,25 @@
return MIPS32_STACK_ALIGNMENT_BYTES;
}
+// Computes the number of bytes of outgoing-argument stack space that Call
+// requires. Arguments for which CC.argInReg() succeeds contribute nothing;
+// each remaining argument first aligns the running total for its type and
+// then adds its on-stack width. The final total is rounded up with
+// applyStackAlignment() before being returned.
+uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
+  TargetMIPS32::CallingConv CC;
+  size_t OutArgsSizeBytes = 0;
+  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
+    Operand *Arg = legalizeUndef(Call->getArg(i));
+    const Type Ty = Arg->getType();
+    // RegNum only serves as the out-parameter of argInReg(); the register it
+    // names is not needed here, just whether the argument got one.
+    RegNumT RegNum;
+    if (CC.argInReg(Ty, i, &RegNum)) {
+      continue;
+    }
+
+    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
+    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
+  }
+
+  return applyStackAlignment(OutArgsSizeBytes);
+}
+
void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
constexpr bool NoTailCall = false;
constexpr bool IsTargetHelperCall = true;
@@ -586,6 +605,7 @@
}
}
}
+ CurrentAllocaOffset = MaxOutArgsSizeBytes;
}
void TargetMIPS32::translateO2() {
@@ -1706,9 +1726,9 @@
if (Var->isRematerializable()) {
// This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
- // ExtraOffset is only needed for frame-pointer based frames as we have
+ // ExtraOffset is only needed for stack-pointer based frames as we have
// to account for spill storage.
- const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
+ const int32_t ExtraOffset = (Var->getRegNum() == Target->getStackReg())
? Target->getFrameFixedAllocaOffset()
: 0;
@@ -2008,6 +2028,17 @@
Context.insert<InstFakeDef>(Dest);
return;
}
+
+ if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
+ CurrentAllocaOffset =
+ Utils::applyAlignment(CurrentAllocaOffset, Alignment);
+ }
+ auto *T = I32Reg();
+ _addiu(T, SP, CurrentAllocaOffset);
+ _mov(Dest, T);
+ CurrentAllocaOffset += Value;
+ return;
+
} else {
// Non-constant sizes need to be adjusted to the next highest multiple of
// the required alignment at runtime.
@@ -2034,15 +2065,6 @@
_mov(SP, Dest);
return;
}
-
- // Add enough to the returned address to account for the out args area.
- if (MaxOutArgsSizeBytes > 0) {
- Variable *T = makeReg(getPointerType());
- _addiu(T, SP, MaxOutArgsSizeBytes);
- _mov(Dest, T);
- } else {
- _mov(Dest, SP);
- }
}
void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
@@ -2322,6 +2344,12 @@
void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
Variable *Dest = Instr->getDest();
+
+ if (Dest->isRematerializable()) {
+ Context.insert<InstFakeDef>(Dest);
+ return;
+ }
+
// We need to signal all the UnimplementedLoweringError errors before any
// legalization into new variables, otherwise Om1 register allocation may fail
// when it sees variables that are defined but not used.
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 01f68ca..6a50b96 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -104,7 +104,9 @@
PrologEmitsFixedAllocas = true;
}
int32_t getFrameFixedAllocaOffset() const override {
- return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
+ int32_t FixedAllocaOffset =
+ Utils::applyAlignment(CurrentAllocaOffset, FixedAllocaAlignBytes);
+ return FixedAllocaOffset - MaxOutArgsSizeBytes;
}
uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
@@ -701,10 +703,7 @@
void lowerSwitch(const InstSwitch *Instr) override;
void lowerUnreachable(const InstUnreachable *Instr) override;
void prelowerPhis() override;
- uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override {
- (void)Instr;
- return 0;
- }
+ uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
void genTargetHelperCallFor(Inst *Instr) override;
void doAddressOptLoad() override;
void doAddressOptStore() override;
@@ -754,6 +753,7 @@
bool VariableAllocaUsed = false;
uint32_t MaxOutArgsSizeBytes = 0;
uint32_t TotalStackSizeBytes = 0;
+ uint32_t CurrentAllocaOffset = 0;
static SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
static SmallBitVector TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
static SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index 04b34a4..07cff69 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -94,9 +94,9 @@
; MIPS32-LABEL: fixed_416_align_32
; MIPS32-OPT2: addiu sp,sp,-448
-; MIPS32-OPT2: addiu a0,sp,16
+; MIPS32-OPT2: addiu a0,sp,32
; MIPS32-OPTM1: addiu sp,sp,-448
-; MIPS32-OPTM1: addiu [[REG:.*]],sp,16
+; MIPS32-OPTM1: addiu [[REG:.*]],sp,32
; MIPS32-OPTM1: sw [[REG]],{{.*}}
; MIPS32-OPTM1: lw a0,{{.*}}
; MIPS32: jal {{.*}} R_{{.*}} f1
@@ -159,9 +159,9 @@
; MIPS32-LABEL: fixed_351_align_32
; MIPS32-OPT2: addiu sp,sp,-384
-; MIPS32-OPT2: addiu a0,sp,16
+; MIPS32-OPT2: addiu a0,sp,32
; MIPS32-OPTM1: addiu sp,sp,-384
-; MIPS32-OPTM1: addiu [[REG:.*]],sp,16
+; MIPS32-OPTM1: addiu [[REG:.*]],sp,32
; MIPS32-OPTM1: sw [[REG]],{{.*}}
; MIPS32-OPTM1: lw a0,{{.*}}
; MIPS32: jal {{.*}} R_{{.*}} f1