[Subzero][MIPS32] Implements lowering of alloca instruction
BUG=none
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/2067183002 .
Patch from Sagar Thakur <sagar.thakur@imgtec.com>.
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 21262f6..ccb1676 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -163,10 +163,28 @@
RegMIPS32::getRegName, getRegClassName);
}
+void TargetMIPS32::unsetIfNonLeafFunc() {
+ for (CfgNode *Node : Func->getNodes()) {
+ for (Inst &Instr : Node->getInsts()) {
+ if (llvm::isa<InstCall>(&Instr)) {
+ // Unset MaybeLeafFunc if a call instruction exists.
+ MaybeLeafFunc = false;
+ return;
+ }
+ }
+ }
+}
+
+uint32_t TargetMIPS32::getStackAlignment() const {
+ return MIPS32_STACK_ALIGNMENT_BYTES;
+}
+
void TargetMIPS32::findMaxStackOutArgsSize() {
// MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes.
- constexpr size_t MinNeededOutArgsBytes = 16;
+ size_t MinNeededOutArgsBytes = 0;
+ if (!MaybeLeafFunc)
+ MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
@@ -188,10 +206,12 @@
// https://code.google.com/p/nativeclient/issues/detail?id=4094
genTargetHelperCalls();
+ unsetIfNonLeafFunc();
+
findMaxStackOutArgsSize();
// Merge Alloca instructions, and lay out the stack.
- static constexpr bool SortAndCombineAllocas = false;
+ static constexpr bool SortAndCombineAllocas = true;
Func->processAllocas(SortAndCombineAllocas);
Func->dump("After Alloca processing");
@@ -291,6 +311,8 @@
// TODO: share passes with X86?
genTargetHelperCalls();
+ unsetIfNonLeafFunc();
+
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack.
@@ -441,8 +463,8 @@
// hold the operand.
auto *Base = llvm::cast<Variable>(legalize(Operand, Legal_Reg));
return OperandMIPS32Mem::create(
- Func, Ty, Base,
- llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
+ Func, Ty, Base, llvm::cast<ConstantInteger32>(
+ Ctx->getConstantInt32(Base->getStackOffset())));
}
void TargetMIPS32::emitVariable(const Variable *Var) const {
@@ -808,15 +830,9 @@
uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes;
- if (MaybeLeafFunc)
- MaxOutArgsSizeBytes = 0;
-
// Adds the out args space to the stack, and align SP if necessary.
- uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;
-
- // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
- // TotalStackSizeBytes once lowerAlloca is implemented and leaf function
- // information is generated by lowerCall.
+ uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes +
+ FixedAllocaSizeBytes + MaxOutArgsSizeBytes;
// Generate "addiu sp, sp, -TotalStackSizeBytes"
if (TotalStackSizeBytes) {
@@ -854,7 +870,7 @@
// those that were register-allocated. Args are pushed right to left, so
// Arg[0] is closest to the stack/frame pointer.
const VarList &Args = Func->getArgs();
- size_t InArgsSizeBytes = 0;
+ size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
TargetMIPS32::CallingConv CC;
uint32_t ArgNo = 0;
@@ -1002,14 +1018,64 @@
}
void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
- UsesFramePointer = true;
// Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved
// after the alloca. The stack alignment restriction can be relaxed in some
// cases.
NeedsStackAlignment = true;
- UnimplementedLoweringError(this, Instr);
+
+ // For default align=0, set it to the real value 1, to avoid any
+ // bit-manipulation problems below.
+ const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
+
+ // LLVM enforces power of 2 alignment.
+ assert(llvm::isPowerOf2_32(AlignmentParam));
+ assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
+
+ const uint32_t Alignment =
+ std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
+ const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
+ const bool OptM1 = getFlags().getOptLevel() == Opt_m1;
+ const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
+ const bool UseFramePointer =
+ hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
+
+ if (UseFramePointer)
+ setHasFramePointer();
+
+ Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
+
+ Variable *Dest = Instr->getDest();
+ Operand *TotalSize = Instr->getSizeInBytes();
+
+ if (const auto *ConstantTotalSize =
+ llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
+ const uint32_t Value =
+ Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
+ FixedAllocaSizeBytes += Value;
+ // Constant size alloca.
+ if (!UseFramePointer) {
+ // If we don't need a Frame Pointer, this alloca has a known offset to the
+ // stack pointer. We don't need to adjust the stack pointer, nor assign
+ // any value to Dest, as Dest is rematerializable.
+ assert(Dest->isRematerializable());
+ Context.insert<InstFakeDef>(Dest);
+ return;
+ }
+ } else {
+ UnimplementedLoweringError(this, Instr);
+ return;
+ }
+
+ // Add enough to the returned address to account for the out args area.
+ if (MaxOutArgsSizeBytes > 0) {
+ Variable *T = makeReg(getPointerType());
+ _addiu(T, SP, MaxOutArgsSizeBytes);
+ _mov(Dest, T);
+ } else {
+ _mov(Dest, SP);
+ }
}
void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index cfbaa6f..155b608 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -89,15 +89,12 @@
// are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3;
}
- uint32_t getStackAlignment() const override {
- // TODO(sehr): what is the stack alignment?
- return 1;
- }
+ uint32_t getStackAlignment() const override;
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
- // TODO(sehr): Implement fixed stack layout.
- (void)Size;
- (void)Align;
- llvm::report_fatal_error("Not yet implemented");
+ FixedAllocaSizeBytes = Size;
+ assert(llvm::isPowerOf2_32(Align));
+ FixedAllocaAlignBytes = Align;
+ PrologEmitsFixedAllocas = true;
}
int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr): Implement fixed stack layout.
@@ -105,6 +102,8 @@
return 0;
}
+ uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
+
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
}
@@ -447,6 +446,8 @@
static Type stackSlotType();
Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
+ void unsetIfNonLeafFunc();
+
// Iterates over the CFG and determines the maximum outgoing stack arguments
// bytes. This information is later used during addProlog() to pre-allocate
// the outargs area
@@ -563,6 +564,8 @@
static constexpr uint32_t CHAR_BITS = 8;
static constexpr uint32_t INT32_BITS = 32;
size_t SpillAreaSizeBytes = 0;
+ size_t FixedAllocaSizeBytes = 0;
+ size_t FixedAllocaAlignBytes = 0;
private:
ENABLE_MAKE_UNIQUE;
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index cf0926c..3be09f6 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -26,6 +26,20 @@
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN: --command %p2i --filetype=asm --assemble \
+; RUN: --disassemble --target mips32 -i %s --args -O2 --skip-unimplemented \
+; RUN: -allow-externally-defined-symbols \
+; RUN: | %if --need=target_MIPS32 --need=allow_dump \
+; RUN: --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPT2 %s
+
+; RUN: %if --need=target_MIPS32 --need=allow_dump \
+; RUN: --command %p2i --filetype=asm --assemble \
+; RUN: --disassemble --target mips32 -i %s --args -Om1 --skip-unimplemented \
+; RUN: -allow-externally-defined-symbols \
+; RUN: | %if --need=target_MIPS32 --need=allow_dump \
+; RUN: --command FileCheck --check-prefix MIPS32 --check-prefix=MIPS32-OPTM1 %s
+
define internal void @fixed_416_align_16(i32 %n) {
entry:
%array = alloca i8, i32 416, align 16
@@ -50,6 +64,10 @@
; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
+; MIPS32-LABEL: fixed_416_align_16
+; MIPS32-OPT2: addiu sp,sp,-440
+; MIPS32-OPTM1: addiu sp,sp,-448
+
define internal void @fixed_416_align_32(i32 %n) {
entry:
%array = alloca i8, i32 400, align 32
@@ -72,6 +90,10 @@
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
+; MIPS32-LABEL: fixed_416_align_32
+; MIPS32-OPT2: addiu sp,sp,-440
+; MIPS32-OPTM1: addiu sp,sp,-448
+
; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) {
entry:
@@ -97,6 +119,10 @@
; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
+; MIPS32-LABEL: fixed_351_align_16
+; MIPS32-OPT2: addiu sp,sp,-376
+; MIPS32-OPTM1: addiu sp,sp,-384
+
define internal void @fixed_351_align_32(i32 %n) {
entry:
%array = alloca i8, i32 351, align 32
@@ -119,6 +145,10 @@
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
+; MIPS32-LABEL: fixed_351_align_32
+; MIPS32-OPT2: addiu sp,sp,-376
+; MIPS32-OPTM1: addiu sp,sp,-384
+
declare void @f1(i32 %ignored)
declare void @f2(i32 %ignored)