Subzero. ARM32. Folding rematerializable offsets in address operands.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1477873002 .
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 76fc99e..d260db0 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -681,10 +681,10 @@
     return;
   Func->dump("After stack frame mapping");
 
-  legalizeStackSlots();
+  postLowerLegalization();
   if (Func->hasError())
     return;
-  Func->dump("After legalizeStackSlots");
+  Func->dump("After postLowerLegalization");
 
   Func->contractEmptyNodes();
   Func->reorderNodes();
@@ -746,10 +746,10 @@
     return;
   Func->dump("After stack frame mapping");
 
-  legalizeStackSlots();
+  postLowerLegalization();
   if (Func->hasError())
     return;
-  Func->dump("After legalizeStackSlots");
+  Func->dump("After postLowerLegalization");
 
   // Nop insertion
   if (Ctx->getFlags().shouldDoNopInsertion()) {
@@ -1336,55 +1336,101 @@
   return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
 }
 
-Variable *TargetARM32::newBaseRegister(int32_t Offset, Variable *OrigBaseReg) {
+Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
+    Variable *Base, int32_t Offset, int32_t ScratchRegNum) {
   // Legalize will likely need a movw/movt combination, but if the top bits are
   // all 0 from negating the offset and subtracting, we could use that instead.
-  bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
-  if (ShouldSub)
-    Offset = -Offset;
-  Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
-                                Legal_Reg | Legal_Flex, getReservedTmpReg());
-  Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
-  if (ShouldSub)
-    _sub(ScratchReg, OrigBaseReg, OffsetVal);
-  else
-    _add(ScratchReg, OrigBaseReg, OffsetVal);
+  const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
+  Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
+  if (ShouldSub) {
+    Operand *OffsetVal =
+        Target->legalize(Target->Ctx->getConstantInt32(-Offset),
+                         Legal_Reg | Legal_Flex, ScratchRegNum);
+    Target->_sub(ScratchReg, Base, OffsetVal);
+  } else {
+    Operand *OffsetVal =
+        Target->legalize(Target->Ctx->getConstantInt32(Offset),
+                         Legal_Reg | Legal_Flex, ScratchRegNum);
+    Target->_add(ScratchReg, Base, OffsetVal);
+  }
+
+  if (ScratchRegNum == Target->getReservedTmpReg()) {
+    const bool BaseIsStackOrFramePtr =
+        Base->getRegNum() == static_cast<int32_t>(Target->getFrameOrStackReg());
+    // There is currently no code path that would trigger this assertion, so we
+    // leave this assertion here in case it is ever violated. This is not a
+    // fatal error (thus the use of assert() and not llvm::report_fatal_error)
+    // as the program compiled by subzero will still work correctly.
+    assert(BaseIsStackOrFramePtr);
+    // Side-effect: updates TempBase to reflect the new Temporary.
+    if (BaseIsStackOrFramePtr) {
+      TempBaseReg = ScratchReg;
+      TempBaseOffset = Offset;
+    } else {
+      TempBaseReg = nullptr;
+      TempBaseOffset = 0;
+    }
+  }
+
   return ScratchReg;
 }
 
-OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
-                                               Variable *OrigBaseReg,
-                                               Variable **NewBaseReg,
-                                               int32_t *NewBaseOffset) {
-  assert(!OrigBaseReg->isRematerializable());
-  if (isLegalMemOffset(Ty, Offset)) {
+OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
+    Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
+  assert(!Base->isRematerializable());
+  if (AllowOffsets && Target->isLegalMemOffset(Ty, Offset)) {
     return OperandARM32Mem::create(
-        Func, Ty, OrigBaseReg,
-        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)),
+        Target->Func, Ty, Base,
+        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
         OperandARM32Mem::Offset);
   }
 
-  if (*NewBaseReg == nullptr) {
-    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
-    *NewBaseOffset = Offset;
+  if (!AllowOffsets || TempBaseReg == nullptr) {
+    newBaseRegister(Base, Offset, Target->getReservedTmpReg());
   }
 
-  int32_t OffsetDiff = Offset - *NewBaseOffset;
-  if (!isLegalMemOffset(Ty, OffsetDiff)) {
-    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
-    *NewBaseOffset = Offset;
+  int32_t OffsetDiff = Offset - TempBaseOffset;
+  assert(AllowOffsets || OffsetDiff == 0);
+
+  if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
+    newBaseRegister(Base, Offset, Target->getReservedTmpReg());
     OffsetDiff = 0;
   }
 
-  assert(!(*NewBaseReg)->isRematerializable());
+  assert(!TempBaseReg->isRematerializable());
   return OperandARM32Mem::create(
-      Func, Ty, *NewBaseReg,
-      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
+      Target->Func, Ty, TempBaseReg,
+      llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
       OperandARM32Mem::Offset);
 }
 
-void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
-                              Variable **NewBaseReg, int32_t *NewBaseOffset) {
+void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
+    const Inst *Instr) {
+  bool ClobbersTempBase = false;
+  if (TempBaseReg != nullptr) {
+    Variable *Dest = Instr->getDest();
+    if (llvm::isa<InstARM32Call>(Instr)) {
+      // The following assertion is an invariant, so we remove it from the if
+      // test. If the invariant is ever broken/invalidated/changed, remember
+      // to add it back to the if condition.
+      assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
+      // The linker may need to clobber IP if the call is too far from PC. Thus,
+      // we assume IP will be overwritten.
+      ClobbersTempBase = true;
+    } else if (Dest != nullptr &&
+               Dest->getRegNum() == TempBaseReg->getRegNum()) {
+      // Register redefinition.
+      ClobbersTempBase = true;
+    }
+  }
+
+  if (ClobbersTempBase) {
+    TempBaseReg = nullptr;
+    TempBaseOffset = 0;
+  }
+}
+
+void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
   Variable *Dest = MovInstr->getDest();
   assert(Dest != nullptr);
   Type DestTy = Dest->getType();
@@ -1405,31 +1451,33 @@
     assert(!SrcR->isRematerializable());
     const int32_t Offset = Dest->getStackOffset();
     // This is a _mov(Mem(), Variable), i.e., a store.
-    _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
-                                NewBaseOffset),
-         MovInstr->getPredicate());
+    Target->_str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
+                 MovInstr->getPredicate());
     // _str() does not have a Dest, so we add a fake-def(Dest).
-    Context.insert(InstFakeDef::create(Func, Dest));
+    Target->Context.insert(InstFakeDef::create(Target->Func, Dest));
     Legalized = true;
   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
     if (Var->isRematerializable()) {
-      // Rematerialization arithmetic.
+      // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
+
+      // ExtraOffset is only needed for frame-pointer based frames as we have
+      // to account for spill storage.
       const int32_t ExtraOffset =
-          (static_cast<SizeT>(Var->getRegNum()) == getFrameReg())
-              ? getFrameFixedAllocaOffset()
+          (static_cast<SizeT>(Var->getRegNum()) == Target->getFrameReg())
+              ? Target->getFrameFixedAllocaOffset()
               : 0;
 
       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
-      Operand *OffsetRF = legalize(Ctx->getConstantInt32(Offset),
-                                   Legal_Reg | Legal_Flex, Dest->getRegNum());
-      _add(Dest, Var, OffsetRF);
+      Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
+      Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
+      Target->_mov(Dest, T);
       Legalized = true;
     } else {
       if (!Var->hasReg()) {
+        // This is a _mov(Variable, Mem()), i.e., a load.
         const int32_t Offset = Var->getStackOffset();
-        _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
-                                    NewBaseOffset),
-             MovInstr->getPredicate());
+        Target->_ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
+                     MovInstr->getPredicate());
         Legalized = true;
       }
     }
@@ -1437,13 +1485,105 @@
 
   if (Legalized) {
     if (MovInstr->isDestRedefined()) {
-      _set_dest_redefined();
+      Target->_set_dest_redefined();
     }
     MovInstr->setDeleted();
   }
 }
 
-void TargetARM32::legalizeStackSlots() {
+// ARM32 address modes:
+//  ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
+//                    [reg +/- reg << shamt5]
+//  ld/st f[32|64]  : [reg], [reg +/- imm8] , [pc +/- imm8]
+//  ld/st vectors   : [reg]
+//
+// For now, we don't handle address modes with Relocatables.
+namespace {
+// MemTraits contains per-type valid address mode information.
+#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr)  \
+  static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
+ICETYPEARM32_TABLE
+#undef X
+
+static const struct {
+  int32_t ValidImmMask;
+  bool CanHaveImm;
+  bool CanHaveIndex;
+  bool CanHaveShiftedIndex;
+} MemTraits[] = {
+#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr)  \
+  { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, }                           \
+  ,
+    ICETYPEARM32_TABLE
+#undef X
+};
+static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
+} // end of anonymous namespace
+
+OperandARM32Mem *
+TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
+                                                       bool AllowOffsets) {
+  assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
+  assert(
+      Mem->isRegReg() ||
+      Target->isLegalMemOffset(Mem->getType(), Mem->getOffset()->getValue()));
+
+  bool Legalized = false;
+  Variable *Base = Mem->getBase();
+  int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
+  if (Base->isRematerializable()) {
+    const int32_t ExtraOffset =
+        (static_cast<SizeT>(Base->getRegNum()) == Target->getFrameReg())
+            ? Target->getFrameFixedAllocaOffset()
+            : 0;
+    Offset += Base->getStackOffset() + ExtraOffset;
+    Base = Target->getPhysicalRegister(Base->getRegNum());
+    assert(!Base->isRematerializable());
+    Legalized = true;
+  }
+
+  if (!Legalized) {
+    return nullptr;
+  }
+
+  if (!Mem->isRegReg()) {
+    return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
+  }
+
+  assert(MemTraits[Mem->getType()].CanHaveIndex);
+
+  if (Offset != 0) {
+    if (TempBaseReg == nullptr) {
+      Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
+    } else {
+      uint32_t Imm8, Rotate;
+      const int32_t OffsetDiff = Offset - TempBaseOffset;
+      if (OffsetDiff == 0) {
+        Base = TempBaseReg;
+      } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
+        auto *OffsetDiffF = OperandARM32FlexImm::create(
+            Target->Func, IceType_i32, Imm8, Rotate);
+        Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
+        TempBaseOffset += OffsetDiff;
+        Base = TempBaseReg;
+      } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
+        auto *OffsetDiffF = OperandARM32FlexImm::create(
+            Target->Func, IceType_i32, Imm8, Rotate);
+        Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
+        TempBaseOffset += OffsetDiff;
+        Base = TempBaseReg;
+      } else {
+        Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
+      }
+    }
+  }
+
+  return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
+                                 Mem->getIndex(), Mem->getShiftOp(),
+                                 Mem->getShiftAmt(), Mem->getAddrMode());
+}
+
+void TargetARM32::postLowerLegalization() {
   // If a stack variable's frame offset doesn't fit, convert from:
   //   ldr X, OFF[SP]
   // to:
@@ -1453,9 +1593,8 @@
   //
   // This is safe because we have reserved TMP, and add for ARM does not
   // clobber the flags register.
-  Func->dump("Before legalizeStackSlots");
+  Func->dump("Before postLowerLegalization");
   assert(hasComputedFrame());
-  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
   // Do a fairly naive greedy clustering for now. Pick the first stack slot
   // that's out of bounds and make a new base reg using the architecture's temp
   // register. If that works for the next slot, then great. Otherwise, create a
@@ -1467,24 +1606,53 @@
   // don't depend on this legalization.
   for (CfgNode *Node : Func->getNodes()) {
     Context.init(Node);
-    Variable *NewBaseReg = nullptr;
-    int32_t NewBaseOffset = 0;
+    // One legalizer per basic block, otherwise we would share the Temporary
+    // Base Register between basic blocks.
+    PostLoweringLegalizer Legalizer(this);
     while (!Context.atEnd()) {
       PostIncrLoweringContext PostIncrement(Context);
       Inst *CurInstr = Context.getCur();
-      Variable *Dest = CurInstr->getDest();
 
-      // Check if the previous NewBaseReg is clobbered, and reset if needed.
-      if ((Dest && NewBaseReg && Dest->hasReg() &&
-           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
-          llvm::isa<InstFakeKill>(CurInstr)) {
-        NewBaseReg = nullptr;
-        NewBaseOffset = 0;
-      }
+      // Check if the previous TempBaseReg is clobbered, and reset if needed.
+      Legalizer.resetTempBaseIfClobberedBy(CurInstr);
 
       if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
-        legalizeMov(MovInstr, OrigBaseReg, &NewBaseReg, &NewBaseOffset);
+        Legalizer.legalizeMov(MovInstr);
+      } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
+        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
+                llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
+          _ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
+          CurInstr->setDeleted();
+        }
+      } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
+        constexpr bool DisallowOffsetsBecauseLdrex = false;
+        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
+                llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
+                DisallowOffsetsBecauseLdrex)) {
+          _ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
+          CurInstr->setDeleted();
+        }
+      } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
+        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
+                llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
+          _str(llvm::cast<Variable>(CurInstr->getSrc(0)), LegalMem,
+               StrInstr->getPredicate());
+          CurInstr->setDeleted();
+        }
+      } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
+        constexpr bool DisallowOffsetsBecauseStrex = false;
+        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
+                llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
+                DisallowOffsetsBecauseStrex)) {
+          _strex(CurInstr->getDest(), llvm::cast<Variable>(CurInstr->getSrc(0)),
+                 LegalMem, StrexInstr->getPredicate());
+          CurInstr->setDeleted();
+        }
       }
+
+      // Sanity-check: the Legalizer will either have no Temp, or it will be
+      // bound to IP.
+      Legalizer.assertNoTempOrAssignedToIP();
     }
   }
 }
@@ -1502,15 +1670,14 @@
     // increment) in case of duplication.
     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
-    Variable *BaseR = legalizeToReg(Mem->getBase());
     if (Mem->isRegReg()) {
       Variable *IndexR = legalizeToReg(Mem->getIndex());
-      return OperandARM32Mem::create(Func, IceType_i32, BaseR, IndexR,
+      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
     } else {
-      return OperandARM32Mem::create(Func, IceType_i32, BaseR, Mem->getOffset(),
-                                     Mem->getAddrMode());
+      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
+                                     Mem->getOffset(), Mem->getAddrMode());
     }
   }
   llvm::report_fatal_error("Unsupported operand type");
@@ -4791,35 +4958,6 @@
 }
 } // end of anonymous namespace
 
-// ARM32 address modes:
-//  ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
-//                    [reg +/- reg << shamt5]
-//  ld/st f[32|64]  : [reg], [reg +/- imm8] , [pc +/- imm8]
-//  ld/st vectors   : [reg]
-//
-// For now, we don't handle address modes with Relocatables.
-namespace {
-// MemTraits contains per-type valid address mode information.
-#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr)  \
-  static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
-ICETYPEARM32_TABLE
-#undef X
-
-static const struct {
-  int32_t ValidImmMask;
-  bool CanHaveImm;
-  bool CanHaveIndex;
-  bool CanHaveShiftedIndex;
-} MemTraits[] = {
-#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr)  \
-  { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, }                           \
-  ,
-    ICETYPEARM32_TABLE
-#undef X
-};
-static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
-} // end of anonymous namespace
-
 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
                                                  const Inst *LdSt,
                                                  Operand *Base) {
@@ -4955,17 +5093,15 @@
   assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
                        : (ValidImmMask & OffsetImm) == OffsetImm);
 
-  Variable *BaseR = makeReg(getPointerType());
-  Context.insert(InstAssign::create(Func, BaseR, BaseVar));
   if (OffsetReg != nullptr) {
     Variable *OffsetR = makeReg(getPointerType());
     Context.insert(InstAssign::create(Func, OffsetR, OffsetReg));
-    return OperandARM32Mem::create(Func, Ty, BaseR, OffsetR, ShiftKind,
+    return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
                                    OffsetRegShamt);
   }
 
   return OperandARM32Mem::create(
-      Func, Ty, BaseR,
+      Func, Ty, BaseVar,
       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
 }
 
@@ -5189,7 +5325,8 @@
     Variable *RegBase = nullptr;
     Variable *RegIndex = nullptr;
     assert(Base);
-    RegBase = legalizeToReg(Base);
+    RegBase = llvm::cast<Variable>(
+        legalize(Base, Legal_Reg | Legal_Rematerializable));
     assert(Ty < MemTraitsSize);
     if (Index) {
       assert(Offset == nullptr);
@@ -5324,6 +5461,10 @@
 
   if (auto *Var = llvm::dyn_cast<Variable>(From)) {
     if (Var->isRematerializable()) {
+      if (Allowed & Legal_Rematerializable) {
+        return From;
+      }
+
       // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
       // for a Variable in a Mem operand.
       Variable *T = makeReg(Var->getType(), RegNum);
@@ -5386,9 +5527,10 @@
   // If we didn't do address mode optimization, then we only have a
   // base/offset to work with. ARM always requires a base register, so
   // just use that to hold the operand.
-  Variable *BaseR = legalizeToReg(Operand);
+  Variable *Base = llvm::cast<Variable>(
+      legalize(Operand, Legal_Reg | Legal_Rematerializable));
   return OperandARM32Mem::create(
-      Func, Ty, BaseR,
+      Func, Ty, Base,
       llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
 }