ARM: lower integer divide and remainder, with divide-by-0 checks.
On ARM, both the software and hardware divide implementations
normally just return 0 when dividing by 0, which is different from
what X86 does. So, for NaCl, we've modified LLVM to trap instead,
by inserting explicit checks for a zero divisor.
Uses the -mattr=hwdiv-arm attribute to decide whether the 32-bit
sdiv/udiv instructions are supported.
Also lower the unreachable instruction to a trap instruction, since
we need a trap instruction for divide by 0 anyway.
Misc: fix switch test under MINIMAL=1, since ARM requires
allow_dump for filetype=asm.
Miscellaneous clang-format changes.
TODO: check via cross tests
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1214693004.
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 1691d6c..02d9080 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -22,6 +22,30 @@
namespace Ice {
+// Class encapsulating ARM cpu features / instruction set; built from ClFlags, queried via hasFeature().
+class TargetARM32Features {
+ TargetARM32Features() = delete; // No default construction; use the ClFlags constructor.
+ TargetARM32Features(const TargetARM32Features &) = delete; // Not copy-constructible.
+ TargetARM32Features &operator=(const TargetARM32Features &) = delete; // Not copy-assignable.
+
+public:
+ explicit TargetARM32Features(const ClFlags &Flags); // Only declared here; body is not part of this header hunk.
+
+ enum ARM32InstructionSet {
+ Begin,
+ // Neon is the PNaCl baseline instruction set.
+ Neon = Begin,
+ HWDivArm, // HW divide in ARM mode (not just Thumb mode).
+ End // Marker enumerator placed after the last feature.
+ };
+ // Returns true when I compares <= the stored InstructionSet (enumerator order), so lower entries are implied.
+ bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
+
+private:
+ ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin; // Defaults to Begin, which equals Neon.
+};
+
+// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
TargetARM32() = delete;
TargetARM32(const TargetARM32 &) = delete;
@@ -75,15 +99,9 @@
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
- enum ARM32InstructionSet {
- Begin,
- // Neon is the PNaCl baseline instruction set.
- Neon = Begin,
- HWDivArm, // HW divide in ARM mode (not just Thumb mode).
- End
- };
-
- ARM32InstructionSet getInstructionSet() const { return InstructionSet; }
+ bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
+ return CPUFeatures.hasFeature(I);
+ }
protected:
explicit TargetARM32(Cfg *Func);
@@ -141,6 +159,18 @@
llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const override;
+ // If a divide-by-zero check is needed, inserts a:
+ // test; branch .LSKIP; trap; .LSKIP: <continuation>.
+ // If no check is needed nothing is inserted.
+ void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
+ typedef void (TargetARM32::*ExtInstr)(Variable *, Variable *,
+ CondARM32::Cond);
+ typedef void (TargetARM32::*DivInstr)(Variable *, Variable *, Variable *,
+ CondARM32::Cond);
+ void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
+ ExtInstr ExtFunc, DivInstr DivFunc,
+ const char *DivHelperName, bool IsRemainder);
+
// The following are helpers that insert lowered ARM32 instructions
// with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical.
@@ -175,8 +205,8 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred));
}
- void _br(CondARM32::Cond Condition, CfgNode *TargetTrue,
- CfgNode *TargetFalse) {
+ void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
+ CondARM32::Cond Condition) {
Context.insert(
InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition));
}
@@ -186,6 +216,9 @@
void _br(CfgNode *Target, CondARM32::Cond Condition) {
Context.insert(InstARM32Br::create(Func, Target, Condition));
}
+ void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
+ Context.insert(InstARM32Br::create(Func, Label, Condition));
+ }
void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
@@ -210,6 +243,10 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred));
}
+ void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred));
+ }
// If Dest=nullptr is passed in, then a new variable is created,
// marked as infinite register allocation weight, and returned
// through the in/out Dest argument.
@@ -248,6 +285,12 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred));
}
+ void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ const bool SetFlags = true;
+ Context.insert(
+ InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+ }
void _push(const VarList &Sources) {
Context.insert(InstARM32Push::create(Func, Sources));
}
@@ -257,6 +300,9 @@
for (Variable *Dest : Dests)
Context.insert(InstFakeDef::create(Func, Dest));
}
+ void _ret(Variable *LR, Variable *Src0 = nullptr) {
+ Context.insert(InstARM32Ret::create(Func, LR, Src0));
+ }
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
@@ -271,6 +317,10 @@
Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
+ void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred));
+ }
void _str(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Str::create(Func, Value, Addr, Pred));
@@ -289,8 +339,14 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred));
}
- void _ret(Variable *LR, Variable *Src0 = nullptr) {
- Context.insert(InstARM32Ret::create(Func, LR, Src0));
+ void _tst(Variable *Src0, Operand *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Tst::create(Func, Src0, Src1, Pred));
+ }
+ void _trap() { Context.insert(InstARM32Trap::create(Func)); }
+ void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Udiv::create(Func, Dest, Src0, Src1, Pred));
}
void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
@@ -305,7 +361,7 @@
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
}
- ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
+ TargetARM32Features CPUFeatures;
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
@@ -386,6 +442,8 @@
private:
~TargetHeaderARM32() = default;
+
+ TargetARM32Features CPUFeatures;
};
} // end of namespace Ice