ARM lowering integer divide and remainder, with div by 0 checks.
On ARM, both the software and the hardware divide implementations
normally just return 0 when dividing by 0, which is different from
what X86 does. So, for NaCl, we've modified LLVM to trap instead,
by inserting explicit checks for a 0 divisor.
Uses the -mattr=hwdiv-arm attribute to decide whether 32-bit
sdiv/udiv are supported.
Also lower the unreachable-inst to a trap-inst, since we
need a trap instruction for divide by 0 anyway.
Misc: fix switch test under MINIMAL=1, since ARM requires
allow_dump for filetype=asm.
Random clang-format changes...
TODO: check via cross tests
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1214693004.
diff --git a/runtime/szrt.c b/runtime/szrt.c
index 009ebe3..ec6b8cd 100644
--- a/runtime/szrt.c
+++ b/runtime/szrt.c
@@ -42,6 +42,10 @@
// Other helper calls emitted by Subzero but not implemented here:
// Compiler-rt:
+// __udivsi3 - udiv i32
+// __divsi3 - sdiv i32
+// __umodsi3 - urem i32
+// __modsi3 - srem i32
// __udivdi3 - udiv i64
// __divdi3 - sdiv i64
// __umoddi3 - urem i64
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 2620b29..0476934 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -125,6 +125,34 @@
Inst->getSrc(1)->emit(Func);
}
+void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
+ const Cfg *Func) { // Writes "\t<Opcode><predicate>\t<dest>, <src0>, <src1>, <src2>" to the emit stream.
+ if (!BuildDefs::dump()) // Emits nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(Inst->getSrcSize() == 3); // Exactly three source operands are expected.
+ Str << "\t" << Opcode << Inst->getPredicate() << "\t"; // The predicate is appended directly to the opcode text.
+ Inst->getDest()->emit(Func);
+ Str << ", ";
+ Inst->getSrc(0)->emit(Func);
+ Str << ", ";
+ Inst->getSrc(1)->emit(Func);
+ Str << ", ";
+ Inst->getSrc(2)->emit(Func);
+}
+
+void InstARM32Pred::emitCmpLike(const char *Opcode, const InstARM32Pred *Inst,
+ const Cfg *Func) { // Writes "\t<Opcode><predicate>\t<src0>, <src1>" to the emit stream; no destination is printed.
+ if (!BuildDefs::dump()) // Emits nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(Inst->getSrcSize() == 2); // Exactly two source operands are expected.
+ Str << "\t" << Opcode << Inst->getPredicate() << "\t"; // The predicate is appended directly to the opcode text.
+ Inst->getSrc(0)->emit(Func);
+ Str << ", ";
+ Inst->getSrc(1)->emit(Func);
+}
+
OperandARM32Mem::OperandARM32Mem(Cfg * /* Func */, Type Ty, Variable *Base,
ConstantInteger32 *ImmOffset, AddrMode Mode)
: OperandARM32(kMem, Ty), Base(Base), ImmOffset(ImmOffset), Index(nullptr),
@@ -207,15 +235,19 @@
}
InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
- const CfgNode *TargetFalse, CondARM32::Cond Pred)
+ const CfgNode *TargetFalse,
+ const InstARM32Label *Label, CondARM32::Cond Pred)
: InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred),
- TargetTrue(TargetTrue), TargetFalse(TargetFalse) {}
+ TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) {
// If there is no next block, then there can be no fallthrough to
// optimize.
if (NextNode == nullptr)
return false;
+ // Intra-block conditional branches can't be optimized.
+ if (Label)
+ return false;
// If there is no fallthrough node, such as a non-default case label
// for a switch instruction, then there is no opportunity to
// optimize.
@@ -264,11 +296,12 @@
addSource(CallTarget);
}
-InstARM32Cmp::InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2,
- CondARM32::Cond Predicate)
- : InstARM32Pred(Func, InstARM32::Cmp, 2, nullptr, Predicate) {
- addSource(Src1);
- addSource(Src2);
+InstARM32Label::InstARM32Label(Cfg *Func, TargetARM32 *Target)
+ : InstARM32(Func, InstARM32::Label, 0, nullptr),
+ Number(Target->makeNextLabelNumber()) {}
+
+IceString InstARM32Label::getName(const Cfg *Func) const { // Builds the label text ".L<function name>$local$__<Number>".
+ return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number); // Number comes from the target's makeNextLabelNumber() at construction.
}
InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem,
@@ -277,15 +310,6 @@
addSource(Mem);
}
-InstARM32Mla::InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0,
- Variable *Src1, Variable *Acc,
- CondARM32::Cond Predicate)
- : InstARM32Pred(Func, InstARM32::Mla, 3, Dest, Predicate) {
- addSource(Src0);
- addSource(Src1);
- addSource(Acc);
-}
-
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
// Track modifications to Dests separately via FakeDefs.
@@ -314,6 +338,9 @@
addSource(Mem);
}
+InstARM32Trap::InstARM32Trap(Cfg *Func)
+ : InstARM32(Func, InstARM32::Trap, 0, nullptr) {}
+
InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi,
Variable *Src0, Variable *Src1,
CondARM32::Cond Predicate)
@@ -348,7 +375,15 @@
template <> const char *InstARM32Orr::Opcode = "orr";
template <> const char *InstARM32Rsb::Opcode = "rsb";
template <> const char *InstARM32Sbc::Opcode = "sbc";
+template <> const char *InstARM32Sdiv::Opcode = "sdiv";
template <> const char *InstARM32Sub::Opcode = "sub";
+template <> const char *InstARM32Udiv::Opcode = "udiv";
+// Four-addr ops
+template <> const char *InstARM32Mla::Opcode = "mla";
+template <> const char *InstARM32Mls::Opcode = "mls";
+// Cmp-like ops
+template <> const char *InstARM32Cmp::Opcode = "cmp";
+template <> const char *InstARM32Tst::Opcode = "tst";
void InstARM32::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
@@ -402,14 +437,18 @@
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t"
<< "b" << getPredicate() << "\t";
- if (isUnconditionalBranch()) {
- Str << getTargetFalse()->getAsmName();
+ if (Label) {
+ Str << Label->getName(Func);
} else {
- Str << getTargetTrue()->getAsmName();
- if (getTargetFalse()) {
- Str << "\n\t"
- << "b"
- << "\t" << getTargetFalse()->getAsmName();
+ if (isUnconditionalBranch()) {
+ Str << getTargetFalse()->getAsmName();
+ } else {
+ Str << getTargetTrue()->getAsmName();
+ if (getTargetFalse()) {
+ Str << "\n\t"
+ << "b"
+ << "\t" << getTargetFalse()->getAsmName();
+ }
}
}
}
@@ -426,13 +465,18 @@
Str << "br ";
if (getPredicate() == CondARM32::AL) {
- Str << "label %" << getTargetFalse()->getName();
+ Str << "label %"
+ << (Label ? Label->getName(Func) : getTargetFalse()->getName());
return;
}
- Str << getPredicate() << ", label %" << getTargetTrue()->getName();
- if (getTargetFalse()) {
- Str << ", label %" << getTargetFalse()->getName();
+ if (Label) {
+ Str << "label %" << Label->getName(Func);
+ } else {
+ Str << getPredicate() << ", label %" << getTargetTrue()->getName();
+ if (getTargetFalse()) {
+ Str << ", label %" << getTargetFalse()->getName();
+ }
}
}
@@ -479,30 +523,23 @@
getCallTarget()->dump(Func);
}
-void InstARM32Cmp::emit(const Cfg *Func) const {
+void InstARM32Label::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
- assert(getSrcSize() == 2);
- Str << "\t"
- << "cmp" << getPredicate() << "\t";
- getSrc(0)->emit(Func);
- Str << ", ";
- getSrc(1)->emit(Func);
+ Str << getName(Func) << ":";
}
-void InstARM32Cmp::emitIAS(const Cfg *Func) const {
- assert(getSrcSize() == 2);
+void InstARM32Label::emitIAS(const Cfg *Func) const {
(void)Func;
llvm_unreachable("Not yet implemented");
}
-void InstARM32Cmp::dump(const Cfg *Func) const {
+void InstARM32Label::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
- dumpOpcodePred(Str, "cmp", getSrc(0)->getType());
- dumpSources(Func);
+ Str << getName(Func) << ":";
}
void InstARM32Ldr::emit(const Cfg *Func) const {
@@ -536,40 +573,6 @@
dumpSources(Func);
}
-void InstARM32Mla::emit(const Cfg *Func) const {
- if (!BuildDefs::dump())
- return;
- Ostream &Str = Func->getContext()->getStrEmit();
- assert(getSrcSize() == 3);
- assert(getDest()->hasReg());
- Str << "\t"
- << "mla" << getPredicate() << "\t";
- getDest()->emit(Func);
- Str << ", ";
- getSrc(0)->emit(Func);
- Str << ", ";
- getSrc(1)->emit(Func);
- Str << ", ";
- getSrc(2)->emit(Func);
-}
-
-void InstARM32Mla::emitIAS(const Cfg *Func) const {
- assert(getSrcSize() == 3);
- (void)Func;
- llvm_unreachable("Not yet implemented");
-}
-
-void InstARM32Mla::dump(const Cfg *Func) const {
- if (!BuildDefs::dump())
- return;
- Ostream &Str = Func->getContext()->getStrDump();
- dumpDest(Func);
- Str << " = ";
- dumpOpcodePred(Str, "mla", getDest()->getType());
- Str << " ";
- dumpSources(Func);
-}
-
template <> void InstARM32Movw::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
@@ -757,6 +760,33 @@
getSrc(0)->dump(Func);
}
+void InstARM32Trap::emit(const Cfg *Func) const { // Emits the trap as a raw ".long" data directive in the textual asm output.
+ if (!BuildDefs::dump()) // Emits nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 0); // The trap instruction is created with no source operands.
+ // There isn't a mnemonic for the special NaCl Trap encoding, so dump
+ // the raw bytes.
+ Str << "\t.long 0x";
+ ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ for (uint8_t I : Asm->getNonExecBundlePadding()) { // Each padding byte is appended as hex, in iteration order.
+ Str.write_hex(I); // NOTE(review): presumably prints no leading zero for bytes < 0x10 -- confirm the literal keeps its full width.
+ }
+}
+
+void InstARM32Trap::emitIAS(const Cfg *Func) const {
+ assert(getSrcSize() == 0);
+ (void)Func;
+ llvm_unreachable("Not yet implemented");
+}
+
+void InstARM32Trap::dump(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "trap";
+}
+
void InstARM32Umull::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 8167ed4..caef19f 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -262,10 +262,12 @@
Call,
Cmp,
Eor,
+ Label,
Ldr,
Lsl,
Lsr,
Mla,
+ Mls,
Mov,
Movt,
Movw,
@@ -277,9 +279,13 @@
Ret,
Rsb,
Sbc,
+ Sdiv,
Str,
Sub,
Sxt,
+ Trap,
+ Tst,
+ Udiv,
Umull,
Uxt
};
@@ -322,6 +328,10 @@
const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
const Cfg *Func, bool SetFlags);
+ static void emitFourAddr(const char *Opcode, const InstARM32Pred *Inst,
+ const Cfg *Func);
+ static void emitCmpLike(const char *Opcode, const InstARM32Pred *Inst,
+ const Cfg *Func);
protected:
CondARM32::Cond Predicate;
@@ -477,11 +487,11 @@
// Create an ordinary binary-op instruction like add, and sub.
// Dest and Src1 must be registers.
static InstARM32ThreeAddrGPR *create(Cfg *Func, Variable *Dest,
- Variable *Src1, Operand *Src2,
+ Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate,
bool SetFlags = false) {
return new (Func->allocate<InstARM32ThreeAddrGPR>())
- InstARM32ThreeAddrGPR(Func, Dest, Src1, Src2, Predicate, SetFlags);
+ InstARM32ThreeAddrGPR(Func, Dest, Src0, Src1, Predicate, SetFlags);
}
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
@@ -505,15 +515,107 @@
static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
private:
- InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src1,
- Operand *Src2, CondARM32::Cond Predicate, bool SetFlags)
+ InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0,
+ Operand *Src1, CondARM32::Cond Predicate, bool SetFlags)
: InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) {
+ addSource(Src0);
+ addSource(Src1);
+ }
+
+ static const char *Opcode;
+ bool SetFlags;
+};
+
+// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate.
+template <InstARM32::InstKindARM32 K>
+class InstARM32FourAddrGPR : public InstARM32Pred {
+ InstARM32FourAddrGPR() = delete;
+ InstARM32FourAddrGPR(const InstARM32FourAddrGPR &) = delete;
+ InstARM32FourAddrGPR &operator=(const InstARM32FourAddrGPR &) = delete;
+
+public:
+ // Every operand must be a register.
+ static InstARM32FourAddrGPR *create(Cfg *Func, Variable *Dest, Variable *Src0,
+ Variable *Src1, Variable *Src2,
+ CondARM32::Cond Predicate) {
+ return new (Func->allocate<InstARM32FourAddrGPR>())
+ InstARM32FourAddrGPR(Func, Dest, Src0, Src1, Src2, Predicate);
+ }
+ void emit(const Cfg *Func) const override {
+ if (!BuildDefs::dump())
+ return;
+ emitFourAddr(Opcode, this, Func);
+ }
+ void emitIAS(const Cfg *Func) const override {
+ (void)Func;
+ llvm::report_fatal_error("Not yet implemented");
+ }
+ void dump(const Cfg *Func) const override {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ dumpDest(Func);
+ Str << " = ";
+ dumpOpcodePred(Str, Opcode, getDest()->getType());
+ Str << " ";
+ dumpSources(Func);
+ }
+ static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
+
+private:
+ InstARM32FourAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0,
+ Variable *Src1, Variable *Src2,
+ CondARM32::Cond Predicate)
+ : InstARM32Pred(Func, K, 3, Dest, Predicate) {
+ addSource(Src0);
addSource(Src1);
addSource(Src2);
}
static const char *Opcode;
- bool SetFlags;
+};
+
+// Instructions of the form x cmpop y (setting flags). The instruction has two sources and no destination.
+template <InstARM32::InstKindARM32 K>
+class InstARM32CmpLike : public InstARM32Pred {
+ InstARM32CmpLike() = delete;
+ InstARM32CmpLike(const InstARM32CmpLike &) = delete;
+ InstARM32CmpLike &operator=(const InstARM32CmpLike &) = delete;
+
+public:
+ static InstARM32CmpLike *create(Cfg *Func, Variable *Src0, Operand *Src1,
+ CondARM32::Cond Predicate) { // Allocates the instruction through Func->allocate and constructs it in place.
+ return new (Func->allocate<InstARM32CmpLike>())
+ InstARM32CmpLike(Func, Src0, Src1, Predicate);
+ }
+ void emit(const Cfg *Func) const override { // Textual asm output; delegates to emitCmpLike with this kind's Opcode.
+ if (!BuildDefs::dump())
+ return;
+ emitCmpLike(Opcode, this, Func);
+ }
+ void emitIAS(const Cfg *Func) const override { // Integrated-assembler output is not implemented for these instructions.
+ (void)Func;
+ llvm_unreachable("Not yet implemented");
+ }
+ void dump(const Cfg *Func) const override { // Debug dump: opcode/predicate (typed by the first source), then the sources.
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ dumpOpcodePred(Str, Opcode, getSrc(0)->getType());
+ Str << " ";
+ dumpSources(Func);
+ }
+ static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
+
+private:
+ InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1,
+ CondARM32::Cond Predicate)
+ : InstARM32Pred(Func, K, 2, nullptr, Predicate) { // Two sources, null destination.
+ addSource(Src0);
+ addSource(Src1);
+ }
+
+ static const char *Opcode; // Mnemonic text, specialized per instruction kind K.
};
typedef InstARM32ThreeAddrGPR<InstARM32::Adc> InstARM32Adc;
@@ -528,7 +630,9 @@
typedef InstARM32ThreeAddrGPR<InstARM32::Orr> InstARM32Orr;
typedef InstARM32ThreeAddrGPR<InstARM32::Rsb> InstARM32Rsb;
typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc;
+typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv;
typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub;
+typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv;
// Move instruction (variable <- flex). This is more of a pseudo-inst.
// If var is a register, then we use "mov". If var is stack, then we use
// "str" to store to the stack.
@@ -543,6 +647,35 @@
// but we aren't using that for now, so just model as a Unaryop.
typedef InstARM32UnaryopGPR<InstARM32::Sxt> InstARM32Sxt;
typedef InstARM32UnaryopGPR<InstARM32::Uxt> InstARM32Uxt;
+typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla;
+typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls;
+typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp;
+typedef InstARM32CmpLike<InstARM32::Tst> InstARM32Tst;
+
+// InstARM32Label represents an intra-block label that is the target
+// of an intra-block branch. The offset between the label and the
+// branch must fit in the instruction immediate (considered "near").
+class InstARM32Label : public InstARM32 {
+ InstARM32Label() = delete;
+ InstARM32Label(const InstARM32Label &) = delete;
+ InstARM32Label &operator=(const InstARM32Label &) = delete;
+
+public:
+ static InstARM32Label *create(Cfg *Func, TargetARM32 *Target) { // Allocates through Func->allocate; Target is handed to the constructor.
+ return new (Func->allocate<InstARM32Label>()) InstARM32Label(Func, Target);
+ }
+ uint32_t getEmitInstCount() const override { return 0; } // A label reports zero emitted instructions.
+ IceString getName(const Cfg *Func) const; // Label text used by emit() and dump().
+ SizeT getNumber() const { return Number; }
+ void emit(const Cfg *Func) const override;
+ void emitIAS(const Cfg *Func) const override;
+ void dump(const Cfg *Func) const override;
+
+private:
+ InstARM32Label(Cfg *Func, TargetARM32 *Target);
+
+ SizeT Number; // used for unique label generation.
+};
// Direct branch instruction.
class InstARM32Br : public InstARM32Pred {
@@ -555,14 +688,16 @@
static InstARM32Br *create(Cfg *Func, CfgNode *TargetTrue,
CfgNode *TargetFalse, CondARM32::Cond Predicate) {
assert(Predicate != CondARM32::AL);
+ constexpr InstARM32Label *NoLabel = nullptr;
return new (Func->allocate<InstARM32Br>())
- InstARM32Br(Func, TargetTrue, TargetFalse, Predicate);
+ InstARM32Br(Func, TargetTrue, TargetFalse, NoLabel, Predicate);
}
// Create an unconditional branch to a node.
static InstARM32Br *create(Cfg *Func, CfgNode *Target) {
- const CfgNode *NoCondTarget = nullptr;
+ constexpr CfgNode *NoCondTarget = nullptr;
+ constexpr InstARM32Label *NoLabel = nullptr;
return new (Func->allocate<InstARM32Br>())
- InstARM32Br(Func, NoCondTarget, Target, CondARM32::AL);
+ InstARM32Br(Func, NoCondTarget, Target, NoLabel, CondARM32::AL);
}
// Create a non-terminator conditional branch to a node, with a
// fallthrough to the next instruction in the current node. This is
@@ -570,15 +705,27 @@
static InstARM32Br *create(Cfg *Func, CfgNode *Target,
CondARM32::Cond Predicate) {
assert(Predicate != CondARM32::AL);
- const CfgNode *NoUncondTarget = nullptr;
+ constexpr CfgNode *NoUncondTarget = nullptr;
+ constexpr InstARM32Label *NoLabel = nullptr;
return new (Func->allocate<InstARM32Br>())
- InstARM32Br(Func, Target, NoUncondTarget, Predicate);
+ InstARM32Br(Func, Target, NoUncondTarget, NoLabel, Predicate);
+ }
+ // Create a conditional intra-block branch (or unconditional, if
+ // Predicate==AL) to a label in the current block.
+ static InstARM32Br *create(Cfg *Func, InstARM32Label *Label,
+ CondARM32::Cond Predicate) {
+ constexpr CfgNode *NoCondTarget = nullptr;
+ constexpr CfgNode *NoUncondTarget = nullptr;
+ return new (Func->allocate<InstARM32Br>())
+ InstARM32Br(Func, NoCondTarget, NoUncondTarget, Label, Predicate);
}
const CfgNode *getTargetTrue() const { return TargetTrue; }
const CfgNode *getTargetFalse() const { return TargetFalse; }
bool optimizeBranch(const CfgNode *NextNode);
uint32_t getEmitInstCount() const override {
uint32_t Sum = 0;
+ if (Label)
+ ++Sum;
if (getTargetTrue())
++Sum;
if (getTargetFalse())
@@ -596,10 +743,11 @@
private:
InstARM32Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse,
- CondARM32::Cond Predicate);
+ const InstARM32Label *Label, CondARM32::Cond Predicate);
const CfgNode *TargetTrue;
const CfgNode *TargetFalse;
+ const InstARM32Label *Label; // Intra-block branch target
};
// AdjustStack instruction - subtracts SP by the given amount and
@@ -653,28 +801,6 @@
InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget);
};
-// Integer compare instruction.
-class InstARM32Cmp : public InstARM32Pred {
- InstARM32Cmp() = delete;
- InstARM32Cmp(const InstARM32Cmp &) = delete;
- InstARM32Cmp &operator=(const InstARM32Cmp &) = delete;
-
-public:
- static InstARM32Cmp *create(Cfg *Func, Variable *Src1, Operand *Src2,
- CondARM32::Cond Predicate) {
- return new (Func->allocate<InstARM32Cmp>())
- InstARM32Cmp(Func, Src1, Src2, Predicate);
- }
- void emit(const Cfg *Func) const override;
- void emitIAS(const Cfg *Func) const override;
- void dump(const Cfg *Func) const override;
- static bool classof(const Inst *Inst) { return isClassof(Inst, Cmp); }
-
-private:
- InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2,
- CondARM32::Cond Predicate);
-};
-
// Load instruction.
class InstARM32Ldr : public InstARM32Pred {
InstARM32Ldr() = delete;
@@ -698,30 +824,6 @@
CondARM32::Cond Predicate);
};
-// Multiply Accumulate: d := x * y + a
-class InstARM32Mla : public InstARM32Pred {
- InstARM32Mla() = delete;
- InstARM32Mla(const InstARM32Mla &) = delete;
- InstARM32Mla &operator=(const InstARM32Mla &) = delete;
-
-public:
- // Everything must be a register.
- static InstARM32Mla *create(Cfg *Func, Variable *Dest, Variable *Src0,
- Variable *Src1, Variable *Acc,
- CondARM32::Cond Predicate) {
- return new (Func->allocate<InstARM32Mla>())
- InstARM32Mla(Func, Dest, Src0, Src1, Acc, Predicate);
- }
- void emit(const Cfg *Func) const override;
- void emitIAS(const Cfg *Func) const override;
- void dump(const Cfg *Func) const override;
- static bool classof(const Inst *Inst) { return isClassof(Inst, Mla); }
-
-private:
- InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1,
- Variable *Acc, CondARM32::Cond Predicate);
-};
-
// Pop into a list of GPRs. Technically this can be predicated, but we don't
// need that functionality.
class InstARM32Pop : public InstARM32 {
@@ -816,6 +918,24 @@
CondARM32::Cond Predicate);
};
+class InstARM32Trap : public InstARM32 { // Trap instruction: no sources and no destination.
+ InstARM32Trap() = delete;
+ InstARM32Trap(const InstARM32Trap &) = delete;
+ InstARM32Trap &operator=(const InstARM32Trap &) = delete;
+
+public:
+ static InstARM32Trap *create(Cfg *Func) { // Allocates through Func->allocate and constructs in place.
+ return new (Func->allocate<InstARM32Trap>()) InstARM32Trap(Func);
+ }
+ void emit(const Cfg *Func) const override; // Textual asm output.
+ void emitIAS(const Cfg *Func) const override; // Integrated-assembler output.
+ void dump(const Cfg *Func) const override; // Debug dump.
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Trap); }
+
+private:
+ explicit InstARM32Trap(Cfg *Func);
+};
+
// Unsigned Multiply Long: d.lo, d.hi := x * y
class InstARM32Umull : public InstARM32Pred {
InstARM32Umull() = delete;
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index ca62d79..96d801a 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -1939,7 +1939,7 @@
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
- Str << "ud2\n";
+ Str << "ud2";
}
void InstX8632Test::emit(const Cfg *Func) const {
@@ -1998,7 +1998,7 @@
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
- Str << "mfence\n";
+ Str << "mfence";
}
void InstX8632Store::emit(const Cfg *Func) const {
@@ -2512,7 +2512,6 @@
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = fstp." << getDest()->getType() << ", st(0)";
- Str << "\n";
}
template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const {
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 205e573..b543208 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -353,16 +353,20 @@
const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
const static constexpr char *H_frem_f32 = "fmodf";
const static constexpr char *H_frem_f64 = "fmod";
+ const static constexpr char *H_sdiv_i32 = "__divsi3";
const static constexpr char *H_sdiv_i64 = "__divdi3";
const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
+ const static constexpr char *H_srem_i32 = "__modsi3";
const static constexpr char *H_srem_i64 = "__moddi3";
+ const static constexpr char *H_udiv_i32 = "__udivsi3";
const static constexpr char *H_udiv_i64 = "__udivdi3";
const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32";
const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
+ const static constexpr char *H_urem_i32 = "__umodsi3";
const static constexpr char *H_urem_i64 = "__umoddi3";
private:
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index fad9bcf..c090075 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -141,21 +141,34 @@
return Utils::applyAlignment(Value, typeAlignInBytes);
}
+// Conservatively check if at compile time we know that the operand is
+// definitely a non-zero integer. Only a ConstantInteger32 with a non-zero value yields true.
+bool isGuaranteedNonzeroInt(const Operand *Op) {
+ if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) { // The _or_null form tolerates a null Op (lowerIDivRem forwards nullptr as SrcHi).
+ return Const->getValue() != 0;
+ }
+ return false; // Null, non-constant, or any other operand kind: not provably non-zero.
+}
+
} // end of anonymous namespace
-TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func) {
+TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
static_assert(
(ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
(TargetInstructionSet::ARM32InstructionSet_End -
TargetInstructionSet::ARM32InstructionSet_Begin),
"ARM32InstructionSet range different from TargetInstructionSet");
- if (Func->getContext()->getFlags().getTargetInstructionSet() !=
+ if (Flags.getTargetInstructionSet() !=
TargetInstructionSet::BaseInstructionSet) {
InstructionSet = static_cast<ARM32InstructionSet>(
- (Func->getContext()->getFlags().getTargetInstructionSet() -
+ (Flags.getTargetInstructionSet() -
TargetInstructionSet::ARM32InstructionSet_Begin) +
ARM32InstructionSet::Begin);
}
+}
+
+TargetARM32::TargetARM32(Cfg *Func)
+ : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
// class.
@@ -1009,6 +1022,75 @@
_mov(Dest, SP);
}
+void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { // Emits a flag-setting test of the divisor, then a branch over a trap.
+ if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) // A known non-zero half means the whole divisor is non-zero: no check needed.
+ return;
+ Variable *SrcLoReg = legalizeToVar(SrcLo);
+ switch (Ty) {
+ default:
+ llvm_unreachable("Unexpected type");
+ case IceType_i8: { // Test only the low 8 bits of the register.
+ Operand *Mask =
+ legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
+ _tst(SrcLoReg, Mask);
+ break;
+ }
+ case IceType_i16: { // Test only the low 16 bits of the register.
+ Operand *Mask =
+ legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
+ _tst(SrcLoReg, Mask);
+ break;
+ }
+ case IceType_i32: { // Test the register against itself.
+ _tst(SrcLoReg, SrcLoReg);
+ break;
+ }
+ case IceType_i64: { // Combine both halves with a flag-setting orr.
+ Variable *ScratchReg = makeReg(IceType_i32);
+ _orrs(ScratchReg, SrcLoReg, SrcHi);
+ // ScratchReg isn't going to be used, but we need the
+ // side-effect of setting flags from this operation.
+ Context.insert(InstFakeUse::create(Func, ScratchReg));
+ }
+ }
+ InstARM32Label *Label = InstARM32Label::create(Func, this);
+ _br(Label, CondARM32::NE); // Skip the trap on NE; presumably NE means the tested value was non-zero -- confirm against the flag semantics.
+ _trap();
+ Context.insert(Label); // The label is inserted right after the trap, so the branch lands past it.
+}
+
+void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
+ Operand *Src1, ExtInstr ExtFunc,
+ DivInstr DivFunc, const char *DivHelperName,
+ bool IsRemainder) { // Lowers an integer div/rem: zero check on Src1, then a hardware divide or a helper call.
+ div0Check(Dest->getType(), Src1, nullptr); // Src1 is passed before legalization so a constant divisor is still visible to the check.
+ Variable *Src1R = legalizeToVar(Src1);
+ Variable *T0R = Src0R;
+ Variable *T1R = Src1R;
+ if (Dest->getType() != IceType_i32) { // Non-i32 operands are first extended into fresh i32 registers via ExtFunc.
+ T0R = makeReg(IceType_i32);
+ (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
+ T1R = makeReg(IceType_i32);
+ (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
+ }
+ if (hasCPUFeature(TargetARM32Features::HWDivArm)) { // Hardware divide path.
+ (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
+ if (IsRemainder) {
+ Variable *T2 = makeReg(IceType_i32);
+ _mls(T2, T, T1R, T0R); // presumably T2 = T0R - T * T1R, i.e. the remainder -- confirm the mls operand order.
+ T = T2;
+ }
+ _mov(Dest, T);
+ } else { // No hardware divide: call DivHelperName with the two (possibly extended) operands.
+ constexpr SizeT MaxSrcs = 2;
+ InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
+ Call->addArg(T0R);
+ Call->addArg(T1R);
+ lowerCall(Call);
+ }
+ return;
+}
+
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
// TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
@@ -1182,9 +1264,47 @@
case InstArithmetic::Udiv:
case InstArithmetic::Sdiv:
case InstArithmetic::Urem:
- case InstArithmetic::Srem:
- UnimplementedError(Func->getContext()->getFlags());
- break;
+ case InstArithmetic::Srem: {
+ // Check for divide by 0 (ARM normally doesn't trap, but we want it
+ // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
+ // to a register, which will hide a constant source operand.
+ // Instead, check the not-yet-legalized Src1 to optimize-out a divide
+ // by 0 check.
+ if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
+ if (C64->getValue() == 0) {
+ div0Check(IceType_i64, Src1Lo, Src1Hi);
+ }
+ } else {
+ div0Check(IceType_i64, Src1Lo, Src1Hi);
+ }
+ // Technically, ARM has their own aeabi routines, but we can use the
+ // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
+ // but uses the more standard __moddi3 for rem.
+ const char *HelperName = "";
+ switch (Inst->getOp()) {
+ case InstArithmetic::Udiv:
+ HelperName = H_udiv_i64;
+ break;
+ case InstArithmetic::Sdiv:
+ HelperName = H_sdiv_i64;
+ break;
+ case InstArithmetic::Urem:
+ HelperName = H_urem_i64;
+ break;
+ case InstArithmetic::Srem:
+ HelperName = H_srem_i64;
+ break;
+ default:
+ llvm_unreachable("Should have only matched div ops.");
+ break;
+ }
+ constexpr SizeT MaxSrcs = 2;
+ InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
+ Call->addArg(Inst->getSrc(0));
+ Call->addArg(Inst->getSrc(1));
+ lowerCall(Call);
+ return;
+ }
case InstArithmetic::Fadd:
case InstArithmetic::Fsub:
case InstArithmetic::Fmul:
@@ -1197,61 +1317,73 @@
UnimplementedError(Func->getContext()->getFlags());
} else { // Dest->getType() is non-i64 scalar
Variable *Src0R = legalizeToVar(Inst->getSrc(0));
- Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
+ Operand *Src1RF = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
Variable *T = makeReg(Dest->getType());
switch (Inst->getOp()) {
case InstArithmetic::_num:
llvm_unreachable("Unknown arithmetic operator");
break;
case InstArithmetic::Add: {
- _add(T, Src0R, Src1);
+ _add(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::And: {
- _and(T, Src0R, Src1);
+ _and(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Or: {
- _orr(T, Src0R, Src1);
+ _orr(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Xor: {
- _eor(T, Src0R, Src1);
+ _eor(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Sub: {
- _sub(T, Src0R, Src1);
+ _sub(T, Src0R, Src1RF);
_mov(Dest, T);
} break;
case InstArithmetic::Mul: {
- Variable *Src1R = legalizeToVar(Src1);
+ Variable *Src1R = legalizeToVar(Src1RF);
_mul(T, Src0R, Src1R);
_mov(Dest, T);
} break;
case InstArithmetic::Shl:
- _lsl(T, Src0R, Src1);
+ _lsl(T, Src0R, Src1RF);
_mov(Dest, T);
break;
case InstArithmetic::Lshr:
- _lsr(T, Src0R, Src1);
+ _lsr(T, Src0R, Src1RF);
_mov(Dest, T);
break;
case InstArithmetic::Ashr:
- _asr(T, Src0R, Src1);
+ _asr(T, Src0R, Src1RF);
_mov(Dest, T);
break;
- case InstArithmetic::Udiv:
- UnimplementedError(Func->getContext()->getFlags());
- break;
- case InstArithmetic::Sdiv:
- UnimplementedError(Func->getContext()->getFlags());
- break;
- case InstArithmetic::Urem:
- UnimplementedError(Func->getContext()->getFlags());
- break;
- case InstArithmetic::Srem:
- UnimplementedError(Func->getContext()->getFlags());
- break;
+ case InstArithmetic::Udiv: {
+ constexpr bool IsRemainder = false;
+ lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt,
+ &TargetARM32::_udiv, H_udiv_i32, IsRemainder);
+ return;
+ }
+ case InstArithmetic::Sdiv: {
+ constexpr bool IsRemainder = false;
+ lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt,
+ &TargetARM32::_sdiv, H_sdiv_i32, IsRemainder);
+ return;
+ }
+ case InstArithmetic::Urem: {
+ constexpr bool IsRemainder = true;
+ lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt,
+ &TargetARM32::_udiv, H_urem_i32, IsRemainder);
+ return;
+ }
+ case InstArithmetic::Srem: {
+ constexpr bool IsRemainder = true;
+ lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt,
+ &TargetARM32::_sdiv, H_srem_i32, IsRemainder);
+ return;
+ }
case InstArithmetic::Fadd:
UnimplementedError(Func->getContext()->getFlags());
break;
@@ -1322,7 +1454,7 @@
Variable *Src0R = legalizeToVar(Cond);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_cmp(Src0R, Zero);
- _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
+ _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
}
void TargetARM32::lowerCall(const InstCall *Instr) {
@@ -2113,7 +2245,7 @@
}
void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
- UnimplementedError(Func->getContext()->getFlags());
+ _trap();
}
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
@@ -2417,7 +2549,7 @@
}
TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
- : TargetHeaderLowering(Ctx) {}
+ : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}
void TargetHeaderARM32::lower() {
OstreamLocker L(Ctx);
@@ -2431,12 +2563,18 @@
// sub-subsection of the first public subsection of the attributes.
Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
// Chromebooks are at least A15, but do A9 for higher compat.
- Str << ".cpu cortex-a9\n"
- << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
+ // For some reason, the LLVM ARM asm parser has the .cpu directive override
+ // the mattr specified on the command line. So to test hwdiv, we need to set
+ // the .cpu directive to a core with hwdiv (can't just rely on --mattr=...).
+ if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
+ Str << ".cpu cortex-a15\n";
+ } else {
+ Str << ".cpu cortex-a9\n";
+ }
+ Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
<< ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
<< ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
- // TODO(jvoung): check other CPU features like HW div.
Str << ".fpu neon\n"
<< ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
<< ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
@@ -2450,6 +2588,9 @@
<< ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
<< ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
<< ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
+ if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
+ Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
+ }
// Technically R9 is used for TLS with Sandboxing, and we reserve it.
// However, for compatibility with current NaCl LLVM, don't claim that.
Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 1691d6c..02d9080 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -22,6 +22,30 @@
namespace Ice {
+// Class encapsulating ARM CPU features / instruction set.
+class TargetARM32Features {
+ TargetARM32Features() = delete;
+ TargetARM32Features(const TargetARM32Features &) = delete;
+ TargetARM32Features &operator=(const TargetARM32Features &) = delete;
+
+public:
+ explicit TargetARM32Features(const ClFlags &Flags);
+
+ enum ARM32InstructionSet {
+ Begin,
+ // Neon is the PNaCl baseline instruction set.
+ Neon = Begin,
+ HWDivArm, // HW divide in ARM mode (not just Thumb mode).
+ End
+ };
+
+ bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
+
+private:
+ ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
+};
+
+// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
TargetARM32() = delete;
TargetARM32(const TargetARM32 &) = delete;
@@ -75,15 +99,9 @@
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
- enum ARM32InstructionSet {
- Begin,
- // Neon is the PNaCl baseline instruction set.
- Neon = Begin,
- HWDivArm, // HW divide in ARM mode (not just Thumb mode).
- End
- };
-
- ARM32InstructionSet getInstructionSet() const { return InstructionSet; }
+ bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
+ return CPUFeatures.hasFeature(I);
+ }
protected:
explicit TargetARM32(Cfg *Func);
@@ -141,6 +159,18 @@
llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const override;
+ // If a divide-by-zero check is needed, inserts the sequence:
+ // test; branch .LSKIP; trap; .LSKIP: <continuation>.
+ // If no check is needed, nothing is inserted.
+ void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
+ typedef void (TargetARM32::*ExtInstr)(Variable *, Variable *,
+ CondARM32::Cond);
+ typedef void (TargetARM32::*DivInstr)(Variable *, Variable *, Variable *,
+ CondARM32::Cond);
+ void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
+ ExtInstr ExtFunc, DivInstr DivFunc,
+ const char *DivHelperName, bool IsRemainder);
+
// The following are helpers that insert lowered ARM32 instructions
// with minimal syntactic overhead, so that the lowering code can
// look as close to assembly as practical.
@@ -175,8 +205,8 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred));
}
- void _br(CondARM32::Cond Condition, CfgNode *TargetTrue,
- CfgNode *TargetFalse) {
+ void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
+ CondARM32::Cond Condition) {
Context.insert(
InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition));
}
@@ -186,6 +216,9 @@
void _br(CfgNode *Target, CondARM32::Cond Condition) {
Context.insert(InstARM32Br::create(Func, Target, Condition));
}
+ void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
+ Context.insert(InstARM32Br::create(Func, Label, Condition));
+ }
void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred));
@@ -210,6 +243,10 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred));
}
+ void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred));
+ }
// If Dest=nullptr is passed in, then a new variable is created,
// marked as infinite register allocation weight, and returned
// through the in/out Dest argument.
@@ -248,6 +285,12 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred));
}
+ void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ const bool SetFlags = true;
+ Context.insert(
+ InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags));
+ }
void _push(const VarList &Sources) {
Context.insert(InstARM32Push::create(Func, Sources));
}
@@ -257,6 +300,9 @@
for (Variable *Dest : Dests)
Context.insert(InstFakeDef::create(Func, Dest));
}
+ void _ret(Variable *LR, Variable *Src0 = nullptr) {
+ Context.insert(InstARM32Ret::create(Func, LR, Src0));
+ }
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
@@ -271,6 +317,10 @@
Context.insert(
InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags));
}
+ void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred));
+ }
void _str(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Str::create(Func, Value, Addr, Pred));
@@ -289,8 +339,14 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred));
}
- void _ret(Variable *LR, Variable *Src0 = nullptr) {
- Context.insert(InstARM32Ret::create(Func, LR, Src0));
+ void _tst(Variable *Src0, Operand *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Tst::create(Func, Src0, Src1, Pred));
+ }
+ void _trap() { Context.insert(InstARM32Trap::create(Func)); }
+ void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert(InstARM32Udiv::create(Func, Dest, Src0, Src1, Pred));
}
void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
@@ -305,7 +361,7 @@
Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
}
- ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
+ TargetARM32Features CPUFeatures;
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
@@ -386,6 +442,8 @@
private:
~TargetHeaderARM32() = default;
+
+ TargetARM32Features CPUFeatures;
};
} // end of namespace Ice
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
index 54d8ff0..2460a24 100644
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -315,6 +315,11 @@
; OPTM1-LABEL: div64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __divdi3
+;
+; ARM32-LABEL: div64BitSigned
+; ARM32: orrs {{r.*}}, {{r.*}}
+; ARM32: bne
+; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitSignedConst(i64 %a) {
entry:
@@ -330,6 +335,14 @@
; OPTM1: mov DWORD PTR [esp+0xc],0xb3a
; OPTM1: mov DWORD PTR [esp+0x8],0x73ce2ff2
; OPTM1: call {{.*}} R_{{.*}} __divdi3
+;
+; ARM32-LABEL: div64BitSignedConst
+; With a nonzero constant divisor, the divide-by-zero check is optimized out.
+; ARM32-NOT: orrs
+; ARM32: movw {{.*}} ; 0x2ff2
+; ARM32: movt {{.*}} ; 0x73ce
+; ARM32: movw {{.*}} ; 0xb3a
+; ARM32: bl {{.*}} __divdi3
define internal i64 @div64BitUnsigned(i64 %a, i64 %b) {
entry:
@@ -341,6 +354,11 @@
;
; OPTM1-LABEL: div64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __udivdi3
+;
+; ARM32-LABEL: div64BitUnsigned
+; ARM32: orrs {{r.*}}, {{r.*}}
+; ARM32: bne
+; ARM32: bl {{.*}} __udivdi3
define internal i64 @rem64BitSigned(i64 %a, i64 %b) {
entry:
@@ -352,6 +370,11 @@
;
; OPTM1-LABEL: rem64BitSigned
; OPTM1: call {{.*}} R_{{.*}} __moddi3
+;
+; ARM32-LABEL: rem64BitSigned
+; ARM32: orrs {{r.*}}, {{r.*}}
+; ARM32: bne
+; ARM32: bl {{.*}} __moddi3
define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) {
entry:
@@ -363,6 +386,11 @@
;
; OPTM1-LABEL: rem64BitUnsigned
; OPTM1: call {{.*}} R_{{.*}} __umoddi3
+;
+; ARM32-LABEL: rem64BitUnsigned
+; ARM32: orrs {{r.*}}, {{r.*}}
+; ARM32: bne
+; ARM32: bl {{.*}} __umoddi3
define internal i64 @shl64BitSigned(i64 %a, i64 %b) {
entry:
diff --git a/tests_lit/llvm2ice_tests/arith.ll b/tests_lit/llvm2ice_tests/arith.ll
index ed82426..f6318ae 100644
--- a/tests_lit/llvm2ice_tests/arith.ll
+++ b/tests_lit/llvm2ice_tests/arith.ll
@@ -8,10 +8,15 @@
; once enough infrastructure is in. Also, switch to --filetype=obj
; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble \
-; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
+; RUN: -i %s --args -O2 --skip-unimplemented \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
+; RUN: %if --need=target_ARM32 --need=allow_dump \
+; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
+; RUN: -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \
+; RUN: | %if --need=target_ARM32 --need=allow_dump \
+; RUN: --command FileCheck --check-prefix ARM32HWDIV %s
define i32 @Add(i32 %a, i32 %b) {
entry:
@@ -117,10 +122,32 @@
; CHECK-LABEL: Sdiv
; CHECK: cdq
; CHECK: idiv e
+;
; ARM32-LABEL: Sdiv
-; TODO(jvoung) -- implement divide and check here.
-; The lowering needs to check if the denominator is 0 and trap, since
-; ARM normally doesn't trap on divide by 0.
+; ARM32: tst [[DENOM:r.*]], [[DENOM]]
+; ARM32: bne [[LABEL:[0-9a-f]+]]
+; ARM32: .word 0xe7fedef0
+; ARM32: [[LABEL]]: {{.*}} bl {{.*}} __divsi3
+; ARM32HWDIV-LABEL: Sdiv
+; ARM32HWDIV: tst
+; ARM32HWDIV: bne
+; ARM32HWDIV: sdiv
+
+define i32 @SdivConst(i32 %a) {
+entry:
+ %div = sdiv i32 %a, 219
+ ret i32 %div
+}
+; CHECK-LABEL: SdivConst
+; CHECK: cdq
+; CHECK: idiv e
+;
+; ARM32-LABEL: SdivConst
+; ARM32-NOT: tst
+; ARM32: bl {{.*}} __divsi3
+; ARM32HWDIV-LABEL: SdivConst
+; ARM32HWDIV-NOT: tst
+; ARM32HWDIV: sdiv
define i32 @Srem(i32 %a, i32 %b) {
entry:
@@ -130,7 +157,16 @@
; CHECK-LABEL: Srem
; CHECK: cdq
; CHECK: idiv e
+;
; ARM32-LABEL: Srem
+; ARM32: tst [[DENOM:r.*]], [[DENOM]]
+; ARM32: bne
+; ARM32: bl {{.*}} __modsi3
+; ARM32HWDIV-LABEL: Srem
+; ARM32HWDIV: tst
+; ARM32HWDIV: bne
+; ARM32HWDIV: sdiv
+; ARM32HWDIV: mls
define i32 @Udiv(i32 %a, i32 %b) {
entry:
@@ -139,7 +175,15 @@
}
; CHECK-LABEL: Udiv
; CHECK: div e
+;
; ARM32-LABEL: Udiv
+; ARM32: tst [[DENOM:r.*]], [[DENOM]]
+; ARM32: bne
+; ARM32: bl {{.*}} __udivsi3
+; ARM32HWDIV-LABEL: Udiv
+; ARM32HWDIV: tst
+; ARM32HWDIV: bne
+; ARM32HWDIV: udiv
define i32 @Urem(i32 %a, i32 %b) {
entry:
@@ -148,4 +192,13 @@
}
; CHECK-LABEL: Urem
; CHECK: div e
+;
; ARM32-LABEL: Urem
+; ARM32: tst [[DENOM:r.*]], [[DENOM]]
+; ARM32: bne
+; ARM32: bl {{.*}} __umodsi3
+; ARM32HWDIV-LABEL: Urem
+; ARM32HWDIV: tst
+; ARM32HWDIV: bne
+; ARM32HWDIV: udiv
+; ARM32HWDIV: mls
diff --git a/tests_lit/llvm2ice_tests/switch-opt.ll b/tests_lit/llvm2ice_tests/switch-opt.ll
index ed6b8b7..3cf0daf 100644
--- a/tests_lit/llvm2ice_tests/switch-opt.ll
+++ b/tests_lit/llvm2ice_tests/switch-opt.ll
@@ -5,9 +5,11 @@
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
; TODO(jvoung): Update to -02 once the phi assignments is done for ARM
-; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
-; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
-; RUN: | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
+; RUN: %if --need=target_ARM32 --need=allow_dump \
+; RUN: --command %p2i --filetype=asm --assemble --disassemble \
+; RUN: --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN: | %if --need=target_ARM32 --need=allow_dump \
+; RUN: --command FileCheck --check-prefix ARM32 %s
define i32 @testSwitch(i32 %a) {
entry:
diff --git a/tests_lit/llvm2ice_tests/unreachable.ll b/tests_lit/llvm2ice_tests/unreachable.ll
index bc08bcd..1309b6f 100644
--- a/tests_lit/llvm2ice_tests/unreachable.ll
+++ b/tests_lit/llvm2ice_tests/unreachable.ll
@@ -1,7 +1,17 @@
; This tests the basic structure of the Unreachable instruction.
-; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s
-; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN: --target x8632 -i %s --args -O2 \
+; RUN: | %if --need=target_X8632 --command FileCheck %s
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN: --target x8632 -i %s --args -Om1 \
+; RUN: | %if --need=target_X8632 --command FileCheck %s
+
+; RUN: %if --need=target_ARM32 --need=allow_dump \
+; RUN: --command %p2i --filetype=asm --assemble \
+; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN: | %if --need=target_ARM32 --need=allow_dump \
+; RUN: --command FileCheck --check-prefix ARM32 %s
define internal i32 @divide(i32 %num, i32 %den) {
entry:
@@ -22,3 +32,9 @@
; CHECK: cdq
; CHECK: idiv
; CHECK: ret
+
+; ARM32-LABEL: divide
+; ARM32: cmp
+; ARM32: .word 0xe7fedef0
+; ARM32: bl {{.*}} __divsi3
+; ARM32: bx lr