ARM: lower integer divide and remainder, with divide-by-zero checks. ARM normally just returns 0 when dividing by 0, with both the software and hardware implementations, which is different from what X86 does. So, for NaCl, we've modified LLVM to trap by inserting explicit zero checks. Uses the -mattr=hwdiv-arm attribute to decide whether 32-bit sdiv/udiv are supported. Also lowers the unreachable-inst to a trap-inst, since we need a trap instruction for divide by 0 anyway. Misc: fix the switch test under MINIMAL=1, since ARM requires allow_dump for filetype=asm. Also includes some unrelated clang-format changes. TODO: check via cross tests. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1214693004.
diff --git a/runtime/szrt.c b/runtime/szrt.c index 009ebe3..ec6b8cd 100644 --- a/runtime/szrt.c +++ b/runtime/szrt.c
@@ -42,6 +42,10 @@ // Other helper calls emitted by Subzero but not implemented here: // Compiler-rt: +// __udivsi3 - udiv i32 +// __divsi3 - sdiv i32 +// __umodsi3 - urem i32 +// __modsi3 - srem i32 // __udivdi3 - udiv i64 // __divdi3 - sdiv i64 // __umoddi3 - urem i64
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp index 2620b29..0476934 100644 --- a/src/IceInstARM32.cpp +++ b/src/IceInstARM32.cpp
@@ -125,6 +125,34 @@ Inst->getSrc(1)->emit(Func); } +void InstARM32Pred::emitFourAddr(const char *Opcode, const InstARM32Pred *Inst, + const Cfg *Func) { + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrEmit(); + assert(Inst->getSrcSize() == 3); + Str << "\t" << Opcode << Inst->getPredicate() << "\t"; + Inst->getDest()->emit(Func); + Str << ", "; + Inst->getSrc(0)->emit(Func); + Str << ", "; + Inst->getSrc(1)->emit(Func); + Str << ", "; + Inst->getSrc(2)->emit(Func); +} + +void InstARM32Pred::emitCmpLike(const char *Opcode, const InstARM32Pred *Inst, + const Cfg *Func) { + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrEmit(); + assert(Inst->getSrcSize() == 2); + Str << "\t" << Opcode << Inst->getPredicate() << "\t"; + Inst->getSrc(0)->emit(Func); + Str << ", "; + Inst->getSrc(1)->emit(Func); +} + OperandARM32Mem::OperandARM32Mem(Cfg * /* Func */, Type Ty, Variable *Base, ConstantInteger32 *ImmOffset, AddrMode Mode) : OperandARM32(kMem, Ty), Base(Base), ImmOffset(ImmOffset), Index(nullptr), @@ -207,15 +235,19 @@ } InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue, - const CfgNode *TargetFalse, CondARM32::Cond Pred) + const CfgNode *TargetFalse, + const InstARM32Label *Label, CondARM32::Cond Pred) : InstARM32Pred(Func, InstARM32::Br, 0, nullptr, Pred), - TargetTrue(TargetTrue), TargetFalse(TargetFalse) {} + TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {} bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) { // If there is no next block, then there can be no fallthrough to // optimize. if (NextNode == nullptr) return false; + // Intra-block conditional branches can't be optimized. + if (Label) + return false; // If there is no fallthrough node, such as a non-default case label // for a switch instruction, then there is no opportunity to // optimize. 
@@ -264,11 +296,12 @@ addSource(CallTarget); } -InstARM32Cmp::InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2, - CondARM32::Cond Predicate) - : InstARM32Pred(Func, InstARM32::Cmp, 2, nullptr, Predicate) { - addSource(Src1); - addSource(Src2); +InstARM32Label::InstARM32Label(Cfg *Func, TargetARM32 *Target) + : InstARM32(Func, InstARM32::Label, 0, nullptr), + Number(Target->makeNextLabelNumber()) {} + +IceString InstARM32Label::getName(const Cfg *Func) const { + return ".L" + Func->getFunctionName() + "$local$__" + std::to_string(Number); } InstARM32Ldr::InstARM32Ldr(Cfg *Func, Variable *Dest, OperandARM32Mem *Mem, @@ -277,15 +310,6 @@ addSource(Mem); } -InstARM32Mla::InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0, - Variable *Src1, Variable *Acc, - CondARM32::Cond Predicate) - : InstARM32Pred(Func, InstARM32::Mla, 3, Dest, Predicate) { - addSource(Src0); - addSource(Src1); - addSource(Acc); -} - InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests) : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) { // Track modifications to Dests separately via FakeDefs. 
@@ -314,6 +338,9 @@ addSource(Mem); } +InstARM32Trap::InstARM32Trap(Cfg *Func) + : InstARM32(Func, InstARM32::Trap, 0, nullptr) {} + InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi, Variable *Src0, Variable *Src1, CondARM32::Cond Predicate) @@ -348,7 +375,15 @@ template <> const char *InstARM32Orr::Opcode = "orr"; template <> const char *InstARM32Rsb::Opcode = "rsb"; template <> const char *InstARM32Sbc::Opcode = "sbc"; +template <> const char *InstARM32Sdiv::Opcode = "sdiv"; template <> const char *InstARM32Sub::Opcode = "sub"; +template <> const char *InstARM32Udiv::Opcode = "udiv"; +// Four-addr ops +template <> const char *InstARM32Mla::Opcode = "mla"; +template <> const char *InstARM32Mls::Opcode = "mls"; +// Cmp-like ops +template <> const char *InstARM32Cmp::Opcode = "cmp"; +template <> const char *InstARM32Tst::Opcode = "tst"; void InstARM32::dump(const Cfg *Func) const { if (!BuildDefs::dump()) @@ -402,14 +437,18 @@ Ostream &Str = Func->getContext()->getStrEmit(); Str << "\t" << "b" << getPredicate() << "\t"; - if (isUnconditionalBranch()) { - Str << getTargetFalse()->getAsmName(); + if (Label) { + Str << Label->getName(Func); } else { - Str << getTargetTrue()->getAsmName(); - if (getTargetFalse()) { - Str << "\n\t" - << "b" - << "\t" << getTargetFalse()->getAsmName(); + if (isUnconditionalBranch()) { + Str << getTargetFalse()->getAsmName(); + } else { + Str << getTargetTrue()->getAsmName(); + if (getTargetFalse()) { + Str << "\n\t" + << "b" + << "\t" << getTargetFalse()->getAsmName(); + } } } } @@ -426,13 +465,18 @@ Str << "br "; if (getPredicate() == CondARM32::AL) { - Str << "label %" << getTargetFalse()->getName(); + Str << "label %" + << (Label ? 
Label->getName(Func) : getTargetFalse()->getName()); return; } - Str << getPredicate() << ", label %" << getTargetTrue()->getName(); - if (getTargetFalse()) { - Str << ", label %" << getTargetFalse()->getName(); + if (Label) { + Str << "label %" << Label->getName(Func); + } else { + Str << getPredicate() << ", label %" << getTargetTrue()->getName(); + if (getTargetFalse()) { + Str << ", label %" << getTargetFalse()->getName(); + } } } @@ -479,30 +523,23 @@ getCallTarget()->dump(Func); } -void InstARM32Cmp::emit(const Cfg *Func) const { +void InstARM32Label::emit(const Cfg *Func) const { if (!BuildDefs::dump()) return; Ostream &Str = Func->getContext()->getStrEmit(); - assert(getSrcSize() == 2); - Str << "\t" - << "cmp" << getPredicate() << "\t"; - getSrc(0)->emit(Func); - Str << ", "; - getSrc(1)->emit(Func); + Str << getName(Func) << ":"; } -void InstARM32Cmp::emitIAS(const Cfg *Func) const { - assert(getSrcSize() == 2); +void InstARM32Label::emitIAS(const Cfg *Func) const { (void)Func; llvm_unreachable("Not yet implemented"); } -void InstARM32Cmp::dump(const Cfg *Func) const { +void InstARM32Label::dump(const Cfg *Func) const { if (!BuildDefs::dump()) return; Ostream &Str = Func->getContext()->getStrDump(); - dumpOpcodePred(Str, "cmp", getSrc(0)->getType()); - dumpSources(Func); + Str << getName(Func) << ":"; } void InstARM32Ldr::emit(const Cfg *Func) const { @@ -536,40 +573,6 @@ dumpSources(Func); } -void InstARM32Mla::emit(const Cfg *Func) const { - if (!BuildDefs::dump()) - return; - Ostream &Str = Func->getContext()->getStrEmit(); - assert(getSrcSize() == 3); - assert(getDest()->hasReg()); - Str << "\t" - << "mla" << getPredicate() << "\t"; - getDest()->emit(Func); - Str << ", "; - getSrc(0)->emit(Func); - Str << ", "; - getSrc(1)->emit(Func); - Str << ", "; - getSrc(2)->emit(Func); -} - -void InstARM32Mla::emitIAS(const Cfg *Func) const { - assert(getSrcSize() == 3); - (void)Func; - llvm_unreachable("Not yet implemented"); -} - -void InstARM32Mla::dump(const 
Cfg *Func) const { - if (!BuildDefs::dump()) - return; - Ostream &Str = Func->getContext()->getStrDump(); - dumpDest(Func); - Str << " = "; - dumpOpcodePred(Str, "mla", getDest()->getType()); - Str << " "; - dumpSources(Func); -} - template <> void InstARM32Movw::emit(const Cfg *Func) const { if (!BuildDefs::dump()) return; @@ -757,6 +760,33 @@ getSrc(0)->dump(Func); } +void InstARM32Trap::emit(const Cfg *Func) const { + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrEmit(); + assert(getSrcSize() == 0); + // There isn't a mnemonic for the special NaCl Trap encoding, so dump + // the raw bytes. + Str << "\t.long 0x"; + ARM32::AssemblerARM32 *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); + for (uint8_t I : Asm->getNonExecBundlePadding()) { + Str.write_hex(I); + } +} + +void InstARM32Trap::emitIAS(const Cfg *Func) const { + assert(getSrcSize() == 0); + (void)Func; + llvm_unreachable("Not yet implemented"); +} + +void InstARM32Trap::dump(const Cfg *Func) const { + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrDump(); + Str << "trap"; +} + void InstARM32Umull::emit(const Cfg *Func) const { if (!BuildDefs::dump()) return;
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h index 8167ed4..caef19f 100644 --- a/src/IceInstARM32.h +++ b/src/IceInstARM32.h
@@ -262,10 +262,12 @@ Call, Cmp, Eor, + Label, Ldr, Lsl, Lsr, Mla, + Mls, Mov, Movt, Movw, @@ -277,9 +279,13 @@ Ret, Rsb, Sbc, + Sdiv, Str, Sub, Sxt, + Trap, + Tst, + Udiv, Umull, Uxt }; @@ -322,6 +328,10 @@ const Cfg *Func); static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst, const Cfg *Func, bool SetFlags); + static void emitFourAddr(const char *Opcode, const InstARM32Pred *Inst, + const Cfg *Func); + static void emitCmpLike(const char *Opcode, const InstARM32Pred *Inst, + const Cfg *Func); protected: CondARM32::Cond Predicate; @@ -477,11 +487,11 @@ // Create an ordinary binary-op instruction like add, and sub. // Dest and Src1 must be registers. static InstARM32ThreeAddrGPR *create(Cfg *Func, Variable *Dest, - Variable *Src1, Operand *Src2, + Variable *Src0, Operand *Src1, CondARM32::Cond Predicate, bool SetFlags = false) { return new (Func->allocate<InstARM32ThreeAddrGPR>()) - InstARM32ThreeAddrGPR(Func, Dest, Src1, Src2, Predicate, SetFlags); + InstARM32ThreeAddrGPR(Func, Dest, Src0, Src1, Predicate, SetFlags); } void emit(const Cfg *Func) const override { if (!BuildDefs::dump()) @@ -505,15 +515,107 @@ static bool classof(const Inst *Inst) { return isClassof(Inst, K); } private: - InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src1, - Operand *Src2, CondARM32::Cond Predicate, bool SetFlags) + InstARM32ThreeAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0, + Operand *Src1, CondARM32::Cond Predicate, bool SetFlags) : InstARM32Pred(Func, K, 2, Dest, Predicate), SetFlags(SetFlags) { + addSource(Src0); + addSource(Src1); + } + + static const char *Opcode; + bool SetFlags; +}; + +// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate. 
+template <InstARM32::InstKindARM32 K> +class InstARM32FourAddrGPR : public InstARM32Pred { + InstARM32FourAddrGPR() = delete; + InstARM32FourAddrGPR(const InstARM32FourAddrGPR &) = delete; + InstARM32FourAddrGPR &operator=(const InstARM32FourAddrGPR &) = delete; + +public: + // Every operand must be a register. + static InstARM32FourAddrGPR *create(Cfg *Func, Variable *Dest, Variable *Src0, + Variable *Src1, Variable *Src2, + CondARM32::Cond Predicate) { + return new (Func->allocate<InstARM32FourAddrGPR>()) + InstARM32FourAddrGPR(Func, Dest, Src0, Src1, Src2, Predicate); + } + void emit(const Cfg *Func) const override { + if (!BuildDefs::dump()) + return; + emitFourAddr(Opcode, this, Func); + } + void emitIAS(const Cfg *Func) const override { + (void)Func; + llvm::report_fatal_error("Not yet implemented"); + } + void dump(const Cfg *Func) const override { + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrDump(); + dumpDest(Func); + Str << " = "; + dumpOpcodePred(Str, Opcode, getDest()->getType()); + Str << " "; + dumpSources(Func); + } + static bool classof(const Inst *Inst) { return isClassof(Inst, K); } + +private: + InstARM32FourAddrGPR(Cfg *Func, Variable *Dest, Variable *Src0, + Variable *Src1, Variable *Src2, + CondARM32::Cond Predicate) + : InstARM32Pred(Func, K, 3, Dest, Predicate) { + addSource(Src0); addSource(Src1); addSource(Src2); } static const char *Opcode; - bool SetFlags; +}; + +// Instructions of the form x cmpop y (setting flags). 
+template <InstARM32::InstKindARM32 K> +class InstARM32CmpLike : public InstARM32Pred { + InstARM32CmpLike() = delete; + InstARM32CmpLike(const InstARM32CmpLike &) = delete; + InstARM32CmpLike &operator=(const InstARM32CmpLike &) = delete; + +public: + static InstARM32CmpLike *create(Cfg *Func, Variable *Src0, Operand *Src1, + CondARM32::Cond Predicate) { + return new (Func->allocate<InstARM32CmpLike>()) + InstARM32CmpLike(Func, Src0, Src1, Predicate); + } + void emit(const Cfg *Func) const override { + if (!BuildDefs::dump()) + return; + emitCmpLike(Opcode, this, Func); + } + void emitIAS(const Cfg *Func) const override { + (void)Func; + llvm_unreachable("Not yet implemented"); + } + void dump(const Cfg *Func) const override { + if (!BuildDefs::dump()) + return; + Ostream &Str = Func->getContext()->getStrDump(); + dumpOpcodePred(Str, Opcode, getSrc(0)->getType()); + Str << " "; + dumpSources(Func); + } + static bool classof(const Inst *Inst) { return isClassof(Inst, K); } + +private: + InstARM32CmpLike(Cfg *Func, Variable *Src0, Operand *Src1, + CondARM32::Cond Predicate) + : InstARM32Pred(Func, K, 2, nullptr, Predicate) { + addSource(Src0); + addSource(Src1); + } + + static const char *Opcode; }; typedef InstARM32ThreeAddrGPR<InstARM32::Adc> InstARM32Adc; @@ -528,7 +630,9 @@ typedef InstARM32ThreeAddrGPR<InstARM32::Orr> InstARM32Orr; typedef InstARM32ThreeAddrGPR<InstARM32::Rsb> InstARM32Rsb; typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc; +typedef InstARM32ThreeAddrGPR<InstARM32::Sdiv> InstARM32Sdiv; typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub; +typedef InstARM32ThreeAddrGPR<InstARM32::Udiv> InstARM32Udiv; // Move instruction (variable <- flex). This is more of a pseudo-inst. // If var is a register, then we use "mov". If var is stack, then we use // "str" to store to the stack. @@ -543,6 +647,35 @@ // but we aren't using that for now, so just model as a Unaryop. 
typedef InstARM32UnaryopGPR<InstARM32::Sxt> InstARM32Sxt; typedef InstARM32UnaryopGPR<InstARM32::Uxt> InstARM32Uxt; +typedef InstARM32FourAddrGPR<InstARM32::Mla> InstARM32Mla; +typedef InstARM32FourAddrGPR<InstARM32::Mls> InstARM32Mls; +typedef InstARM32CmpLike<InstARM32::Cmp> InstARM32Cmp; +typedef InstARM32CmpLike<InstARM32::Tst> InstARM32Tst; + +// InstARM32Label represents an intra-block label that is the target +// of an intra-block branch. The offset between the label and the +// branch must be fit in the instruction immediate (considered "near"). +class InstARM32Label : public InstARM32 { + InstARM32Label() = delete; + InstARM32Label(const InstARM32Label &) = delete; + InstARM32Label &operator=(const InstARM32Label &) = delete; + +public: + static InstARM32Label *create(Cfg *Func, TargetARM32 *Target) { + return new (Func->allocate<InstARM32Label>()) InstARM32Label(Func, Target); + } + uint32_t getEmitInstCount() const override { return 0; } + IceString getName(const Cfg *Func) const; + SizeT getNumber() const { return Number; } + void emit(const Cfg *Func) const override; + void emitIAS(const Cfg *Func) const override; + void dump(const Cfg *Func) const override; + +private: + InstARM32Label(Cfg *Func, TargetARM32 *Target); + + SizeT Number; // used for unique label generation. +}; // Direct branch instruction. class InstARM32Br : public InstARM32Pred { @@ -555,14 +688,16 @@ static InstARM32Br *create(Cfg *Func, CfgNode *TargetTrue, CfgNode *TargetFalse, CondARM32::Cond Predicate) { assert(Predicate != CondARM32::AL); + constexpr InstARM32Label *NoLabel = nullptr; return new (Func->allocate<InstARM32Br>()) - InstARM32Br(Func, TargetTrue, TargetFalse, Predicate); + InstARM32Br(Func, TargetTrue, TargetFalse, NoLabel, Predicate); } // Create an unconditional branch to a node. 
static InstARM32Br *create(Cfg *Func, CfgNode *Target) { - const CfgNode *NoCondTarget = nullptr; + constexpr CfgNode *NoCondTarget = nullptr; + constexpr InstARM32Label *NoLabel = nullptr; return new (Func->allocate<InstARM32Br>()) - InstARM32Br(Func, NoCondTarget, Target, CondARM32::AL); + InstARM32Br(Func, NoCondTarget, Target, NoLabel, CondARM32::AL); } // Create a non-terminator conditional branch to a node, with a // fallthrough to the next instruction in the current node. This is @@ -570,15 +705,27 @@ static InstARM32Br *create(Cfg *Func, CfgNode *Target, CondARM32::Cond Predicate) { assert(Predicate != CondARM32::AL); - const CfgNode *NoUncondTarget = nullptr; + constexpr CfgNode *NoUncondTarget = nullptr; + constexpr InstARM32Label *NoLabel = nullptr; return new (Func->allocate<InstARM32Br>()) - InstARM32Br(Func, Target, NoUncondTarget, Predicate); + InstARM32Br(Func, Target, NoUncondTarget, NoLabel, Predicate); + } + // Create a conditional intra-block branch (or unconditional, if + // Condition==AL) to a label in the current block. 
+ static InstARM32Br *create(Cfg *Func, InstARM32Label *Label, + CondARM32::Cond Predicate) { + constexpr CfgNode *NoCondTarget = nullptr; + constexpr CfgNode *NoUncondTarget = nullptr; + return new (Func->allocate<InstARM32Br>()) + InstARM32Br(Func, NoCondTarget, NoUncondTarget, Label, Predicate); } const CfgNode *getTargetTrue() const { return TargetTrue; } const CfgNode *getTargetFalse() const { return TargetFalse; } bool optimizeBranch(const CfgNode *NextNode); uint32_t getEmitInstCount() const override { uint32_t Sum = 0; + if (Label) + ++Sum; if (getTargetTrue()) ++Sum; if (getTargetFalse()) @@ -596,10 +743,11 @@ private: InstARM32Br(Cfg *Func, const CfgNode *TargetTrue, const CfgNode *TargetFalse, - CondARM32::Cond Predicate); + const InstARM32Label *Label, CondARM32::Cond Predicate); const CfgNode *TargetTrue; const CfgNode *TargetFalse; + const InstARM32Label *Label; // Intra-block branch target }; // AdjustStack instruction - subtracts SP by the given amount and @@ -653,28 +801,6 @@ InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget); }; -// Integer compare instruction. -class InstARM32Cmp : public InstARM32Pred { - InstARM32Cmp() = delete; - InstARM32Cmp(const InstARM32Cmp &) = delete; - InstARM32Cmp &operator=(const InstARM32Cmp &) = delete; - -public: - static InstARM32Cmp *create(Cfg *Func, Variable *Src1, Operand *Src2, - CondARM32::Cond Predicate) { - return new (Func->allocate<InstARM32Cmp>()) - InstARM32Cmp(Func, Src1, Src2, Predicate); - } - void emit(const Cfg *Func) const override; - void emitIAS(const Cfg *Func) const override; - void dump(const Cfg *Func) const override; - static bool classof(const Inst *Inst) { return isClassof(Inst, Cmp); } - -private: - InstARM32Cmp(Cfg *Func, Variable *Src1, Operand *Src2, - CondARM32::Cond Predicate); -}; - // Load instruction. 
class InstARM32Ldr : public InstARM32Pred { InstARM32Ldr() = delete; @@ -698,30 +824,6 @@ CondARM32::Cond Predicate); }; -// Multiply Accumulate: d := x * y + a -class InstARM32Mla : public InstARM32Pred { - InstARM32Mla() = delete; - InstARM32Mla(const InstARM32Mla &) = delete; - InstARM32Mla &operator=(const InstARM32Mla &) = delete; - -public: - // Everything must be a register. - static InstARM32Mla *create(Cfg *Func, Variable *Dest, Variable *Src0, - Variable *Src1, Variable *Acc, - CondARM32::Cond Predicate) { - return new (Func->allocate<InstARM32Mla>()) - InstARM32Mla(Func, Dest, Src0, Src1, Acc, Predicate); - } - void emit(const Cfg *Func) const override; - void emitIAS(const Cfg *Func) const override; - void dump(const Cfg *Func) const override; - static bool classof(const Inst *Inst) { return isClassof(Inst, Mla); } - -private: - InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1, - Variable *Acc, CondARM32::Cond Predicate); -}; - // Pop into a list of GPRs. Technically this can be predicated, but we don't // need that functionality. class InstARM32Pop : public InstARM32 { @@ -816,6 +918,24 @@ CondARM32::Cond Predicate); }; +class InstARM32Trap : public InstARM32 { + InstARM32Trap() = delete; + InstARM32Trap(const InstARM32Trap &) = delete; + InstARM32Trap &operator=(const InstARM32Trap &) = delete; + +public: + static InstARM32Trap *create(Cfg *Func) { + return new (Func->allocate<InstARM32Trap>()) InstARM32Trap(Func); + } + void emit(const Cfg *Func) const override; + void emitIAS(const Cfg *Func) const override; + void dump(const Cfg *Func) const override; + static bool classof(const Inst *Inst) { return isClassof(Inst, Trap); } + +private: + explicit InstARM32Trap(Cfg *Func); +}; + // Unsigned Multiply Long: d.lo, d.hi := x * y class InstARM32Umull : public InstARM32Pred { InstARM32Umull() = delete;
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp index ca62d79..96d801a 100644 --- a/src/IceInstX8632.cpp +++ b/src/IceInstX8632.cpp
@@ -1939,7 +1939,7 @@ if (!BuildDefs::dump()) return; Ostream &Str = Func->getContext()->getStrDump(); - Str << "ud2\n"; + Str << "ud2"; } void InstX8632Test::emit(const Cfg *Func) const { @@ -1998,7 +1998,7 @@ if (!BuildDefs::dump()) return; Ostream &Str = Func->getContext()->getStrDump(); - Str << "mfence\n"; + Str << "mfence"; } void InstX8632Store::emit(const Cfg *Func) const { @@ -2512,7 +2512,6 @@ Ostream &Str = Func->getContext()->getStrDump(); dumpDest(Func); Str << " = fstp." << getDest()->getType() << ", st(0)"; - Str << "\n"; } template <> void InstX8632Pcmpeq::emit(const Cfg *Func) const {
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h index 205e573..b543208 100644 --- a/src/IceTargetLowering.h +++ b/src/IceTargetLowering.h
@@ -353,16 +353,20 @@ const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64"; const static constexpr char *H_frem_f32 = "fmodf"; const static constexpr char *H_frem_f64 = "fmod"; + const static constexpr char *H_sdiv_i32 = "__divsi3"; const static constexpr char *H_sdiv_i64 = "__divdi3"; const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32"; const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64"; + const static constexpr char *H_srem_i32 = "__modsi3"; const static constexpr char *H_srem_i64 = "__moddi3"; + const static constexpr char *H_udiv_i32 = "__udivsi3"; const static constexpr char *H_udiv_i64 = "__udivdi3"; const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32"; const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32"; const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64"; const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32"; const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64"; + const static constexpr char *H_urem_i32 = "__umodsi3"; const static constexpr char *H_urem_i64 = "__umoddi3"; private:
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp index fad9bcf..c090075 100644 --- a/src/IceTargetLoweringARM32.cpp +++ b/src/IceTargetLoweringARM32.cpp
@@ -141,21 +141,34 @@ return Utils::applyAlignment(Value, typeAlignInBytes); } +// Conservatively check if at compile time we know that the operand is +// definitely a non-zero integer. +bool isGuaranteedNonzeroInt(const Operand *Op) { + if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) { + return Const->getValue() != 0; + } + return false; +} + } // end of anonymous namespace -TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func) { +TargetARM32Features::TargetARM32Features(const ClFlags &Flags) { static_assert( (ARM32InstructionSet::End - ARM32InstructionSet::Begin) == (TargetInstructionSet::ARM32InstructionSet_End - TargetInstructionSet::ARM32InstructionSet_Begin), "ARM32InstructionSet range different from TargetInstructionSet"); - if (Func->getContext()->getFlags().getTargetInstructionSet() != + if (Flags.getTargetInstructionSet() != TargetInstructionSet::BaseInstructionSet) { InstructionSet = static_cast<ARM32InstructionSet>( - (Func->getContext()->getFlags().getTargetInstructionSet() - + (Flags.getTargetInstructionSet() - TargetInstructionSet::ARM32InstructionSet_Begin) + ARM32InstructionSet::Begin); } +} + +TargetARM32::TargetARM32(Cfg *Func) + : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { // TODO: Don't initialize IntegerRegisters and friends every time. // Instead, initialize in some sort of static initializer for the // class. 
@@ -1009,6 +1022,75 @@ _mov(Dest, SP); } +void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { + if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) + return; + Variable *SrcLoReg = legalizeToVar(SrcLo); + switch (Ty) { + default: + llvm_unreachable("Unexpected type"); + case IceType_i8: { + Operand *Mask = + legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); + _tst(SrcLoReg, Mask); + break; + } + case IceType_i16: { + Operand *Mask = + legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); + _tst(SrcLoReg, Mask); + break; + } + case IceType_i32: { + _tst(SrcLoReg, SrcLoReg); + break; + } + case IceType_i64: { + Variable *ScratchReg = makeReg(IceType_i32); + _orrs(ScratchReg, SrcLoReg, SrcHi); + // ScratchReg isn't going to be used, but we need the + // side-effect of setting flags from this operation. + Context.insert(InstFakeUse::create(Func, ScratchReg)); + } + } + InstARM32Label *Label = InstARM32Label::create(Func, this); + _br(Label, CondARM32::NE); + _trap(); + Context.insert(Label); +} + +void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, + Operand *Src1, ExtInstr ExtFunc, + DivInstr DivFunc, const char *DivHelperName, + bool IsRemainder) { + div0Check(Dest->getType(), Src1, nullptr); + Variable *Src1R = legalizeToVar(Src1); + Variable *T0R = Src0R; + Variable *T1R = Src1R; + if (Dest->getType() != IceType_i32) { + T0R = makeReg(IceType_i32); + (this->*ExtFunc)(T0R, Src0R, CondARM32::AL); + T1R = makeReg(IceType_i32); + (this->*ExtFunc)(T1R, Src1R, CondARM32::AL); + } + if (hasCPUFeature(TargetARM32Features::HWDivArm)) { + (this->*DivFunc)(T, T0R, T1R, CondARM32::AL); + if (IsRemainder) { + Variable *T2 = makeReg(IceType_i32); + _mls(T2, T, T1R, T0R); + T = T2; + } + _mov(Dest, T); + } else { + constexpr SizeT MaxSrcs = 2; + InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); + Call->addArg(T0R); + Call->addArg(T1R); + lowerCall(Call); + } + return; +} + void 
TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { Variable *Dest = Inst->getDest(); // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier @@ -1182,9 +1264,47 @@ case InstArithmetic::Udiv: case InstArithmetic::Sdiv: case InstArithmetic::Urem: - case InstArithmetic::Srem: - UnimplementedError(Func->getContext()->getFlags()); - break; + case InstArithmetic::Srem: { + // Check for divide by 0 (ARM normally doesn't trap, but we want it + // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized + // to a register, which will hide a constant source operand. + // Instead, check the not-yet-legalized Src1 to optimize-out a divide + // by 0 check. + if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { + if (C64->getValue() == 0) { + div0Check(IceType_i64, Src1Lo, Src1Hi); + } + } else { + div0Check(IceType_i64, Src1Lo, Src1Hi); + } + // Technically, ARM has their own aeabi routines, but we can use the + // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, + // but uses the more standard __moddi3 for rem. 
+ const char *HelperName = ""; + switch (Inst->getOp()) { + case InstArithmetic::Udiv: + HelperName = H_udiv_i64; + break; + case InstArithmetic::Sdiv: + HelperName = H_sdiv_i64; + break; + case InstArithmetic::Urem: + HelperName = H_urem_i64; + break; + case InstArithmetic::Srem: + HelperName = H_srem_i64; + break; + default: + llvm_unreachable("Should have only matched div ops."); + break; + } + constexpr SizeT MaxSrcs = 2; + InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); + Call->addArg(Inst->getSrc(0)); + Call->addArg(Inst->getSrc(1)); + lowerCall(Call); + return; + } case InstArithmetic::Fadd: case InstArithmetic::Fsub: case InstArithmetic::Fmul: @@ -1197,61 +1317,73 @@ UnimplementedError(Func->getContext()->getFlags()); } else { // Dest->getType() is non-i64 scalar Variable *Src0R = legalizeToVar(Inst->getSrc(0)); - Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex); + Operand *Src1RF = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex); Variable *T = makeReg(Dest->getType()); switch (Inst->getOp()) { case InstArithmetic::_num: llvm_unreachable("Unknown arithmetic operator"); break; case InstArithmetic::Add: { - _add(T, Src0R, Src1); + _add(T, Src0R, Src1RF); _mov(Dest, T); } break; case InstArithmetic::And: { - _and(T, Src0R, Src1); + _and(T, Src0R, Src1RF); _mov(Dest, T); } break; case InstArithmetic::Or: { - _orr(T, Src0R, Src1); + _orr(T, Src0R, Src1RF); _mov(Dest, T); } break; case InstArithmetic::Xor: { - _eor(T, Src0R, Src1); + _eor(T, Src0R, Src1RF); _mov(Dest, T); } break; case InstArithmetic::Sub: { - _sub(T, Src0R, Src1); + _sub(T, Src0R, Src1RF); _mov(Dest, T); } break; case InstArithmetic::Mul: { - Variable *Src1R = legalizeToVar(Src1); + Variable *Src1R = legalizeToVar(Src1RF); _mul(T, Src0R, Src1R); _mov(Dest, T); } break; case InstArithmetic::Shl: - _lsl(T, Src0R, Src1); + _lsl(T, Src0R, Src1RF); _mov(Dest, T); break; case InstArithmetic::Lshr: - _lsr(T, Src0R, Src1); + _lsr(T, Src0R, Src1RF); _mov(Dest, T); break; case 
InstArithmetic::Ashr: - _asr(T, Src0R, Src1); + _asr(T, Src0R, Src1RF); _mov(Dest, T); break; - case InstArithmetic::Udiv: - UnimplementedError(Func->getContext()->getFlags()); - break; - case InstArithmetic::Sdiv: - UnimplementedError(Func->getContext()->getFlags()); - break; - case InstArithmetic::Urem: - UnimplementedError(Func->getContext()->getFlags()); - break; - case InstArithmetic::Srem: - UnimplementedError(Func->getContext()->getFlags()); - break; + case InstArithmetic::Udiv: { + constexpr bool IsRemainder = false; + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, + &TargetARM32::_udiv, H_udiv_i32, IsRemainder); + return; + } + case InstArithmetic::Sdiv: { + constexpr bool IsRemainder = false; + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, + &TargetARM32::_sdiv, H_sdiv_i32, IsRemainder); + return; + } + case InstArithmetic::Urem: { + constexpr bool IsRemainder = true; + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, + &TargetARM32::_udiv, H_urem_i32, IsRemainder); + return; + } + case InstArithmetic::Srem: { + constexpr bool IsRemainder = true; + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, + &TargetARM32::_sdiv, H_srem_i32, IsRemainder); + return; + } case InstArithmetic::Fadd: UnimplementedError(Func->getContext()->getFlags()); break; @@ -1322,7 +1454,7 @@ Variable *Src0R = legalizeToVar(Cond); Constant *Zero = Ctx->getConstantZero(IceType_i32); _cmp(Src0R, Zero); - _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse()); + _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE); } void TargetARM32::lowerCall(const InstCall *Instr) { @@ -2113,7 +2245,7 @@ } void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) { - UnimplementedError(Func->getContext()->getFlags()); + _trap(); } // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to @@ -2417,7 +2549,7 @@ } TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) - : TargetHeaderLowering(Ctx) {} + : 
TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} void TargetHeaderARM32::lower() { OstreamLocker L(Ctx); @@ -2431,12 +2563,18 @@ // sub-subsection of the first public subsection of the attributes. Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; // Chromebooks are at least A15, but do A9 for higher compat. - Str << ".cpu cortex-a9\n" - << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" + // For some reason, the LLVM ARM asm parser has the .cpu directive override + // the mattr specified on the commandline. So to test hwdiv, we need to set + // the .cpu directive higher (can't just rely on --mattr=...). + if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { + Str << ".cpu cortex-a15\n"; + } else { + Str << ".cpu cortex-a9\n"; + } + Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; - // TODO(jvoung): check other CPU features like HW div. Str << ".fpu neon\n" << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n" << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n" @@ -2450,6 +2588,9 @@ << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n" << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n" << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; + if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { + Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; + } // Technically R9 is used for TLS with Sandboxing, and we reserve it. // However, for compatibility with current NaCl LLVM, don't claim that. Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h index 1691d6c..02d9080 100644 --- a/src/IceTargetLoweringARM32.h +++ b/src/IceTargetLoweringARM32.h
@@ -22,6 +22,30 @@ namespace Ice { +// Class encapsulating ARM cpu features / instruction set. +class TargetARM32Features { + TargetARM32Features() = delete; + TargetARM32Features(const TargetARM32Features &) = delete; + TargetARM32Features &operator=(const TargetARM32Features &) = delete; + +public: + explicit TargetARM32Features(const ClFlags &Flags); + + enum ARM32InstructionSet { + Begin, + // Neon is the PNaCl baseline instruction set. + Neon = Begin, + HWDivArm, // HW divide in ARM mode (not just Thumb mode). + End + }; + + bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; } + +private: + ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin; +}; + +// The target lowering logic for ARM32. class TargetARM32 : public TargetLowering { TargetARM32() = delete; TargetARM32(const TargetARM32 &) = delete; @@ -75,15 +99,9 @@ void finishArgumentLowering(Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset, size_t &InArgsSizeBytes); - enum ARM32InstructionSet { - Begin, - // Neon is the PNaCl baseline instruction set. - Neon = Begin, - HWDivArm, // HW divide in ARM mode (not just Thumb mode). - End - }; - - ARM32InstructionSet getInstructionSet() const { return InstructionSet; } + bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const { + return CPUFeatures.hasFeature(I); + } protected: explicit TargetARM32(Cfg *Func); @@ -141,6 +159,18 @@ llvm::SmallVectorImpl<int32_t> &Permutation, const llvm::SmallBitVector &ExcludeRegisters) const override; + // If a divide-by-zero check is needed, inserts a: + // test; branch .LSKIP; trap; .LSKIP: <continuation>. + // If no check is needed nothing is inserted. 
+ void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi); + typedef void (TargetARM32::*ExtInstr)(Variable *, Variable *, + CondARM32::Cond); + typedef void (TargetARM32::*DivInstr)(Variable *, Variable *, Variable *, + CondARM32::Cond); + void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1, + ExtInstr ExtFunc, DivInstr DivFunc, + const char *DivHelperName, bool IsRemainder); + // The following are helpers that insert lowered ARM32 instructions // with minimal syntactic overhead, so that the lowering code can // look as close to assembly as practical. @@ -175,8 +205,8 @@ CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred)); } - void _br(CondARM32::Cond Condition, CfgNode *TargetTrue, - CfgNode *TargetFalse) { + void _br(CfgNode *TargetTrue, CfgNode *TargetFalse, + CondARM32::Cond Condition) { Context.insert( InstARM32Br::create(Func, TargetTrue, TargetFalse, Condition)); } @@ -186,6 +216,9 @@ void _br(CfgNode *Target, CondARM32::Cond Condition) { Context.insert(InstARM32Br::create(Func, Target, Condition)); } + void _br(InstARM32Label *Label, CondARM32::Cond Condition) { + Context.insert(InstARM32Br::create(Func, Label, Condition)); + } void _cmp(Variable *Src0, Operand *Src1, CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Cmp::create(Func, Src0, Src1, Pred)); @@ -210,6 +243,10 @@ CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred)); } + void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc, + CondARM32::Cond Pred = CondARM32::AL) { + Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred)); + } // If Dest=nullptr is passed in, then a new variable is created, // marked as infinite register allocation weight, and returned // through the in/out Dest argument. 
@@ -248,6 +285,12 @@ CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Orr::create(Func, Dest, Src0, Src1, Pred)); } + void _orrs(Variable *Dest, Variable *Src0, Operand *Src1, + CondARM32::Cond Pred = CondARM32::AL) { + const bool SetFlags = true; + Context.insert( + InstARM32Orr::create(Func, Dest, Src0, Src1, Pred, SetFlags)); + } void _push(const VarList &Sources) { Context.insert(InstARM32Push::create(Func, Sources)); } @@ -257,6 +300,9 @@ for (Variable *Dest : Dests) Context.insert(InstFakeDef::create(Func, Dest)); } + void _ret(Variable *LR, Variable *Src0 = nullptr) { + Context.insert(InstARM32Ret::create(Func, LR, Src0)); + } void _rsb(Variable *Dest, Variable *Src0, Operand *Src1, CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred)); @@ -271,6 +317,10 @@ Context.insert( InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred, SetFlags)); } + void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1, + CondARM32::Cond Pred = CondARM32::AL) { + Context.insert(InstARM32Sdiv::create(Func, Dest, Src0, Src1, Pred)); + } void _str(Variable *Value, OperandARM32Mem *Addr, CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Str::create(Func, Value, Addr, Pred)); @@ -289,8 +339,14 @@ CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred)); } - void _ret(Variable *LR, Variable *Src0 = nullptr) { - Context.insert(InstARM32Ret::create(Func, LR, Src0)); + void _tst(Variable *Src0, Operand *Src1, + CondARM32::Cond Pred = CondARM32::AL) { + Context.insert(InstARM32Tst::create(Func, Src0, Src1, Pred)); + } + void _trap() { Context.insert(InstARM32Trap::create(Func)); } + void _udiv(Variable *Dest, Variable *Src0, Variable *Src1, + CondARM32::Cond Pred = CondARM32::AL) { + Context.insert(InstARM32Udiv::create(Func, Dest, Src0, Src1, Pred)); } void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0, Variable *Src1, CondARM32::Cond Pred = 
CondARM32::AL) { @@ -305,7 +361,7 @@ Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred)); } - ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin; + TargetARM32Features CPUFeatures; bool UsesFramePointer = false; bool NeedsStackAlignment = false; bool MaybeLeafFunc = true; @@ -386,6 +442,8 @@ private: ~TargetHeaderARM32() = default; + + TargetARM32Features CPUFeatures; }; } // end of namespace Ice
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll index 54d8ff0..2460a24 100644 --- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll +++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -315,6 +315,11 @@ ; OPTM1-LABEL: div64BitSigned ; OPTM1: call {{.*}} R_{{.*}} __divdi3 +; +; ARM32-LABEL: div64BitSigned +; ARM32: orrs {{r.*}}, {{r.*}} +; ARM32: bne +; ARM32: bl {{.*}} __divdi3 define internal i64 @div64BitSignedConst(i64 %a) { entry: @@ -330,6 +335,14 @@ ; OPTM1: mov DWORD PTR [esp+0xc],0xb3a ; OPTM1: mov DWORD PTR [esp+0x8],0x73ce2ff2 ; OPTM1: call {{.*}} R_{{.*}} __divdi3 +; +; ARM32-LABEL: div64BitSignedConst +; For a constant, we should be able to optimize-out the divide by zero check. +; ARM32-NOT: orrs +; ARM32: movw {{.*}} ; 0x2ff2 +; ARM32: movt {{.*}} ; 0x73ce +; ARM32: movw {{.*}} ; 0xb3a +; ARM32: bl {{.*}} __divdi3 define internal i64 @div64BitUnsigned(i64 %a, i64 %b) { entry: @@ -341,6 +354,11 @@ ; ; OPTM1-LABEL: div64BitUnsigned ; OPTM1: call {{.*}} R_{{.*}} __udivdi3 +; +; ARM32-LABEL: div64BitUnsigned +; ARM32: orrs {{r.*}}, {{r.*}} +; ARM32: bne +; ARM32: bl {{.*}} __udivdi3 define internal i64 @rem64BitSigned(i64 %a, i64 %b) { entry: @@ -352,6 +370,11 @@ ; ; OPTM1-LABEL: rem64BitSigned ; OPTM1: call {{.*}} R_{{.*}} __moddi3 +; +; ARM32-LABEL: rem64BitSigned +; ARM32: orrs {{r.*}}, {{r.*}} +; ARM32: bne +; ARM32: bl {{.*}} __moddi3 define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) { entry: @@ -363,6 +386,11 @@ ; ; OPTM1-LABEL: rem64BitUnsigned ; OPTM1: call {{.*}} R_{{.*}} __umoddi3 +; +; ARM32-LABEL: rem64BitUnsigned +; ARM32: orrs {{r.*}}, {{r.*}} +; ARM32: bne +; ARM32: bl {{.*}} __umoddi3 define internal i64 @shl64BitSigned(i64 %a, i64 %b) { entry:
diff --git a/tests_lit/llvm2ice_tests/arith.ll b/tests_lit/llvm2ice_tests/arith.ll index ed82426..f6318ae 100644 --- a/tests_lit/llvm2ice_tests/arith.ll +++ b/tests_lit/llvm2ice_tests/arith.ll
@@ -8,10 +8,15 @@ ; once enough infrastructure is in. Also, switch to --filetype=obj ; when possible. ; RUN: %if --need=target_ARM32 --need=allow_dump \ -; RUN: --command %p2i --filetype=asm --assemble \ -; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \ +; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \ +; RUN: -i %s --args -O2 --skip-unimplemented \ ; RUN: | %if --need=target_ARM32 --need=allow_dump \ ; RUN: --command FileCheck --check-prefix ARM32 %s +; RUN: %if --need=target_ARM32 --need=allow_dump \ +; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \ +; RUN: -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \ +; RUN: | %if --need=target_ARM32 --need=allow_dump \ +; RUN: --command FileCheck --check-prefix ARM32HWDIV %s define i32 @Add(i32 %a, i32 %b) { entry: @@ -117,10 +122,32 @@ ; CHECK-LABEL: Sdiv ; CHECK: cdq ; CHECK: idiv e +; ; ARM32-LABEL: Sdiv -; TODO(jvoung) -- implement divide and check here. -; The lowering needs to check if the denominator is 0 and trap, since -; ARM normally doesn't trap on divide by 0. 
+; ARM32: tst [[DENOM:r.*]], [[DENOM]] +; ARM32: bne [[LABEL:[0-9a-f]+]] +; ARM32: .word 0xe7fedef0 +; ARM32: [[LABEL]]: {{.*}} bl {{.*}} __divsi3 +; ARM32HWDIV-LABEL: Sdiv +; ARM32HWDIV: tst +; ARM32HWDIV: bne +; ARM32HWDIV: sdiv + +define i32 @SdivConst(i32 %a) { +entry: + %div = sdiv i32 %a, 219 + ret i32 %div +} +; CHECK-LABEL: SdivConst +; CHECK: cdq +; CHECK: idiv e +; +; ARM32-LABEL: SdivConst +; ARM32-NOT: tst +; ARM32: bl {{.*}} __divsi3 +; ARM32HWDIV-LABEL: SdivConst +; ARM32HWDIV-NOT: tst +; ARM32HWDIV: sdiv define i32 @Srem(i32 %a, i32 %b) { entry: @@ -130,7 +157,16 @@ ; CHECK-LABEL: Srem ; CHECK: cdq ; CHECK: idiv e +; ; ARM32-LABEL: Srem +; ARM32: tst [[DENOM:r.*]], [[DENOM]] +; ARM32: bne +; ARM32: bl {{.*}} __modsi3 +; ARM32HWDIV-LABEL: Srem +; ARM32HWDIV: tst +; ARM32HWDIV: bne +; ARM32HWDIV: sdiv +; ARM32HWDIV: mls define i32 @Udiv(i32 %a, i32 %b) { entry: @@ -139,7 +175,15 @@ } ; CHECK-LABEL: Udiv ; CHECK: div e +; ; ARM32-LABEL: Udiv +; ARM32: tst [[DENOM:r.*]], [[DENOM]] +; ARM32: bne +; ARM32: bl {{.*}} __udivsi3 +; ARM32HWDIV-LABEL: Udiv +; ARM32HWDIV: tst +; ARM32HWDIV: bne +; ARM32HWDIV: udiv define i32 @Urem(i32 %a, i32 %b) { entry: @@ -148,4 +192,13 @@ } ; CHECK-LABEL: Urem ; CHECK: div e +; ; ARM32-LABEL: Urem +; ARM32: tst [[DENOM:r.*]], [[DENOM]] +; ARM32: bne +; ARM32: bl {{.*}} __umodsi3 +; ARM32HWDIV-LABEL: Urem +; ARM32HWDIV: tst +; ARM32HWDIV: bne +; ARM32HWDIV: udiv +; ARM32HWDIV: mls
diff --git a/tests_lit/llvm2ice_tests/switch-opt.ll b/tests_lit/llvm2ice_tests/switch-opt.ll index ed6b8b7..3cf0daf 100644 --- a/tests_lit/llvm2ice_tests/switch-opt.ll +++ b/tests_lit/llvm2ice_tests/switch-opt.ll
@@ -5,9 +5,11 @@ ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s ; TODO(jvoung): Update to -02 once the phi assignments is done for ARM -; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \ -; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \ -; RUN: | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s +; RUN: %if --need=target_ARM32 --need=allow_dump \ +; RUN: --command %p2i --filetype=asm --assemble --disassemble \ +; RUN: --target arm32 -i %s --args -Om1 --skip-unimplemented \ +; RUN: | %if --need=target_ARM32 --need=allow_dump \ +; RUN: --command FileCheck --check-prefix ARM32 %s define i32 @testSwitch(i32 %a) { entry:
diff --git a/tests_lit/llvm2ice_tests/unreachable.ll b/tests_lit/llvm2ice_tests/unreachable.ll index bc08bcd..1309b6f 100644 --- a/tests_lit/llvm2ice_tests/unreachable.ll +++ b/tests_lit/llvm2ice_tests/unreachable.ll
@@ -1,7 +1,17 @@ ; This tests the basic structure of the Unreachable instruction. -; RUN: %p2i -i %s --filetype=obj --disassemble -a -O2 | FileCheck %s -; RUN: %p2i -i %s --filetype=obj --disassemble -a -Om1 | FileCheck %s +; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ +; RUN: --target x8632 -i %s --args -O2 \ +; RUN: | %if --need=target_X8632 --command FileCheck %s +; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ +; RUN: --target x8632 -i %s --args -Om1 \ +; RUN: | %if --need=target_X8632 --command FileCheck %s + +; RUN: %if --need=target_ARM32 --need=allow_dump \ +; RUN: --command %p2i --filetype=asm --assemble \ +; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \ +; RUN: | %if --need=target_ARM32 --need=allow_dump \ +; RUN: --command FileCheck --check-prefix ARM32 %s define internal i32 @divide(i32 %num, i32 %den) { entry: @@ -22,3 +32,9 @@ ; CHECK: cdq ; CHECK: idiv ; CHECK: ret + +; ARM32-LABEL: divide +; ARM32: cmp +; ARM32: .word 0xe7fedef0 +; ARM32: bl {{.*}} __divsi3 +; ARM32: bx lr