Lower a few basic ARM binops for i{8,16,32,64}.

Do basic lowering for add, sub, and, or, xor, mul.
We don't yet take advantage of commuting immediate operands
(e.g., using rsb to reverse-subtract instead of sub) or
inverting immediate operands (e.g., using bic to bit-clear
instead of and).

The binary operations can set the flags register (e.g., to
produce the carry bit for use by a subsequent adc
instruction). Setting the flags is optional for the
"data processing" instructions.

I'm not yet able to compile 8bit.pnacl.ll and
64bit.pnacl.ll, so 8-bit and 64-bit are not well tested yet.
The only tests are in the arith.ll file (like arith-opt.ll, but
assembled instead of testing the "verbose inst" output).

Not doing divide yet. ARM divide by 0 does not trap, but
PNaCl requires uniform behavior for such bad code. Thus,
in LLVM we insert a 0 check, and we would have to do the same here.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1127003003
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 6ac1698..67f0fd4 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -60,11 +60,24 @@
   assert(Inst->getSrcSize() == 2);
   Variable *Dest = Inst->getDest();
   assert(Dest == Inst->getSrc(0));
-  Operand *Src1 = Inst->getSrc(1);
   Str << "\t" << Opcode << "\t";
   Dest->emit(Func);
   Str << ", ";
-  Src1->emit(Func);
+  Inst->getSrc(1)->emit(Func);
+}
+
+void emitThreeAddr(const char *Opcode, const Inst *Inst, const Cfg *Func,
+                   bool SetFlags) {
+  if (!ALLOW_DUMP)
+    return;
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(Inst->getSrcSize() == 2);
+  Str << "\t" << Opcode << (SetFlags ? "s" : "") << "\t";
+  Inst->getDest()->emit(Func);
+  Str << ", ";
+  Inst->getSrc(0)->emit(Func);
+  Str << ", ";
+  Inst->getSrc(1)->emit(Func);
 }
 
 OperandARM32Mem::OperandARM32Mem(Cfg * /* Func */, Type Ty, Variable *Base,
@@ -146,6 +159,14 @@
   addSource(Mem);
 }
 
+InstARM32Mla::InstARM32Mla(Cfg *Func, Variable *Dest, Variable *Src0,
+                           Variable *Src1, Variable *Acc)
+    : InstARM32(Func, InstARM32::Mla, 3, Dest) {
+  addSource(Src0);
+  addSource(Src1);
+  addSource(Acc);
+}
+
 InstARM32Ret::InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source)
     : InstARM32(Func, InstARM32::Ret, Source ? 2 : 1, nullptr) {
   addSource(LR);
@@ -153,6 +174,15 @@
     addSource(Source);
 }
 
+InstARM32Umull::InstARM32Umull(Cfg *Func, Variable *DestLo, Variable *DestHi,
+                               Variable *Src0, Variable *Src1)
+    : InstARM32(Func, InstARM32::Umull, 2, DestLo),
+      // DestHi is expected to have a FakeDef inserted by the lowering code.
+      DestHi(DestHi) {
+  addSource(Src0);
+  addSource(Src1);
+}
+
 // ======================== Dump routines ======================== //
 
 // Two-addr ops
@@ -162,6 +192,15 @@
 template <> const char *InstARM32Mvn::Opcode = "mvn";
 // Mov-like ops
 template <> const char *InstARM32Mov::Opcode = "mov";
+// Three-addr ops
+template <> const char *InstARM32Adc::Opcode = "adc";
+template <> const char *InstARM32Add::Opcode = "add";
+template <> const char *InstARM32And::Opcode = "and";
+template <> const char *InstARM32Eor::Opcode = "eor";
+template <> const char *InstARM32Mul::Opcode = "mul";
+template <> const char *InstARM32Orr::Opcode = "orr";
+template <> const char *InstARM32Sbc::Opcode = "sbc";
+template <> const char *InstARM32Sub::Opcode = "sub";
 
 void InstARM32::dump(const Cfg *Func) const {
   if (!ALLOW_DUMP)
@@ -217,7 +256,7 @@
 }
 
 void InstARM32Ldr::emitIAS(const Cfg *Func) const {
-  assert(getSrcSize() == 2);
+  assert(getSrcSize() == 1);
   (void)Func;
   llvm_unreachable("Not yet implemented");
 }
@@ -227,7 +266,40 @@
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   dumpDest(Func);
-  Str << "ldr." << getSrc(0)->getType() << " ";
+  Str << " = ldr." << getSrc(0)->getType() << " ";
+  dumpSources(Func);
+}
+
+void InstARM32Mla::emit(const Cfg *Func) const {
+  if (!ALLOW_DUMP)
+    return;
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 3);
+  assert(getDest()->hasReg());
+  Str << "\t"
+      << "mla"
+      << "\t";
+  getDest()->emit(Func);
+  Str << ", ";
+  getSrc(0)->emit(Func);
+  Str << ", ";
+  getSrc(1)->emit(Func);
+  Str << ", ";
+  getSrc(2)->emit(Func);
+}
+
+void InstARM32Mla::emitIAS(const Cfg *Func) const {
+  assert(getSrcSize() == 3);
+  (void)Func;
+  llvm_unreachable("Not yet implemented");
+}
+
+void InstARM32Mla::dump(const Cfg *Func) const {
+  if (!ALLOW_DUMP)
+    return;
+  Ostream &Str = Func->getContext()->getStrDump();
+  dumpDest(Func);
+  Str << " = mla." << getSrc(0)->getType() << " ";
   dumpSources(Func);
 }
 
@@ -274,7 +346,9 @@
   assert(LR->hasReg());
   assert(LR->getRegNum() == RegARM32::Reg_lr);
   Ostream &Str = Func->getContext()->getStrEmit();
-  Str << "\tbx\t";
+  Str << "\t"
+      << "bx"
+      << "\t";
   LR->emit(Func);
 }
 
@@ -292,6 +366,39 @@
   dumpSources(Func);
 }
 
+void InstARM32Umull::emit(const Cfg *Func) const {
+  if (!ALLOW_DUMP)
+    return;
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 2);
+  assert(getDest()->hasReg());
+  Str << "\t"
+      << "umull"
+      << "\t";
+  getDest()->emit(Func);
+  Str << ", ";
+  DestHi->emit(Func);
+  Str << ", ";
+  getSrc(0)->emit(Func);
+  Str << ", ";
+  getSrc(1)->emit(Func);
+}
+
+void InstARM32Umull::emitIAS(const Cfg *Func) const {
+  assert(getSrcSize() == 2);
+  (void)Func;
+  llvm_unreachable("Not yet implemented");
+}
+
+void InstARM32Umull::dump(const Cfg *Func) const {
+  if (!ALLOW_DUMP)
+    return;
+  Ostream &Str = Func->getContext()->getStrDump();
+  dumpDest(Func);
+  Str << " = umull." << getSrc(0)->getType() << " ";
+  dumpSources(Func);
+}
+
 void OperandARM32Mem::emit(const Cfg *Func) const {
   if (!ALLOW_DUMP)
     return;