Lower a few basic ARM binops for i{8,16,32,64}.

Do basic lowering for add, sub, and, or, xor, mul. We don't yet take advantage of commuting immediate operands (e.g., using rsb to reverse-subtract instead of sub) or inverting immediate operands (e.g., using bic to bit-clear instead of and).

The binary operations can set the flags register (e.g., to produce the carry bit for use by a subsequent adc instruction). Setting the flags is optional for the "data processing" instructions.

I'm not yet able to compile 8bit.pnacl.ll and 64bit.pnacl.ll, so the 8-bit and 64-bit cases are not well tested yet. The only tests are in the arith.ll file (like arith-opt.ll, but assembled instead of testing the "verbose inst" output).

Not doing divide yet. ARM divide by 0 does not trap, but PNaCl requires uniform behavior for such bad code. LLVM therefore inserts a check for a zero divisor, and this lowering would have to do the same.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1127003003

commit: 2971997a96b7484165cfedead111c7cafb3073ae [log] [tgz]
author: Jan Voung <jvoung@chromium.org> Tue May 19 11:24:51 2015 -0700
committer: Jan Voung <jvoung@chromium.org> Tue May 19 11:24:51 2015 -0700
tree: 7a502d6494be2c70b4b2cad08d1b5c140fe1d3ed
parent: 537b5ba030eac501734276a39223e2b7a5465ad6 [diff] [blame]
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index d65b546..73eb77c 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp

@@ -540,40 +540,135 @@
   // Or it may be the case that the operands aren't swapped, but the
   // bits can be flipped and a different operation applied.
   // E.g., use BIC (bit clear) instead of AND for some masks.
-  Variable *Src0 = legalizeToVar(Inst->getSrc(0));
-  Operand *Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
-  (void)Src0;
-  (void)Src1;
+  Operand *Src0 = Inst->getSrc(0);
+  Operand *Src1 = Inst->getSrc(1);
   if (Dest->getType() == IceType_i64) {
-    UnimplementedError(Func->getContext()->getFlags());
+    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+    Variable *Src0RLo = legalizeToVar(loOperand(Src0));
+    Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
+    Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
+    Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
+    Variable *T_Lo = makeReg(DestLo->getType());
+    Variable *T_Hi = makeReg(DestHi->getType());
+    switch (Inst->getOp()) {
+    case InstArithmetic::_num:
+      llvm_unreachable("Unknown arithmetic operator");
+      break;
+    case InstArithmetic::Add:
+      _adds(T_Lo, Src0RLo, Src1Lo);
+      _mov(DestLo, T_Lo);
+      _adc(T_Hi, Src0RHi, Src1Hi);
+      _mov(DestHi, T_Hi);
+      break;
+    case InstArithmetic::And:
+      _and(T_Lo, Src0RLo, Src1Lo);
+      _mov(DestLo, T_Lo);
+      _and(T_Hi, Src0RHi, Src1Hi);
+      _mov(DestHi, T_Hi);
+      break;
+    case InstArithmetic::Or:
+      _orr(T_Lo, Src0RLo, Src1Lo);
+      _mov(DestLo, T_Lo);
+      _orr(T_Hi, Src0RHi, Src1Hi);
+      _mov(DestHi, T_Hi);
+      break;
+    case InstArithmetic::Xor:
+      _eor(T_Lo, Src0RLo, Src1Lo);
+      _mov(DestLo, T_Lo);
+      _eor(T_Hi, Src0RHi, Src1Hi);
+      _mov(DestHi, T_Hi);
+      break;
+    case InstArithmetic::Sub:
+      _subs(T_Lo, Src0RLo, Src1Lo);
+      _mov(DestLo, T_Lo);
+      _sbc(T_Hi, Src0RHi, Src1Hi);
+      _mov(DestHi, T_Hi);
+      break;
+    case InstArithmetic::Mul: {
+      // GCC 4.8 does:
+      // a=b*c ==>
+      //   t_acc =(mul) (b.lo * c.hi)
+      //   t_acc =(mla) (c.lo * b.hi) + t_acc
+      //   t.hi,t.lo =(umull) b.lo * c.lo
+      //   t.hi += t_acc
+      //   a.lo = t.lo
+      //   a.hi = t.hi
+      //
+      // LLVM does:
+      //   t.hi,t.lo =(umull) b.lo * c.lo
+      //   t.hi =(mla) (b.lo * c.hi) + t.hi
+      //   t.hi =(mla) (b.hi * c.lo) + t.hi
+      //   a.lo = t.lo
+      //   a.hi = t.hi
+      //
+      // LLVM's lowering has fewer instructions, but more register pressure:
+      // t.lo is live from beginning to end, while GCC delays the two-dest
+      // instruction till the end, and kills c.hi immediately.
+      Variable *T_Acc = makeReg(IceType_i32);
+      Variable *T_Acc1 = makeReg(IceType_i32);
+      Variable *T_Hi1 = makeReg(IceType_i32);
+      Variable *Src1RLo = legalizeToVar(Src1Lo);
+      Variable *Src1RHi = legalizeToVar(Src1Hi);
+      _mul(T_Acc, Src0RLo, Src1RHi);
+      _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
+      _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
+      _add(T_Hi, T_Hi1, T_Acc1);
+      _mov(DestLo, T_Lo);
+      _mov(DestHi, T_Hi);
+    } break;
+    case InstArithmetic::Shl:
+    case InstArithmetic::Lshr:
+    case InstArithmetic::Ashr:
+    case InstArithmetic::Udiv:
+    case InstArithmetic::Sdiv:
+    case InstArithmetic::Urem:
+    case InstArithmetic::Srem:
+      UnimplementedError(Func->getContext()->getFlags());
+      break;
+    case InstArithmetic::Fadd:
+    case InstArithmetic::Fsub:
+    case InstArithmetic::Fmul:
+    case InstArithmetic::Fdiv:
+    case InstArithmetic::Frem:
+      llvm_unreachable("FP instruction with i64 type");
+      break;
+    }
   } else if (isVectorType(Dest->getType())) {
     UnimplementedError(Func->getContext()->getFlags());
   } else { // Dest->getType() is non-i64 scalar
+    Variable *Src0R = legalizeToVar(Inst->getSrc(0));
+    Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
+    Variable *T = makeReg(Dest->getType());
     switch (Inst->getOp()) {
     case InstArithmetic::_num:
       llvm_unreachable("Unknown arithmetic operator");
       break;
     case InstArithmetic::Add: {
-      UnimplementedError(Func->getContext()->getFlags());
-      // Variable *T = makeReg(Dest->getType());
-      // _add(T, Src0, Src1);
-      // _mov(Dest, T);
+      _add(T, Src0R, Src1);
+      _mov(Dest, T);
     } break;
-    case InstArithmetic::And:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
-    case InstArithmetic::Or:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
-    case InstArithmetic::Xor:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
-    case InstArithmetic::Sub:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
-    case InstArithmetic::Mul:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
+    case InstArithmetic::And: {
+      _and(T, Src0R, Src1);
+      _mov(Dest, T);
+    } break;
+    case InstArithmetic::Or: {
+      _orr(T, Src0R, Src1);
+      _mov(Dest, T);
+    } break;
+    case InstArithmetic::Xor: {
+      _eor(T, Src0R, Src1);
+      _mov(Dest, T);
+    } break;
+    case InstArithmetic::Sub: {
+      _sub(T, Src0R, Src1);
+      _mov(Dest, T);
+    } break;
+    case InstArithmetic::Mul: {
+      Variable *Src1R = legalizeToVar(Src1);
+      _mul(T, Src0R, Src1R);
+      _mov(Dest, T);
+    } break;
     case InstArithmetic::Shl:
       UnimplementedError(Func->getContext()->getFlags());
       break;
commit	2971997a96b7484165cfedead111c7cafb3073ae	[log] [tgz]
author	Jan Voung <jvoung@chromium.org>	Tue May 19 11:24:51 2015 -0700
committer	Jan Voung <jvoung@chromium.org>	Tue May 19 11:24:51 2015 -0700
tree	7a502d6494be2c70b4b2cad08d1b5c140fe1d3ed
parent	537b5ba030eac501734276a39223e2b7a5465ad6 [diff] [blame]