Implement vector intrinsics for ARM32.

MultiplyAddPairs is implemented using VMULL+VPADD.
MultiplyHighSigned/Unsigned is implemented using VMULL+VSHRN.
LoadSubVector/StoreSubVector are implemented using VLDR/VLD1 and VSTR/VST1, respectively.
VectorPackSigned/Unsigned is implemented using two VQMOVN instructions.

Bug b/37496078
Bug b/37496856
Bug b/37496321
Bug b/37496082

Change-Id: I141fd901d53da24ce780f503dc7ad17b94fc6ba8
Reviewed-on: https://chromium-review.googlesource.com/693049
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Reviewed-on: https://swiftshader-review.googlesource.com/12709
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/third_party/subzero/src/IceTargetLoweringARM32.cpp b/third_party/subzero/src/IceTargetLoweringARM32.cpp
index 65dca3a..9856f7a 100644
--- a/third_party/subzero/src/IceTargetLoweringARM32.cpp
+++ b/third_party/subzero/src/IceTargetLoweringARM32.cpp
@@ -5331,23 +5331,75 @@
     return;
   }
   case Intrinsics::LoadSubVector: {
-    UnimplementedLoweringError(this, Instr);
+    assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
+           "LoadSubVector second argument must be a constant");
+    Variable *Dest = Instr->getDest();
+    Type Ty = Dest->getType();
+    auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
+    Operand *Addr = Instr->getArg(0);
+    OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
+    doMockBoundsCheck(Src);
+
+    if (Dest->isRematerializable()) {
+      Context.insert<InstFakeDef>(Dest);
+      return;
+    }
+
+    auto *T = makeReg(Ty);
+    switch (SubVectorSize->getValue()) {
+    case 4:
+      _vldr1d(T, Src);
+      break;
+    case 8:
+      _vldr1q(T, Src);
+      break;
+    default:
+      Func->setError("Unexpected size for LoadSubVector");
+      return;
+    }
+    _mov(Dest, T); // FIXME: necessary?
     return;
   }
   case Intrinsics::StoreSubVector: {
-    UnimplementedLoweringError(this, Instr);
+    assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
+           "StoreSubVector third argument must be a constant");
+    auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
+    Variable *Value = legalizeToReg(Instr->getArg(0));
+    Operand *Addr = Instr->getArg(1);
+    OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
+    doMockBoundsCheck(NewAddr);
+
+    Value = legalizeToReg(Value);
+
+    switch (SubVectorSize->getValue()) {
+    case 4:
+      _vstr1d(Value, NewAddr);
+      break;
+    case 8:
+      _vstr1q(Value, NewAddr);
+      break;
+    default:
+      Func->setError("Unexpected size for StoreSubVector");
+      return;
+    }
     return;
   }
   case Intrinsics::MultiplyAddPairs: {
-    UnimplementedLoweringError(this, Instr);
+    Variable *Src0 = legalizeToReg(Instr->getArg(0));
+    Variable *Src1 = legalizeToReg(Instr->getArg(1));
+    Variable *T = makeReg(DestTy);
+    _vmlap(T, Src0, Src1);
+    _mov(Dest, T);
     return;
   }
-  case Intrinsics::MultiplyHighSigned: {
-    UnimplementedLoweringError(this, Instr);
-    return;
-  }
+  case Intrinsics::MultiplyHighSigned:
   case Intrinsics::MultiplyHighUnsigned: {
-    UnimplementedLoweringError(this, Instr);
+    bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
+    Variable *Src0 = legalizeToReg(Instr->getArg(0));
+    Variable *Src1 = legalizeToReg(Instr->getArg(1));
+    Variable *T = makeReg(DestTy);
+    _vmulh(T, Src0, Src1, Unsigned);
+    _mov(Dest, T);
     return;
   }
   case Intrinsics::Nearbyint: {
@@ -5372,12 +5424,15 @@
     _mov(Dest, T);
     return;
   }
-  case Intrinsics::VectorPackSigned: {
-    UnimplementedLoweringError(this, Instr);
-    return;
-  }
+  case Intrinsics::VectorPackSigned:
   case Intrinsics::VectorPackUnsigned: {
-    UnimplementedLoweringError(this, Instr);
+    bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
+    bool Saturating = true;
+    Variable *Src0 = legalizeToReg(Instr->getArg(0));
+    Variable *Src1 = legalizeToReg(Instr->getArg(1));
+    Variable *T = makeReg(DestTy);
+    _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
+    _mov(Dest, T);
     return;
   }
   default: // UnknownIntrinsic