Subzero. Rematerializes shufflevector instructions.

This CL is a first step towards optimizing vector shuffles in
Subzero.

PNaCl bitcode does not support the shufflevector instruction,
so pnacl-clang emits a series of extractelement/insertelement
instructions instead. pnacl-llc is then responsible for pattern
matching the output bitcode and rematerializing the shufflevector.

With this CL, we enable shufflevector rematerialization in
Subzero. To keep this CL simple, we introduce no efficient
shufflevector lowering. Instead, we scalarize the
rematerialized instructions.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4136
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1897243002 .
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index fbfa914..22d4c0a 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -1020,6 +1020,7 @@
   // Address mode optimization.
   Func->getVMetadata()->init(VMK_SingleDefs);
   Func->doAddressOpt();
+  Func->materializeVectorShuffles();
 
   // Argument lowering
   Func->doArgLowering();
@@ -5812,6 +5813,44 @@
   Context.insert<InstFakeUse>(SP);
 }
 
+void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
+  auto *Dest = Instr->getDest();
+  const Type DestTy = Dest->getType();
+  // T is the running result vector; it is copied into Dest at the end.
+  auto *T = makeReg(DestTy);
+  // No type has a dedicated lowering yet, so every DestTy falls through.
+  switch (DestTy) {
+  default:
+    break;
+    // TODO(jpp): figure out how to properly lower this without scalarization.
+  }
+  // Unoptimized shuffle: for each result lane I, extract the selected source
+  // element and insert it at lane I, producing a new running vector.
+  Context.insert<InstFakeDef>(T); // presumably seeds T for liveness; confirm
+  auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
+  auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
+  const SizeT NumElements = typeNumElements(DestTy);
+  const Type ElementType = typeElementType(DestTy);
+  for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
+    auto *Index = Instr->getIndex(I);
+    const SizeT Elem = Index->getValue();
+    auto *ExtElmt = makeReg(ElementType);
+    if (Elem < NumElements) { // index selects a lane of Src0
+      lowerExtractElement(
+          InstExtractElement::create(Func, ExtElmt, Src0, Index));
+    } else { // index selects a lane of Src1; rebase it by NumElements
+      lowerExtractElement(InstExtractElement::create(
+          Func, ExtElmt, Src1,
+          Ctx->getConstantInt32(Index->getValue() - NumElements)));
+    }
+    auto *NewT = makeReg(DestTy);
+    lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
+                                                 Ctx->getConstantInt32(I)));
+    T = NewT; // the next insert builds on this result
+  }
+  _mov(Dest, T);
+}
+
 void TargetARM32::lowerSelect(const InstSelect *Instr) {
   Variable *Dest = Instr->getDest();
   Type DestTy = Dest->getType();