Subzero. Rematerializes shufflevector instructions. This CL is a first step towards optimizing vector shuffles in Subzero. PNaCl bitcode does not support the shufflevector instruction, so pnacl-clang emits a series of extractelement/insertelement instructions instead. pnacl-llc is then responsible for pattern-matching the output bitcode and rematerializing the shufflevector. With this CL, we enable shufflevector rematerialization in Subzero. To keep this CL simple, we introduce no efficient shufflevector lowering. Instead, we scalarize the rematerialized instructions. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4136 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1897243002 .
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp index fbfa914..22d4c0a 100644 --- a/src/IceTargetLoweringARM32.cpp +++ b/src/IceTargetLoweringARM32.cpp
@@ -1020,6 +1020,7 @@ // Address mode optimization. Func->getVMetadata()->init(VMK_SingleDefs); Func->doAddressOpt(); + Func->materializeVectorShuffles(); // Argument lowering Func->doArgLowering(); @@ -5812,6 +5813,44 @@ Context.insert<InstFakeUse>(SP); } +void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) { + auto *Dest = Instr->getDest(); + const Type DestTy = Dest->getType(); + // Scalarized lowering: T is rebuilt once per result lane in the loop below. + auto *T = makeReg(DestTy); + + switch (DestTy) { + default: + break; + // TODO(jpp): figure out how to properly lower this without scalarization. + } + + // Unoptimized shuffle. Perform a series of inserts and extracts. + Context.insert<InstFakeDef>(T); // T is read by the first insert below. + auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); + auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); + const SizeT NumElements = typeNumElements(DestTy); + const Type ElementType = typeElementType(DestTy); + for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { + auto *Index = Instr->getIndex(I); + const SizeT Elem = Index->getValue(); + auto *ExtElmt = makeReg(ElementType); + if (Elem < NumElements) { // Lane I is taken from Src0. + lowerExtractElement( + InstExtractElement::create(Func, ExtElmt, Src0, Index)); + } else { // Lane I is taken from Src1; rebase the index by NumElements. + lowerExtractElement(InstExtractElement::create( + Func, ExtElmt, Src1, + Ctx->getConstantInt32(Index->getValue() - NumElements))); + } + auto *NewT = makeReg(DestTy); + lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt, + Ctx->getConstantInt32(I))); + T = NewT; // The next iteration inserts into this vector. + } + _mov(Dest, T); // T now has every indexed lane filled in. +} + void TargetARM32::lowerSelect(const InstSelect *Instr) { Variable *Dest = Instr->getDest(); Type DestTy = Dest->getType();