Implement vector intrinsics for ARM32. MultiplyAddPairs is implemented using VMULL+VPADD. MultiplyHighSigned/Unsigned are implemented using VMULL+VSHRN. LoadSubVector/StoreSubVector are implemented using VLDR/VLD1/VSTR/VST1. VectorPackSigned/Unsigned are implemented using two VQMOVN instructions. Bug b/37496078 Bug b/37496856 Bug b/37496321 Bug b/37496082 Change-Id: I141fd901d53da24ce780f503dc7ad17b94fc6ba8 Reviewed-on: https://chromium-review.googlesource.com/693049 Reviewed-by: Jim Stichnoth <stichnot@chromium.org> Reviewed-on: https://swiftshader-review.googlesource.com/12709 Reviewed-by: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/third_party/subzero/src/IceTargetLoweringARM32.cpp b/third_party/subzero/src/IceTargetLoweringARM32.cpp index 65dca3a..9856f7a 100644 --- a/third_party/subzero/src/IceTargetLoweringARM32.cpp +++ b/third_party/subzero/src/IceTargetLoweringARM32.cpp
@@ -5331,23 +5331,75 @@ return; } case Intrinsics::LoadSubVector: { - UnimplementedLoweringError(this, Instr); + assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) && + "LoadSubVector second argument must be a constant"); + Variable *Dest = Instr->getDest(); + Type Ty = Dest->getType(); + auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1)); + Operand *Addr = Instr->getArg(0); + OperandARM32Mem *Src = formMemoryOperand(Addr, Ty); + doMockBoundsCheck(Src); + + if (Dest->isRematerializable()) { + Context.insert<InstFakeDef>(Dest); + return; + } + + auto *T = makeReg(Ty); + switch (SubVectorSize->getValue()) { + case 4: + _vldr1d(T, Src); + break; + case 8: + _vldr1q(T, Src); + break; + default: + Func->setError("Unexpected size for LoadSubVector"); + return; + } + _mov(Dest, T); // FIXME: necessary? return; } case Intrinsics::StoreSubVector: { - UnimplementedLoweringError(this, Instr); + assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) && + "StoreSubVector third argument must be a constant"); + auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2)); + Variable *Value = legalizeToReg(Instr->getArg(0)); + Operand *Addr = Instr->getArg(1); + OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); + doMockBoundsCheck(NewAddr); + + Value = legalizeToReg(Value); + + switch (SubVectorSize->getValue()) { + case 4: + _vstr1d(Value, NewAddr); + break; + case 8: + _vstr1q(Value, NewAddr); + break; + default: + Func->setError("Unexpected size for StoreSubVector"); + return; + } return; } case Intrinsics::MultiplyAddPairs: { - UnimplementedLoweringError(this, Instr); + Variable *Src0 = legalizeToReg(Instr->getArg(0)); + Variable *Src1 = legalizeToReg(Instr->getArg(1)); + Variable *T = makeReg(DestTy); + _vmlap(T, Src0, Src1); + _mov(Dest, T); return; } - case Intrinsics::MultiplyHighSigned: { - UnimplementedLoweringError(this, Instr); - return; - } + case Intrinsics::MultiplyHighSigned: case Intrinsics::MultiplyHighUnsigned: { 
- UnimplementedLoweringError(this, Instr); + bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned); + Variable *Src0 = legalizeToReg(Instr->getArg(0)); + Variable *Src1 = legalizeToReg(Instr->getArg(1)); + Variable *T = makeReg(DestTy); + _vmulh(T, Src0, Src1, Unsigned); + _mov(Dest, T); return; } case Intrinsics::Nearbyint: { @@ -5372,12 +5424,15 @@ _mov(Dest, T); return; } - case Intrinsics::VectorPackSigned: { - UnimplementedLoweringError(this, Instr); - return; - } + case Intrinsics::VectorPackSigned: case Intrinsics::VectorPackUnsigned: { - UnimplementedLoweringError(this, Instr); + bool Unsigned = (ID == Intrinsics::VectorPackUnsigned); + bool Saturating = true; + Variable *Src0 = legalizeToReg(Instr->getArg(0)); + Variable *Src1 = legalizeToReg(Instr->getArg(1)); + Variable *T = makeReg(DestTy); + _vqmovn2(T, Src0, Src1, Unsigned, Saturating); + _mov(Dest, T); return; } default: // UnknownIntrinsic