Implement intrinsics for loading/storing subvectors.
This enables emulating 64-bit and 32-bit vectors using 128-bit
vectors internally (x86 only for now). Note that these intrinsics
are not part of the PNaCl specification.
BUG=swiftshader:15
Change-Id: I61a666243832c2856e60eb477d42a72dec07d01d
Reviewed-on: https://chromium-review.googlesource.com/392246
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 2ed35fe..230df8f 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -166,6 +166,7 @@
Store,
StoreP,
StoreQ,
+ StoreD,
Sub,
SubRMW,
Subps,
@@ -2595,7 +2596,7 @@
InstX86StoreQ &operator=(const InstX86StoreQ &) = delete;
public:
- static InstX86StoreQ *create(Cfg *Func, Variable *Value,
+ static InstX86StoreQ *create(Cfg *Func, Operand *Value,
X86OperandMem *Mem) {
return new (Func->allocate<InstX86StoreQ>())
InstX86StoreQ(Func, Value, Mem);
@@ -2608,7 +2609,29 @@
}
private:
- InstX86StoreQ(Cfg *Func, Variable *Value, X86OperandMem *Mem);
+ InstX86StoreQ(Cfg *Func, Operand *Value, X86OperandMem *Mem);
+ };
+
+ /// x86 store instruction emitted as `movd`. Source 0 is the value to store
+ /// and source 1 is the destination memory operand. The x86 lowering uses it
+ /// for 4-byte StoreSubVector stores.
+ class InstX86StoreD final : public InstX86Base {
+   InstX86StoreD() = delete;
+   InstX86StoreD(const InstX86StoreD &) = delete;
+   InstX86StoreD &operator=(const InstX86StoreD &) = delete;
+
+ public:
+   /// Constructs the instruction in storage obtained from Func->allocate.
+   static InstX86StoreD *create(Cfg *Func, Operand *Value,
+                                X86OperandMem *Mem) {
+     return new (Func->allocate<InstX86StoreD>())
+         InstX86StoreD(Func, Value, Mem);
+   }
+   void emit(const Cfg *Func) const override;
+   void emitIAS(const Cfg *Func) const override;
+   void dump(const Cfg *Func) const override;
+   /// Kind test for this class. It must compare against StoreD (the kind the
+   /// constructor registers); comparing against StoreQ would make StoreQ
+   /// instructions look like StoreD and StoreD instructions look like neither.
+   static bool classof(const Inst *Instr) {
+     return InstX86Base::isClassof(Instr, InstX86Base::StoreD);
+   }
+
+ private:
+   InstX86StoreD(Cfg *Func, Operand *Value, X86OperandMem *Mem);
};
/// Nop instructions of varying length
@@ -3007,6 +3030,7 @@
using Store = typename InstImpl<TraitsType>::InstX86Store;
using StoreP = typename InstImpl<TraitsType>::InstX86StoreP;
using StoreQ = typename InstImpl<TraitsType>::InstX86StoreQ;
+ using StoreD = typename InstImpl<TraitsType>::InstX86StoreD;
using Nop = typename InstImpl<TraitsType>::InstX86Nop;
template <typename T = typename InstImpl<TraitsType>::Traits>
using Fld =
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 26fa2c4..1bc2a0c 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -297,7 +297,7 @@
}
template <typename TraitsType>
-InstImpl<TraitsType>::InstX86StoreQ::InstX86StoreQ(Cfg *Func, Variable *Value,
+InstImpl<TraitsType>::InstX86StoreQ::InstX86StoreQ(Cfg *Func, Operand *Value,
X86OperandMem *Mem)
: InstX86Base(Func, InstX86Base::StoreQ, 2, nullptr) {
this->addSource(Value);
@@ -305,6 +305,14 @@
}
template <typename TraitsType> // Builds a StoreD instruction whose sources are Value, then Mem.
+InstImpl<TraitsType>::InstX86StoreD::InstX86StoreD(Cfg *Func, Operand *Value,
+ X86OperandMem *Mem)
+ : InstX86Base(Func, InstX86Base::StoreD, 2, nullptr) {
+ this->addSource(Value); // source 0: read back by emit/emitIAS/dump as the value to store
+ this->addSource(Mem); // source 1: read back as the destination memory operand
+}
+
+template <typename TraitsType>
InstImpl<TraitsType>::InstX86Nop::InstX86Nop(Cfg *Func, NopVariant Variant)
: InstX86Base(Func, InstX86Base::Nop, 0, nullptr), Variant(Variant) {}
@@ -2021,6 +2029,46 @@
}
+/// Textual emission: writes "\tmovd\t<source 0>, <source 1>" to the emit
+/// stream. Does nothing when BuildDefs::dump() is false.
template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreD::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  const Operand *Value = this->getSrc(0);
+  const Operand *Mem = this->getSrc(1);
+  // NOTE(review): i64/f64 are accepted here even though movd is a 32-bit
+  // move -- confirm this is intended.
+  const Type MemTy = Mem->getType();
+  assert(MemTy == IceType_i64 || MemTy == IceType_f64 ||
+         isVectorType(MemTy));
+  (void)MemTy; // only read by the assert above
+  Out << "\tmovd\t";
+  Value->emit(Func);
+  Out << ", ";
+  Mem->emit(Func);
+}
+
+/// Integrated-assembler emission: issues Assembler::movd with the type of
+/// source 0, the assembler address of source 1, and the encoded XMM register
+/// assigned to source 0.
template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreD::emitIAS(const Cfg *Func) const {
+  assert(this->getSrcSize() == 2);
+  const auto *Value = llvm::cast<Variable>(this->getSrc(0));
+  auto *const Mem = llvm::cast<X86OperandMem>(this->getSrc(1));
+  assert(Mem->getSegmentRegister() == X86OperandMem::DefaultSegment);
+  // The value must already live in a register; its number is encoded below.
+  assert(Value->hasReg());
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  auto *Target = InstX86Base::getTarget(Func);
+  const auto &DestAddr = Mem->toAsmAddress(Asm, Target);
+  const auto SrcXmm = Traits::getEncodedXmm(Value->getRegNum());
+  Asm->movd(Value->getType(), DestAddr, SrcXmm);
+}
+
+/// Debug dump in the form "stored.<type of source 0> <source 1>, <source 0>".
+/// Does nothing when BuildDefs::dump() is false.
template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreD::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Out = Func->getContext()->getStrDump();
+  const Operand *Value = this->getSrc(0);
+  Out << "stored." << Value->getType() << " ";
+  // The memory operand is printed first, then the stored value.
+  const Operand *Mem = this->getSrc(1);
+  Mem->dump(Func);
+  Out << ", ";
+  Value->dump(Func);
+}
+
+template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Lea::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
@@ -2279,7 +2327,8 @@
void InstImpl<TraitsType>::InstX86Movq::emitIAS(const Cfg *Func) const {
assert(this->getSrcSize() == 1);
assert(this->getDest()->getType() == IceType_i64 ||
- this->getDest()->getType() == IceType_f64);
+ this->getDest()->getType() == IceType_f64 ||
+ isVectorType(this->getDest()->getType()));
const Variable *Dest = this->getDest();
const Operand *Src = this->getSrc(0);
static const XmmEmitterMovOps Emitter = {&Assembler::movq, &Assembler::movq,
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 4b2fbc4..3acfbf4 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -60,7 +60,10 @@
Sqrt,
Stacksave,
Stackrestore,
- Trap
+ Trap,
+ // The intrinsics below are not part of the PNaCl specification.
+ LoadSubVector,
+ StoreSubVector
};
/// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 7564652..4577997 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -5279,6 +5279,14 @@
case Intrinsics::Trap:
_trap();
return;
+ // The sub-vector intrinsics are not part of the PNaCl specification and
+ // have no ARM32 lowering; both report an unimplemented lowering.
+ case Intrinsics::LoadSubVector:
+ case Intrinsics::StoreSubVector: {
+   UnimplementedLoweringError(this, Instr);
+   return;
+ }
case Intrinsics::UnknownIntrinsic:
Func->setError("Should not be lowering UnknownIntrinsic");
return;
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 9b71d46..56e0679 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -4043,6 +4043,14 @@
_teq(getZero(), getZero(), TrapCodeZero);
return;
}
+ // Neither sub-vector intrinsic is required for PNaCl, so both report an
+ // unimplemented lowering on MIPS32.
+ case Intrinsics::LoadSubVector:
+ case Intrinsics::StoreSubVector: {
+   UnimplementedLoweringError(this, Instr); // Not required for PNaCl
+   return;
+ }
case Intrinsics::UnknownIntrinsic:
Func->setError("Should not be lowering UnknownIntrinsic");
return;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 940954d..a7c89f9 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -904,10 +904,14 @@
AutoMemorySandboxer<> _(this, &Value, &Mem);
Context.insert<typename Traits::Insts::StoreP>(Value, Mem);
}
- void _storeq(Variable *Value, X86OperandMem *Mem) {
+ void _storeq(Operand *Value, X86OperandMem *Mem) { // Inserts a StoreQ instruction for (Value, Mem) via Context.insert.
AutoMemorySandboxer<> _(this, &Value, &Mem); // NOTE(review): receives the addresses of Value and Mem, so it can presumably rewrite them for sandboxing while in scope -- confirm
Context.insert<typename Traits::Insts::StoreQ>(Value, Mem);
}
+ void _stored(Operand *Value, X86OperandMem *Mem) { // Inserts a StoreD instruction for (Value, Mem) via Context.insert.
+ AutoMemorySandboxer<> _(this, &Value, &Mem); // NOTE(review): receives the addresses of Value and Mem, so it can presumably rewrite them for sandboxing while in scope -- confirm
+ Context.insert<typename Traits::Insts::StoreD>(Value, Mem);
+ }
void _sub(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Sub>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 8ecef55..d0a2aa3 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4344,6 +4344,58 @@
case Intrinsics::Trap:
_ud2();
return;
+ case Intrinsics::LoadSubVector: {
+   // Arg 0 is the sub-vector size (4 or 8) and must be a 32-bit integer
+   // constant; arg 1 is the address to load from. The load is lowered to
+   // movd (size 4) or movq (size 8) into Dest.
+   auto *SubVectorSize = llvm::dyn_cast<ConstantInteger32>(Instr->getArg(0));
+   if (SubVectorSize == nullptr) {
+     // Report the error rather than dereferencing a null pointer, which is
+     // what an assert-only check allows once asserts are compiled out.
+     Func->setError("LoadSubVector first argument must be a constant");
+     return;
+   }
+   Variable *Dest = Instr->getDest();
+   Type Ty = Dest->getType();
+   Operand *Addr = Instr->getArg(1);
+   X86OperandMem *Src = formMemoryOperand(Addr, Ty);
+   doMockBoundsCheck(Src);
+
+   // A rematerializable destination gets only a fake definition, no load.
+   if (Dest->isRematerializable()) {
+     Context.insert<InstFakeDef>(Dest);
+     return;
+   }
+
+   switch (SubVectorSize->getValue()) {
+   case 4:
+     _movd(Dest, Src);
+     break;
+   case 8:
+     _movq(Dest, Src);
+     break;
+   default:
+     Func->setError("Unexpected size for LoadSubVector");
+     return;
+   }
+   return;
+ }
+ case Intrinsics::StoreSubVector: {
+   // Arg 0 is the sub-vector size (4 or 8) and must be a 32-bit integer
+   // constant; arg 1 is the value to store and arg 2 is the destination
+   // address. The store is lowered to StoreD (size 4) or StoreQ (size 8).
+   auto *SubVectorSize = llvm::dyn_cast<ConstantInteger32>(Instr->getArg(0));
+   if (SubVectorSize == nullptr) {
+     // Report the error rather than dereferencing a null pointer, which is
+     // what an assert-only check allows once asserts are compiled out.
+     Func->setError("StoreSubVector first argument must be a constant");
+     return;
+   }
+   Operand *Value = Instr->getArg(1);
+   Operand *Addr = Instr->getArg(2);
+   X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType());
+   doMockBoundsCheck(NewAddr);
+
+   // StoreD/StoreQ emission reads the value from a register.
+   Value = legalizeToReg(Value);
+
+   switch (SubVectorSize->getValue()) {
+   case 4:
+     _stored(Value, NewAddr);
+     break;
+   case 8:
+     _storeq(Value, NewAddr);
+     break;
+   default:
+     Func->setError("Unexpected size for StoreSubVector");
+     return;
+   }
+   return;
+ }
case Intrinsics::UnknownIntrinsic:
Func->setError("Should not be lowering UnknownIntrinsic");
return;