Implement floating-point rounding intrinsic.
BUG=swiftshader:15
Change-Id: I8e53f2fdb8208f8be0f4cdff3241b4a5efe9bc8a
Reviewed-on: https://chromium-review.googlesource.com/404352
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 5cd4faf..96b1df4 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -511,7 +511,9 @@
kRoundUp = 0x2,
kRoundToZero = 0x3
};
- void roundsd(XmmRegister dst, XmmRegister src, RoundingMode mode);
+ void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
+ void round(Type Ty, XmmRegister dst, const Address &src,
+ const Immediate &mode);
//----------------------------------------------------------------------------
//
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 0abf587..655e5ad 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -2392,17 +2392,58 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::roundsd(XmmRegister dst, XmmRegister src,
- RoundingMode mode) {
+void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
+ XmmRegister src,
+ const Immediate &mode) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitRexRB(RexTypeIrrelevant, dst, src);
emitUint8(0x0F);
emitUint8(0x3A);
- emitUint8(0x0B);
+ switch (Ty) { // Ty selects the last opcode byte.
+ case IceType_v4f32:
+ emitUint8(0x08);
+ break;
+ case IceType_f32:
+ emitUint8(0x0A);
+ break;
+ case IceType_f64:
+ emitUint8(0x0B); // Same byte the removed roundsd() emitted.
+ break;
+ default:
+ assert(false && "Unsupported round operand type"); // NOTE(review): with asserts compiled out, no opcode byte is emitted on this path.
+ }
emitXmmRegisterOperand(dst, src);
// Mask precision exeption.
- emitUint8(static_cast<uint8_t>(mode) | 0x8);
+ emitUint8(static_cast<uint8_t>(mode.value()) | 0x8); // mode is now an Immediate; bit 3 is still forced on.
+}
+
+template <typename TraitsType>
+void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
+ const Address &src,
+ const Immediate &mode) {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x66);
+ emitAddrSizeOverridePrefix();
+ emitRex(RexTypeIrrelevant, src, dst);
+ emitUint8(0x0F);
+ emitUint8(0x3A);
+ switch (Ty) { // Same type-to-opcode mapping as the register-source overload.
+ case IceType_v4f32:
+ emitUint8(0x08);
+ break;
+ case IceType_f32:
+ emitUint8(0x0A);
+ break;
+ case IceType_f64:
+ emitUint8(0x0B);
+ break;
+ default:
+ assert(false && "Unsupported round operand type"); // NOTE(review): with asserts compiled out, no opcode byte is emitted on this path.
+ }
+ emitOperand(gprEncoding(dst), src); // NOTE(review): a one-byte immediate follows the operand; confirm whether emitOperand must account for it when src is PC-relative.
+ // Mask precision exception.
+ emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
}
template <typename TraitsType>
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 489ffea..62d6c61 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -164,6 +164,7 @@
Pxor,
Ret,
Rol,
+ Round,
Sar,
Sbb,
SbbRMW,
@@ -2564,6 +2565,25 @@
InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
};
+ /// Round instruction: Dest = round(Source), with rounding mode Imm.
+ class InstX86Round final
+ : public InstX86BaseThreeAddressop<InstX86Base::Round> {
+ public:
+ static InstX86Round *create(Cfg *Func, Variable *Dest, Operand *Source,
+ Operand *Imm) {
+ return new (Func->allocate<InstX86Round>())
+ InstX86Round(Func, Dest, Source, Imm);
+ }
+
+ void emit(const Cfg *Func) const override; // Textual assembly output.
+ void emitIAS(const Cfg *Func) const override; // Output through the integrated assembler.
+
+ private:
+ InstX86Round(Cfg *Func, Variable *Dest, Operand *Source, Operand *Imm)
+ : InstX86BaseThreeAddressop<InstX86Base::Round>(Func, Dest, Source,
+ Imm) {}
+ };
+
/// cmp - Integer compare instruction.
class InstX86Icmp final : public InstX86Base {
InstX86Icmp() = delete;
@@ -3229,6 +3249,7 @@
using Cmpxchg = typename InstImpl<TraitsType>::InstX86Cmpxchg;
using Cmpxchg8b = typename InstImpl<TraitsType>::InstX86Cmpxchg8b;
using Cvt = typename InstImpl<TraitsType>::InstX86Cvt;
+ using Round = typename InstImpl<TraitsType>::InstX86Round;
using Icmp = typename InstImpl<TraitsType>::InstX86Icmp;
using Ucomiss = typename InstImpl<TraitsType>::InstX86Ucomiss;
using UD2 = typename InstImpl<TraitsType>::InstX86UD2;
@@ -3494,6 +3515,9 @@
"insertps"; \
template <> \
template <> \
+ const char *InstImpl<TraitsType>::InstX86Round::Base::Opcode = "round"; \
+ template <> \
+ template <> \
const char *InstImpl<TraitsType>::InstX86Shufps::Base::Opcode = "shufps"; \
template <> \
template <> \
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 8eae1b3..b738a02 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -1767,6 +1767,35 @@
}
template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Round::emit(const Cfg *Func) const { // Writes the textual form of the round instruction.
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(this->getSrcSize() == 2); // Two sources (value and rounding mode), as emitIAS() also asserts.
+ Str << "\t" << this->Opcode
+ << Traits::TypeAttributes[this->getDest()->getType()].SpSdString
+ << "\t";
+ this->getSrc(1)->emit(Func); // Operands are printed as: rounding mode, source, destination.
+ Str << ", ";
+ this->getSrc(0)->emit(Func);
+ Str << ", ";
+ this->getDest()->emit(Func);
+}
+
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86Round::emitIAS(const Cfg *Func) const {
+ assert(this->getSrcSize() == 2); // Source value plus rounding-mode operand.
+ assert(InstX86Base::getTarget(Func)->getInstructionSet() >= Traits::SSE4_1); // Only emitted when the target reports SSE4.1 or later.
+ const Variable *Dest = this->getDest();
+ Type Ty = Dest->getType(); // The destination type is what Assembler::round receives as Ty.
+ static const ThreeOpImmEmitter<XmmRegister, XmmRegister> Emitter = {
+ &Assembler::round, &Assembler::round}; // NOTE(review): presumably the XmmRegister-source and Address-source overloads; confirm against ThreeOpImmEmitter.
+ emitIASThreeOpImmOps<XmmRegister, XmmRegister, Traits::getEncodedXmm,
+ Traits::getEncodedXmm>(Func, Ty, Dest, this->getSrc(0),
+ this->getSrc(1), Emitter);
+}
+
+template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Icmp::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 3e2a738..c696fca 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -68,6 +68,7 @@
MultiplyAddPairs,
MultiplyHighSigned,
MultiplyHighUnsigned,
+ Round,
SignMask,
StoreSubVector,
SubtractSaturateSigned,
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index e6276ce..8e0ce83 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -619,6 +619,10 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Cvt>(Dest, Src0, Variant);
}
+ void _round(Variable *Dest, Operand *Src0, Operand *Imm) { // NOTE(review): this patch also adds a _round(Variable *, Operand *, Constant *) overload with the same body; consider keeping only one.
+ AutoMemorySandboxer<> _(this, &Dest, &Src0); // Imm is not handed to the sandboxer.
+ Context.insert<typename Traits::Insts::Round>(Dest, Src0, Imm);
+ }
void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Div>(Dest, Src0, Src1);
@@ -894,6 +898,10 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Rol>(Dest, Src0);
}
+ void _round(Variable *Dest, Operand *Src, Constant *Imm) { // NOTE(review): this patch also adds a _round(Variable *, Operand *, Operand *) overload with the same body; consider keeping only one.
+ AutoMemorySandboxer<> _(this, &Dest, &Src); // Imm is not handed to the sandboxer.
+ Context.insert<typename Traits::Insts::Round>(Dest, Src, Imm);
+ }
X86OperandMem *_sandbox_mem_reference(X86OperandMem *Mem) {
return dispatchToConcrete(&Traits::ConcreteTarget::_sandbox_mem_reference,
std::move(Mem));
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index a5de2ae..3472b00 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4557,6 +4557,21 @@
_movp(Dest, T);
return;
}
+ case Intrinsics::Round: {
+ Variable *Dest = Instr->getDest();
+ Operand *Src = Instr->getArg(0);
+ Operand *Mode = Instr->getArg(1);
+ assert(llvm::isa<ConstantInteger32>(Mode) &&
+ "Round last argument must be a constant");
+ auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
+ int32_t Imm = llvm::cast<ConstantInteger32>(Mode)->getValue();
+ (void)Imm;
+ assert(Imm >= 0 && Imm < 4 && "Invalid rounding mode");
+ auto *T = makeReg(Dest->getType());
+ _round(T, SrcRM, Mode);
+ _movp(Dest, T);
+ return;
+ }
default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic");
return;