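Generalize the Sqrt intrinsic to process vectors.

On x86, the Sqrtss instruction and the sqrtss assembler entry points are
renamed to Sqrt/sqrt, and the F3/F2 scalar prefix is now emitted only when
the operand type is a scalar floating-point type. The x86 and ARM32
lowerings assert that a non-scalar Sqrt is only seen when the ABI is not
PNaCl; the MIPS32 lowering gains an else branch that reports the case as
an unimplemented lowering.

BUG=swiftshader:15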
Change-Id: Ib89d628c85696c20a249b8810cd357a292d10402
Reviewed-on: https://chromium-review.googlesource.com/405293
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 937e997..6633e65 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -467,8 +467,8 @@
void movmsk(Type Ty, GPRRegister dst, XmmRegister src);
- void sqrtss(Type Ty, XmmRegister dst, const Address &src);
- void sqrtss(Type Ty, XmmRegister dst, XmmRegister src);
+ void sqrt(Type Ty, XmmRegister dst, const Address &src);
+ void sqrt(Type Ty, XmmRegister dst, XmmRegister src);
void xorps(Type Ty, XmmRegister dst, const Address &src);
void xorps(Type Ty, XmmRegister dst, XmmRegister src);
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 77b746d..347a07e 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1939,10 +1939,11 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtss(Type Ty, XmmRegister dst,
- const Address &src) {
+void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
+ const Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+ if (isScalarFloatingType(Ty))
+ emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
emitAddrSizeOverridePrefix();
emitRex(RexTypeIrrelevant, src, dst);
emitUint8(0x0F);
@@ -1951,10 +1952,11 @@
}
template <typename TraitsType>
-void AssemblerX86Base<TraitsType>::sqrtss(Type Ty, XmmRegister dst,
- XmmRegister src) {
+void AssemblerX86Base<TraitsType>::sqrt(Type Ty, XmmRegister dst,
+ XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
+ if (isScalarFloatingType(Ty))
+ emitUint8(isFloat32Asserting32Or64(Ty) ? 0xF3 : 0xF2);
emitRexRB(RexTypeIrrelevant, dst, src);
emitUint8(0x0F);
emitUint8(0x51);
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index bd10572..52d566c 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -166,7 +166,7 @@
Shr,
Shrd,
Shufps,
- Sqrtss,
+ Sqrt,
Store,
StoreP,
StoreQ,
@@ -1272,18 +1272,17 @@
InstX86Movmsk(Cfg *Func, Variable *Dest, Operand *Source);
};
- class InstX86Sqrtss : public InstX86BaseUnaryopXmm<InstX86Base::Sqrtss> {
+ class InstX86Sqrt : public InstX86BaseUnaryopXmm<InstX86Base::Sqrt> {
public:
- static InstX86Sqrtss *create(Cfg *Func, Variable *Dest, Operand *Src) {
- return new (Func->allocate<InstX86Sqrtss>())
- InstX86Sqrtss(Func, Dest, Src);
+ static InstX86Sqrt *create(Cfg *Func, Variable *Dest, Operand *Src) {
+ return new (Func->allocate<InstX86Sqrt>()) InstX86Sqrt(Func, Dest, Src);
}
virtual void emit(const Cfg *Func) const override;
private:
- InstX86Sqrtss(Cfg *Func, Variable *Dest, Operand *Src)
- : InstX86BaseUnaryopXmm<InstX86Base::Sqrtss>(Func, Dest, Src) {}
+ InstX86Sqrt(Cfg *Func, Variable *Dest, Operand *Src)
+ : InstX86BaseUnaryopXmm<InstX86Base::Sqrt>(Func, Dest, Src) {}
};
/// Move/assignment instruction - wrapper for mov/movss/movsd.
@@ -3028,7 +3027,7 @@
using Movzx = typename InstImpl<TraitsType>::InstX86Movzx;
using Movd = typename InstImpl<TraitsType>::InstX86Movd;
using Movmsk = typename InstImpl<TraitsType>::InstX86Movmsk;
- using Sqrtss = typename InstImpl<TraitsType>::InstX86Sqrtss;
+ using Sqrt = typename InstImpl<TraitsType>::InstX86Sqrt;
using Mov = typename InstImpl<TraitsType>::InstX86Mov;
using Movp = typename InstImpl<TraitsType>::InstX86Movp;
using Movq = typename InstImpl<TraitsType>::InstX86Movq;
@@ -3169,7 +3168,7 @@
const char *InstImpl<TraitsType>::InstX86Movzx::Base::Opcode = "movz"; \
template <> \
template <> \
- const char *InstImpl<TraitsType>::InstX86Sqrtss::Base::Opcode = "sqrtss"; \
+ const char *InstImpl<TraitsType>::InstX86Sqrt::Base::Opcode = "sqrt"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Cbwdq::Base::Opcode = \
@@ -3436,9 +3435,9 @@
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
- InstImpl<TraitsType>::InstX86Sqrtss::Base::Emitter = { \
- &InstImpl<TraitsType>::Assembler::sqrtss, \
- &InstImpl<TraitsType>::Assembler::sqrtss}; \
+ InstImpl<TraitsType>::InstX86Sqrt::Base::Emitter = { \
+ &InstImpl<TraitsType>::Assembler::sqrt, \
+ &InstImpl<TraitsType>::Assembler::sqrt}; \
\
/* Binary GPR ops */ \
template <> \
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 88c0272..c06b256 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -1074,7 +1074,7 @@
}
template <typename TraitsType>
-void InstImpl<TraitsType>::InstX86Sqrtss::emit(const Cfg *Func) const {
+void InstImpl<TraitsType>::InstX86Sqrt::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
@@ -1082,7 +1082,8 @@
Type Ty = this->getSrc(0)->getType();
assert(isScalarFloatingType(Ty));
Str << "\t"
- "sqrt" << Traits::TypeAttributes[Ty].SdSsString << "\t";
+ "sqrt"
+ << Traits::TypeAttributes[Ty].SpSdString << "\t";
this->getSrc(0)->emit(Func);
Str << ", ";
this->getDest()->emit(Func);
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index a2f1123..45cab9d 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -5264,6 +5264,8 @@
llvm::report_fatal_error("setjmp should have been prelowered.");
}
case Intrinsics::Sqrt: {
+ assert(isScalarFloatingType(Dest->getType()) ||
+ getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
Variable *Src = legalizeToReg(Instr->getArg(0));
Variable *T = makeReg(Dest->getType());
_vsqrt(T, Src);
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 3bc60c8..a9e59e2 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -4422,6 +4422,9 @@
_sqrt_d(T, legalizeToReg(Instr->getArg(0)));
}
_mov(Dest, T);
+ } else {
+ assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
+ UnimplementedLoweringError(this, Instr); // Not required for PNaCl
}
return;
}
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 8844519..380ba00 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -949,7 +949,7 @@
const char *CvtString; // i (integer), s (single FP), d (double FP)
const char *SdSsString; // ss, sd, or <blank>
const char *PdPsString; // ps, pd, or <blank>
- const char *SpsdString; // ss, sd, ps, pd, or <blank>
+ const char *SpSdString; // ss, sd, ps, pd, or <blank>
const char *IntegralString; // b, w, d, or <blank>
const char *UnpackString; // bw, wd, dq, or <blank>
const char *PackString; // wb, dw, or <blank>
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index fa0b9f1..da1fff4 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -909,9 +909,9 @@
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Movmsk>(Dest, Src0);
}
- void _sqrtss(Variable *Dest, Operand *Src0) {
+ void _sqrt(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
- Context.insert<typename Traits::Insts::Sqrtss>(Dest, Src0);
+ Context.insert<typename Traits::Insts::Sqrt>(Dest, Src0);
}
void _store(Operand *Value, X86Operand *Mem) {
AutoMemorySandboxer<> _(this, &Value, &Mem);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 1ac77c4..e0739df 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4349,10 +4349,12 @@
return;
}
case Intrinsics::Sqrt: {
+ assert(isScalarFloatingType(Instr->getDest()->getType()) ||
+ getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
Operand *Src = legalize(Instr->getArg(0));
Variable *Dest = Instr->getDest();
Variable *T = makeReg(Dest->getType());
- _sqrtss(T, Src);
+ _sqrt(T, Src);
_mov(Dest, T);
return;
}
diff --git a/unittest/AssemblerX8632/XmmArith.cpp b/unittest/AssemblerX8632/XmmArith.cpp
index 1c85e2b..d21f153 100644
--- a/unittest/AssemblerX8632/XmmArith.cpp
+++ b/unittest/AssemblerX8632/XmmArith.cpp
@@ -1663,8 +1663,8 @@
\
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
- __ sqrtss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
- XmmRegister::Encoded_Reg_##Src); \
+ __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
+ XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, test##Size##SrcValue); \
@@ -1686,8 +1686,8 @@
const uint32_t T1 = allocateDqword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
- __ sqrtss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
- dwordAddress(T0)); \
+ __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
+ dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, test##Size##SrcValue); \
diff --git a/unittest/AssemblerX8664/XmmArith.cpp b/unittest/AssemblerX8664/XmmArith.cpp
index 4ceed00..72c6730 100644
--- a/unittest/AssemblerX8664/XmmArith.cpp
+++ b/unittest/AssemblerX8664/XmmArith.cpp
@@ -1752,7 +1752,7 @@
\
__ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
- __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, test##Size##SrcValue); \
@@ -1774,7 +1774,7 @@
const uint32_t T1 = allocateDqword(); \
\
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
- __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, test##Size##SrcValue); \