Emulate vectors shorter than 128-bit.
Bug swiftshader:15
Change-Id: I065c52711719b12fe55cf0190e735240c008629e
Reviewed-on: https://swiftshader-review.googlesource.com/7712
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-on: https://swiftshader-review.googlesource.com/7390
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/Main.cpp b/src/Reactor/Main.cpp
index f12b9ac..cafb363 100644
--- a/src/Reactor/Main.cpp
+++ b/src/Reactor/Main.cpp
@@ -33,7 +33,7 @@
return sum;
}
-TEST(SubzeroReactorSample, SubzeroReactor)
+TEST(SubzeroReactorTest, Sample)
{
Routine *routine = nullptr;
@@ -73,6 +73,66 @@
delete routine;
}
+TEST(SubzeroReactorTest, SubVectorLoadStore) // Copies one vector of each tested width from 'in' to 'out' and checks exactly which output bytes were written.
+{
+ Routine *routine = nullptr;
+
+ {
+ Function<Int(Pointer<Byte>, Pointer<Byte>)> function; // generated routine signature: Int(in, out)
+ {
+ Pointer<Byte> in = function.Arg<0>();
+ Pointer<Byte> out = function.Arg<1>();
+
+ *Pointer<Int4>(out + 16 * 0) = *Pointer<Int4>(in + 16 * 0); // row 0: Int4, all 16 bytes of the row
+ *Pointer<Short4>(out + 16 * 1) = *Pointer<Short4>(in + 16 * 1); // row 1: Short4, first 8 bytes of the row
+ *Pointer<Byte8>(out + 16 * 2) = *Pointer<Byte8>(in + 16 * 2); // row 2: Byte8, first 8 bytes of the row
+ *Pointer<Byte4>(out + 16 * 3) = *Pointer<Byte4>(in + 16 * 3); // row 3: Byte4, first 4 bytes of the row
+ *Pointer<Short2>(out + 16 * 4) = *Pointer<Short2>(in + 16 * 4); // row 4: Short2, first 4 bytes of the row
+
+ Return(0);
+ }
+
+ routine = function(L"one");
+
+ if(routine) // the checks below only run when a routine was actually produced
+ {
+ int8_t in[16 * 5] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, // one 16-byte row per copy; non-zero bytes are the ones expected to be copied, zeros mark bytes that must not be written
+ 17, 18, 19, 20, 21, 22, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0,
+ 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0,
+ 33, 34, 35, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 37, 38, 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ int8_t out[16 * 5] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // pre-filled with -1 so any byte written by the routine is detectable
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+
+ int (*callable)(void*, void*) = (int(*)(void*,void*))routine->getEntry(); // treat the routine's entry point as a plain function taking (in, out)
+ callable(in, out);
+
+ for(int row = 0; row < 5; row++)
+ {
+ for(int col = 0; col < 16; col++)
+ {
+ int i = row * 16 + col;
+
+ if(in[i] == 0) // zero input byte: lies outside the copied part of the row
+ {
+ EXPECT_EQ(out[i], -1) << "Row " << row << " column " << col << " not left untouched.";
+ }
+ else // non-zero input byte: must have been copied verbatim
+ {
+ EXPECT_EQ(out[i], in[i]) << "Row " << row << " column " << col << " not equal to input.";
+ }
+ }
+ }
+ }
+ }
+
+ delete routine; // deleting a null pointer is a no-op, so this is fine when no routine was produced
+}
+
int main(int argc, char **argv)
{
::testing::InitGoogleTest(&argc, argv);
diff --git a/src/Reactor/Subzero.vcxproj b/src/Reactor/Subzero.vcxproj
index 2beadcc..02bea0e 100644
--- a/src/Reactor/Subzero.vcxproj
+++ b/src/Reactor/Subzero.vcxproj
@@ -254,6 +254,8 @@
<ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTypes.h" />
<ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceUtils.h" />
<ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceVariableSplitting.h" />
+ <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664.h" />
+ <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664Traits.h" />
</ItemGroup>
<ItemGroup>
<None Include="$(SolutionDir)third_party\pnacl-subzero\src\IceClFlags.def" />
diff --git a/src/Reactor/Subzero.vcxproj.filters b/src/Reactor/Subzero.vcxproj.filters
index fb90d91..6bdcac7 100644
--- a/src/Reactor/Subzero.vcxproj.filters
+++ b/src/Reactor/Subzero.vcxproj.filters
@@ -269,6 +269,12 @@
<ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceVariableSplitting.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664Traits.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="$(SolutionDir)third_party\pnacl-subzero\src\IceClFlags.def">
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index c0778af..ee3b323 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -52,13 +52,29 @@
namespace sw
{
+ enum EmulatedType // Type tags for vectors narrower than 128 bits: a full-width Ice vector type OR'ed with an element-count flag.
+ {
+ EmulatedShift = 16, // left shift applied to the element-count flags below
+ EmulatedV2 = 2 << EmulatedShift, // flag: 2-element vector
+ EmulatedV4 = 4 << EmulatedShift, // flag: 4-element vector
+ EmulatedV8 = 8 << EmulatedShift, // flag: 8-element vector
+ EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8, // mask covering all emulation flags
+
+ Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2, // 2 x i32, tagged on top of v4i32
+ Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4, // 4 x i16, tagged on top of v8i16
+ Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2, // 2 x i16, tagged on top of v8i16
+ Type_v8i8 = Ice::IceType_v16i8 | EmulatedV8, // 8 x i8, tagged on top of v16i8
+ Type_v4i8 = Ice::IceType_v16i8 | EmulatedV4, // 4 x i8, tagged on top of v16i8
+ };
+
class Value : public Ice::Variable {};
class Constant : public Ice::Constant {};
class BasicBlock : public Ice::CfgNode {};
Ice::Type T(Type *t)
{
- return (Ice::Type)reinterpret_cast<std::intptr_t>(t);
+ static_assert(Ice::IceType_NUM < EmulatedV2, "Ice::Type overlaps with our emulated types!"); // every Ice::Type value must stay below the lowest emulation flag (EmulatedV2), not merely below the combined mask
+ return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits); // strip the emulation flags, leaving the underlying full-width Ice type
}
Type *T(Ice::Type t)
@@ -66,6 +82,11 @@
return reinterpret_cast<Type*>(t);
}
+ Type *T(EmulatedType t) // Packs an EmulatedType value (flags included) into an opaque Type* handle.
+ {
+ return reinterpret_cast<Type*>(t); // the enum value itself is stored in the pointer; nothing is dereferenced here
+ }
+
Value *V(Ice::Variable *v)
{
return reinterpret_cast<Value*>(v);
@@ -461,16 +482,92 @@
Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
{
- Ice::Variable *value = ::function->makeVariable(T(type));
- auto load = Ice::InstLoad::create(::function, value, ptr, align);
- ::basicBlock->appendInst(load);
- return V(value);
+ int valueType = (int)reinterpret_cast<intptr_t>(type); // raw handle value, emulation flags included
+ Ice::Variable *result = ::function->makeVariable(T(type)); // T() masks off the flags, so the variable has the full-width Ice type
+
+ if(valueType & EmulatedBits) // any flag set: this is an emulated sub-128-bit vector
+ {
+ switch(valueType)
+ {
+ case Type_v4i8: // 4 x i8 = 4 bytes
+ case Type_v2i16: // 2 x i16 = 4 bytes
+ {
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; // LoadSubVector, flagged as no side effects and no memory write
+ auto target = ::context->getConstantUndef(Ice::IceType_i32); // undefined i32 constant passed as the call target
+ auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic); // two arguments are added below
+ load->addArg(::context->getConstantInt32(4)); // presumably the number of bytes to load (4) - confirm against the intrinsic's lowering
+ load->addArg(ptr);
+ ::basicBlock->appendInst(load);
+ }
+ break;
+ case Type_v2i32: // 2 x i32 = 8 bytes
+ case Type_v8i8: // 8 x i8 = 8 bytes
+ case Type_v4i16: // 4 x i16 = 8 bytes
+ {
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F}; // same intrinsic description as the 4-byte case
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);
+ load->addArg(::context->getConstantInt32(8)); // presumably the number of bytes to load (8) - confirm against the intrinsic's lowering
+ load->addArg(ptr);
+ ::basicBlock->appendInst(load);
+ }
+ break;
+ default: assert(false && "UNIMPLEMENTED"); // flagged type with no case above
+ }
+ }
+ else // no emulation flag: ordinary load instruction
+ {
+ auto load = Ice::InstLoad::create(::function, result, ptr, align); // 'align' is only used on this path
+ ::basicBlock->appendInst(load);
+ }
+
+ return V(result);
}
Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
{
- auto store = Ice::InstStore::create(::function, value, ptr, align);
- ::basicBlock->appendInst(store);
+ int valueType = (int)reinterpret_cast<intptr_t>(type); // raw handle value, emulation flags included
+
+ if(valueType & EmulatedBits) // any flag set: this is an emulated sub-128-bit vector
+ {
+ switch(valueType)
+ {
+ case Type_v4i8: // 4 x i8 = 4 bytes
+ case Type_v2i16: // 2 x i16 = 4 bytes
+ {
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T}; // StoreSubVector, flagged as having side effects and writing memory
+ auto target = ::context->getConstantUndef(Ice::IceType_i32); // undefined i32 constant passed as the call target
+ auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic); // three arguments are added below; nullptr is passed where createLoad passes its result variable
+ store->addArg(::context->getConstantInt32(4)); // presumably the number of bytes to store (4) - confirm against the intrinsic's lowering
+ store->addArg(value);
+ store->addArg(ptr);
+ ::basicBlock->appendInst(store);
+ }
+ break;
+ case Type_v2i32: // 2 x i32 = 8 bytes
+ case Type_v8i8: // 8 x i8 = 8 bytes
+ case Type_v4i16: // 4 x i16 = 8 bytes
+ {
+ const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T}; // same intrinsic description as the 4-byte case
+ auto target = ::context->getConstantUndef(Ice::IceType_i32);
+ auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);
+ store->addArg(::context->getConstantInt32(8)); // presumably the number of bytes to store (8) - confirm against the intrinsic's lowering
+ store->addArg(value);
+ store->addArg(ptr);
+ ::basicBlock->appendInst(store);
+ }
+ break;
+ default: assert(false && "UNIMPLEMENTED"); // flagged type with no case above
+ }
+ }
+ else // no emulation flag: ordinary store instruction
+ {
+ assert(T(value->getType()) == type); // the value's Ice type must match the requested type; only checked on this path
+
+ auto store = Ice::InstStore::create(::function, value, ptr, align); // 'align' is only used on this path
+ ::basicBlock->appendInst(store);
+ }
+
return value;
}
@@ -505,7 +602,7 @@
static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
{
- if(T(v->getType()) == destType)
+ if(v->getType() == T(destType))
{
return v;
}
@@ -1955,7 +2052,7 @@
Type *Byte4::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Type_v4i8); // emulated type: IceType_v16i8 tagged with EmulatedV4
}
Type *SByte4::getType()
@@ -2178,7 +2275,7 @@
Type *Byte8::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Type_v8i8); // emulated type: IceType_v16i8 tagged with EmulatedV8
}
SByte8::SByte8()
@@ -2442,12 +2539,12 @@
Type *Byte16::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Ice::IceType_v16i8); // plain Ice v16i8 type, no emulation flag
}
Type *SByte16::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Ice::IceType_v16i8); // same Ice type as Byte16::getType(); signedness is not part of the type handle
}
Short2::Short2(RValue<Short4> cast)
@@ -2457,7 +2554,7 @@
Type *Short2::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Type_v2i16); // emulated type: IceType_v8i16 tagged with EmulatedV2
}
UShort2::UShort2(RValue<UShort4> cast)
@@ -2467,7 +2564,7 @@
Type *UShort2::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Type_v2i16); // same handle as Short2::getType(); signedness is not part of the type handle
}
Short4::Short4(RValue<Int> cast)
@@ -2821,7 +2918,7 @@
Type *Short4::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Type_v4i16); // emulated type: IceType_v8i16 tagged with EmulatedV4
}
UShort4::UShort4(RValue<Int4> cast)
@@ -3057,7 +3154,7 @@
Type *UShort4::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Type_v4i16); // same handle as Short4::getType(); signedness is not part of the type handle
}
Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
@@ -4797,7 +4894,7 @@
Type *Int4::getType()
{
- assert(false && "UNIMPLEMENTED"); return nullptr;
+ return T(Ice::IceType_v4i32); // plain Ice v4i32 type, no emulation flag
}
UInt4::UInt4(RValue<Float4> cast)