Emulate vectors shorter than 128 bits.

Bug swiftshader:15

Change-Id: I065c52711719b12fe55cf0190e735240c008629e
Reviewed-on: https://swiftshader-review.googlesource.com/7712
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-on: https://swiftshader-review.googlesource.com/7390
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/Main.cpp b/src/Reactor/Main.cpp
index f12b9ac..cafb363 100644
--- a/src/Reactor/Main.cpp
+++ b/src/Reactor/Main.cpp
@@ -33,7 +33,7 @@
 	return sum;
 }
 
-TEST(SubzeroReactorSample, SubzeroReactor)
+TEST(SubzeroReactorTest, Sample)
 {
 	Routine *routine = nullptr;
 
@@ -73,6 +73,66 @@
 	delete routine;
 }
 
+TEST(SubzeroReactorTest, SubVectorLoadStore)  // Copies vectors of several widths and checks that only the expected bytes of 'out' are written
+{
+	Routine *routine = nullptr;
+
+	{
+		Function<Int(Pointer<Byte>, Pointer<Byte>)> function;
+		{
+			Pointer<Byte> in = function.Arg<0>();
+			Pointer<Byte> out = function.Arg<1>();
+
+			*Pointer<Int4>(out + 16 * 0)   = *Pointer<Int4>(in + 16 * 0);  // Row 0: all 16 bytes are expected to be copied
+			*Pointer<Short4>(out + 16 * 1) = *Pointer<Short4>(in + 16 * 1);  // Row 1: the first 8 bytes are expected to be copied
+			*Pointer<Byte8>(out + 16 * 2)  = *Pointer<Byte8>(in + 16 * 2);  // Row 2: the first 8 bytes are expected to be copied
+			*Pointer<Byte4>(out + 16 * 3)  = *Pointer<Byte4>(in + 16 * 3);  // Row 3: the first 4 bytes are expected to be copied
+			*Pointer<Short2>(out + 16 * 4) = *Pointer<Short2>(in + 16 * 4);  // Row 4: the first 4 bytes are expected to be copied
+   
+			Return(0);
+		}
+
+		routine = function(L"one");
+
+		if(routine)  // Note: nothing is verified when no routine was produced
+		{
+			int8_t in[16 * 5] = {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,  // One 16-byte row per copy above; nonzero bytes are the ones expected to be copied
+			                     17, 18, 19, 20, 21, 22, 23, 24,  0,  0,  0,  0,  0,  0,  0,  0,
+			                     25, 26, 27, 28, 29, 30, 31, 32,  0,  0,  0,  0,  0,  0,  0,  0,
+			                     33, 34, 35, 36,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+			                     37, 38, 39, 40,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};
+
+			int8_t out[16 * 5] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // Pre-filled with -1 so that any write by the routine is detectable
+			                      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+			                      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+			                      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+			                      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+		
+			int (*callable)(void*, void*) = (int(*)(void*,void*))routine->getEntry();  // Treat the routine's entry point as a plain function taking (in, out)
+			callable(in, out);
+
+			for(int row = 0; row < 5; row++)
+			{
+				for(int col = 0; col < 16; col++)
+				{
+					int i = row * 16 + col;
+
+					if(in[i] ==  0)  // A zero input byte marks a position the routine must not write
+					{
+						EXPECT_EQ(out[i], -1) << "Row " << row << " column " << col <<  " not left untouched.";
+					}
+					else  // Every other position must hold a copy of the input byte
+					{
+						EXPECT_EQ(out[i], in[i]) << "Row " << row << " column " << col << " not equal to input.";
+					}
+				}
+			}
+		}
+	}
+
+	delete routine;
+}
+
 int main(int argc, char **argv)
 {
 	::testing::InitGoogleTest(&argc, argv);
diff --git a/src/Reactor/Subzero.vcxproj b/src/Reactor/Subzero.vcxproj
index 2beadcc..02bea0e 100644
--- a/src/Reactor/Subzero.vcxproj
+++ b/src/Reactor/Subzero.vcxproj
@@ -254,6 +254,8 @@
     <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTypes.h" />

     <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceUtils.h" />

     <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceVariableSplitting.h" />

+    <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664.h" />

+    <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664Traits.h" />

   </ItemGroup>

   <ItemGroup>

     <None Include="$(SolutionDir)third_party\pnacl-subzero\src\IceClFlags.def" />

diff --git a/src/Reactor/Subzero.vcxproj.filters b/src/Reactor/Subzero.vcxproj.filters
index fb90d91..6bdcac7 100644
--- a/src/Reactor/Subzero.vcxproj.filters
+++ b/src/Reactor/Subzero.vcxproj.filters
@@ -269,6 +269,12 @@
     <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceVariableSplitting.h">

       <Filter>Header Files</Filter>

     </ClInclude>

+    <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664Traits.h">

+      <Filter>Header Files</Filter>

+    </ClInclude>

+    <ClInclude Include="$(SolutionDir)third_party\pnacl-subzero\src\IceTargetLoweringX8664.h">

+      <Filter>Header Files</Filter>

+    </ClInclude>

   </ItemGroup>

   <ItemGroup>

     <None Include="$(SolutionDir)third_party\pnacl-subzero\src\IceClFlags.def">

diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index c0778af..ee3b323 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -52,13 +52,29 @@
 
 namespace sw
 {
+	enum EmulatedType  // Type IDs for vectors shorter than 128 bits: an Ice vector type OR'ed with an element-count tag
+	{
+		EmulatedShift = 16,  // Shift applied to the element count to form the tag
+		EmulatedV2 = 2 << EmulatedShift,  // Tag used by the 2-element types below
+		EmulatedV4 = 4 << EmulatedShift,  // Tag used by the 4-element types below
+		EmulatedV8 = 8 << EmulatedShift,  // Tag used by the 8-element type below
+		EmulatedBits = EmulatedV2 | EmulatedV4 | EmulatedV8,  // Mask of all tag bits; T(Type*) clears these to recover the Ice type
+
+		Type_v2i32 = Ice::IceType_v4i32 | EmulatedV2,  // Loaded/stored as 8 bytes by createLoad/createStore
+		Type_v4i16 = Ice::IceType_v8i16 | EmulatedV4,  // Loaded/stored as 8 bytes
+		Type_v2i16 = Ice::IceType_v8i16 | EmulatedV2,  // Loaded/stored as 4 bytes
+		Type_v8i8 =  Ice::IceType_v16i8 | EmulatedV8,  // Loaded/stored as 8 bytes
+		Type_v4i8 =  Ice::IceType_v16i8 | EmulatedV4,  // Loaded/stored as 4 bytes
+	};
+
 	class Value : public Ice::Variable {};
 	class Constant : public Ice::Constant {};
 	class BasicBlock : public Ice::CfgNode {};
 
 	Ice::Type T(Type *t)
 	{
-		return (Ice::Type)reinterpret_cast<std::intptr_t>(t);
+		static_assert(static_cast<int>(Ice::IceType_NUM) < static_cast<int>(EmulatedV2), "Ice::Type overlaps with our emulated types!");  // Bound is the lowest tag bit: checking against the full EmulatedBits mask would accept Ice values that already collide with EmulatedV2
+		return (Ice::Type)(reinterpret_cast<std::intptr_t>(t) & ~EmulatedBits);  // Strip the emulation tag so callers always get the underlying Ice type
 	}
 
 	Type *T(Ice::Type t)
@@ -66,6 +82,11 @@
 		return reinterpret_cast<Type*>(t);
 	}
 
+	Type *T(EmulatedType t)  // Encodes an emulated type ID (tag bits included) as an opaque Type pointer
+	{
+		return reinterpret_cast<Type*>(t);  // The pointer only carries the enum value; it is decoded again by T(Type*) and createLoad/createStore
+	}
+
 	Value *V(Ice::Variable *v)
 	{
 		return reinterpret_cast<Value*>(v);
@@ -461,16 +482,92 @@
 
 	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
 	{
-		Ice::Variable *value = ::function->makeVariable(T(type));
-		auto load = Ice::InstLoad::create(::function, value, ptr, align);
-		::basicBlock->appendInst(load);
-		return V(value);
+		int valueType = (int)reinterpret_cast<intptr_t>(type);  // Raw type ID, still carrying any emulation tag bits
+		Ice::Variable *result = ::function->makeVariable(T(type));  // T() strips the tag, so the variable is created with the underlying Ice type
+
+		if(valueType & EmulatedBits)  // Emulated short vector: emit a LoadSubVector intrinsic call; 'align' is not passed on this path
+		{
+			switch(valueType)
+			{
+			case Type_v4i8:
+			case Type_v2i16:  // Both cases pass a size of 4
+				{
+					const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+					auto target = ::context->getConstantUndef(Ice::IceType_i32);  // Undef constant passed as the call target
+					auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);  // 2 matches the number of addArg() calls below
+					load->addArg(::context->getConstantInt32(4));  // Presumably the number of bytes to load - confirm against the Subzero lowering
+					load->addArg(ptr);
+					::basicBlock->appendInst(load);
+				}
+				break;
+			case Type_v2i32:
+			case Type_v8i8:
+			case Type_v4i16:  // All three cases pass a size of 8
+				{
+					const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::LoadSubVector, Ice::Intrinsics::SideEffects_F, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F};
+					auto target = ::context->getConstantUndef(Ice::IceType_i32);  // Undef constant passed as the call target
+					auto load = Ice::InstIntrinsicCall::create(::function, 2, result, target, intrinsic);  // 2 matches the number of addArg() calls below
+					load->addArg(::context->getConstantInt32(8));  // Presumably the number of bytes to load - confirm against the Subzero lowering
+					load->addArg(ptr);
+					::basicBlock->appendInst(load);
+				}
+				break;
+			default: assert(false && "UNIMPLEMENTED");  // Tag bits set, but not one of the known emulated type IDs
+			}
+		}
+		else  // No tag bits: plain Ice load, as before this change
+		{
+			auto load = Ice::InstLoad::create(::function, result, ptr, align);
+			::basicBlock->appendInst(load);
+		}
+
+		return V(result);  // Note: isVolatile is not consulted anywhere in this function
 	}
 
 	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
 	{
-		auto store = Ice::InstStore::create(::function, value, ptr, align);
-		::basicBlock->appendInst(store);
+		int valueType = (int)reinterpret_cast<intptr_t>(type);  // Raw type ID, still carrying any emulation tag bits; isVolatile is not consulted in this function
+
+		if(valueType & EmulatedBits)  // Emulated short vector: emit a StoreSubVector intrinsic call; 'align' is not passed on this path
+		{
+			switch(valueType)
+			{
+			case Type_v4i8:
+			case Type_v2i16:  // Both cases pass a size of 4
+				{
+					const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
+					auto target = ::context->getConstantUndef(Ice::IceType_i32);  // Undef constant passed as the call target
+					auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);  // 3 matches the number of addArg() calls below; nullptr: no result variable
+					store->addArg(::context->getConstantInt32(4));  // Presumably the number of bytes to store - confirm against the Subzero lowering
+					store->addArg(value);
+					store->addArg(ptr);
+					::basicBlock->appendInst(store);
+				}
+				break;
+			case Type_v2i32:
+			case Type_v8i8:
+			case Type_v4i16:  // All three cases pass a size of 8
+				{
+					const Ice::Intrinsics::IntrinsicInfo intrinsic = {Ice::Intrinsics::StoreSubVector, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T};
+					auto target = ::context->getConstantUndef(Ice::IceType_i32);  // Undef constant passed as the call target
+					auto store = Ice::InstIntrinsicCall::create(::function, 3, nullptr, target, intrinsic);  // 3 matches the number of addArg() calls below; nullptr: no result variable
+					store->addArg(::context->getConstantInt32(8));  // Presumably the number of bytes to store - confirm against the Subzero lowering
+					store->addArg(value);
+					store->addArg(ptr);
+					::basicBlock->appendInst(store);
+				}
+				break;
+			default: assert(false && "UNIMPLEMENTED");  // Tag bits set, but not one of the known emulated type IDs
+			}
+		}
+		else  // No tag bits: plain Ice store, as before this change
+		{
+			assert(T(value->getType()) == type);  // The value's Ice type must be exactly the requested type
+
+			auto store = Ice::InstStore::create(::function, value, ptr, align);
+			::basicBlock->appendInst(store);
+		}
+
 		return value;
 	}
 
@@ -505,7 +602,7 @@
 
 	static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
 	{
-		if(T(v->getType()) == destType)
+		if(v->getType() == T(destType))
 		{
 			return v;
 		}
@@ -1955,7 +2052,7 @@
 
 	Type *Byte4::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Type_v4i8);  // Emulated type: Ice::IceType_v16i8 tagged with EmulatedV4
 	}
 
 	Type *SByte4::getType()
@@ -2178,7 +2275,7 @@
 
 	Type *Byte8::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Type_v8i8);  // Emulated type: Ice::IceType_v16i8 tagged with EmulatedV8
 	}
 
 	SByte8::SByte8()
@@ -2442,12 +2539,12 @@
 
 	Type *Byte16::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Ice::IceType_v16i8);  // Plain Ice type without an emulation tag; SByte16 returns the same ID
 	}
 
 	Type *SByte16::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Ice::IceType_v16i8);  // Plain Ice type without an emulation tag; Byte16 returns the same ID
 	}
 
 	Short2::Short2(RValue<Short4> cast)
@@ -2457,7 +2554,7 @@
 
 	Type *Short2::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Type_v2i16);  // Emulated type: Ice::IceType_v8i16 tagged with EmulatedV2; UShort2 returns the same ID
 	}
 
 	UShort2::UShort2(RValue<UShort4> cast)
@@ -2467,7 +2564,7 @@
 
 	Type *UShort2::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Type_v2i16);  // Emulated type: Ice::IceType_v8i16 tagged with EmulatedV2; Short2 returns the same ID
 	}
 
 	Short4::Short4(RValue<Int> cast)
@@ -2821,7 +2918,7 @@
 
 	Type *Short4::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Type_v4i16);  // Emulated type: Ice::IceType_v8i16 tagged with EmulatedV4; UShort4 returns the same ID
 	}
 
 	UShort4::UShort4(RValue<Int4> cast)
@@ -3057,7 +3154,7 @@
 
 	Type *UShort4::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Type_v4i16);  // Emulated type: Ice::IceType_v8i16 tagged with EmulatedV4; Short4 returns the same ID
 	}
 
 	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
@@ -4797,7 +4894,7 @@
 
 	Type *Int4::getType()
 	{
-		assert(false && "UNIMPLEMENTED"); return nullptr;
+		return T(Ice::IceType_v4i32);  // Plain Ice type without an emulation tag
 	}
 
 	UInt4::UInt4(RValue<Float4> cast)