SpirvShader: Add PhysicalPointers

And add load / store paths to deal with data layouts that are not interleaved by lane.

Bug: b/126330097

Change-Id: Id7730a606d623ffe74ae57668216f05159972f10
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/25710
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index d5ff62d..ff5d7d6 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -542,6 +542,18 @@
 		}
 	}
 
+	bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
+	{
+		// Uniform and StorageBuffer are the only storage classes reported as
+		// not lane-interleaved; every other class is reported as interleaved.
+		if (storageClass == spv::StorageClassUniform ||
+		    storageClass == spv::StorageClassStorageBuffer)
+		{
+			return false;
+		}
+		return true;
+	}
+
 	template<typename F>
 	int SpirvShader::VisitInterfaceInner(TypeID id, Decorations d, F f) const
 	{
@@ -1056,12 +1068,24 @@
 			UNIMPLEMENTED("Descriptor-backed load not yet implemented");
 		}
 
-		auto &ptrBase = routine->getValue(pointer.pointerBase);
+		Pointer<Float> ptrBase;
+		if (pointerBase.kind == Object::Kind::PhysicalPointer)
+		{
+			ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
+		}
+		else
+		{
+			ptrBase = &routine->getValue(pointer.pointerBase)[0];
+		}
+
+		bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
+
 		auto &dst = routine->createIntermediate(objectId, objectTy.sizeInComponents);
 
 		if (pointer.kind == Object::Kind::Value)
 		{
-			auto offsets = As<SIMD::Int>(routine->getIntermediate(insn.word(3))[0]);
+			// Divergent offsets.
+			auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
 			for (auto i = 0u; i < objectTy.sizeInComponents; i++)
 			{
 				// i wish i had a Float,Float,Float,Float constructor here..
@@ -1069,17 +1093,27 @@
 				for (int j = 0; j < SIMD::Width; j++)
 				{
 					Int offset = Int(i) + Extract(offsets, j);
-					v = Insert(v, Extract(ptrBase[offset], j), j);
+					if (interleavedByLane) { offset = offset * SIMD::Width + j; }
+					v = Insert(v, ptrBase[offset], j);
 				}
 				dst.emplace(i, v);
 			}
 		}
-		else
+		else if (interleavedByLane)
 		{
-			// no divergent offsets to worry about
+			// Lane-interleaved data. No divergent offsets.
+			Pointer<SIMD::Float> src = ptrBase;
 			for (auto i = 0u; i < objectTy.sizeInComponents; i++)
 			{
-				dst.emplace(i, ptrBase[i]);
+				dst.emplace(i, src[i]);
+			}
+		}
+		else
+		{
+			// Non-interleaved data. No divergent offsets.
+			for (auto i = 0u; i < objectTy.sizeInComponents; i++)
+			{
+				dst.emplace(i, RValue<SIMD::Float>(ptrBase[i]));
 			}
 		}
 	}
@@ -1124,7 +1158,17 @@
 			UNIMPLEMENTED("Descriptor-backed store not yet implemented");
 		}
 
-		auto &ptrBase = routine->getValue(pointer.pointerBase);
+		Pointer<Float> ptrBase;
+		if (pointerBase.kind == Object::Kind::PhysicalPointer)
+		{
+			ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
+		}
+		else
+		{
+			ptrBase = &routine->getValue(pointer.pointerBase)[0];
+		}
+
+		bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
 
 		if (object.kind == Object::Kind::Constant)
 		{
@@ -1132,23 +1176,25 @@
 
 			if (pointer.kind == Object::Kind::Value)
 			{
+				// Constant source data. Divergent offsets.
 				auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
 				for (auto i = 0u; i < elementTy.sizeInComponents; i++)
 				{
-					// Scattered store
 					for (int j = 0; j < SIMD::Width; j++)
 					{
-						auto dst = ptrBase[Int(i) + Extract(offsets, j)];
-						dst = Insert(dst, Float(src[i]), j);
+						Int offset = Int(i) + Extract(offsets, j);
+						if (interleavedByLane) { offset = offset * SIMD::Width + j; }
+						ptrBase[offset] = RValue<Float>(src[i]);
 					}
 				}
 			}
 			else
 			{
-				// no divergent offsets
+				// Constant source data. No divergent offsets.
+				Pointer<SIMD::Float> dst = ptrBase;
 				for (auto i = 0u; i < elementTy.sizeInComponents; i++)
 				{
-					ptrBase[i] = RValue<SIMD::Float>(src[i]);
+					dst[i] = RValue<SIMD::Float>(src[i]);
 				}
 			}
 		}
@@ -1158,23 +1204,34 @@
 
 			if (pointer.kind == Object::Kind::Value)
 			{
+				// Intermediate source data. Divergent offsets.
 				auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
 				for (auto i = 0u; i < elementTy.sizeInComponents; i++)
 				{
-					// Scattered store
 					for (int j = 0; j < SIMD::Width; j++)
 					{
-						auto dst = ptrBase[Int(i) + Extract(offsets, j)];
-						dst = Insert(dst, Extract(src[i], j), j);
+						Int offset = Int(i) + Extract(offsets, j);
+						if (interleavedByLane) { offset = offset * SIMD::Width + j; }
+						ptrBase[offset] = Extract(src[i], j);
 					}
 				}
 			}
+			else if (interleavedByLane)
+			{
+				// Intermediate source data. Lane-interleaved data. No divergent offsets.
+				Pointer<SIMD::Float> dst = ptrBase;
+				for (auto i = 0u; i < elementTy.sizeInComponents; i++)
+				{
+					dst[i] = src[i];
+				}
+			}
 			else
 			{
-				// no divergent offsets
+				// Intermediate source data. Non-interleaved data. No divergent offsets.
+				Pointer<SIMD::Float> dst = ptrBase;
 				for (auto i = 0u; i < elementTy.sizeInComponents; i++)
 				{
-					ptrBase[i] = src[i];
+					dst[i] = SIMD::Float(src[i]);
 				}
 			}
 		}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 3b59dfd..4bfdc4a 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -220,10 +220,11 @@
 			enum class Kind
 			{
 				Unknown,        /* for paranoia -- if we get left with an object in this state, the module was broken */
-				Variable,
-				InterfaceVariable,
-				Constant,
-				Value,
+				Variable,          // TODO: Document
+				InterfaceVariable, // TODO: Document
+				Constant,          // Values held by Object::constantValue
+				Value,             // Values held by SpirvRoutine::intermediates
+				PhysicalPointer,   // Pointer held by SpirvRoutine::physicalPointers
 			} kind = Kind::Unknown;
 		};
 
@@ -384,6 +385,34 @@
 		void ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const;
 		void ApplyDecorationsForIdMember(Decorations *d, TypeID id, uint32_t member) const;
 
+		// Returns true if data in the given storage class is word-interleaved
+		// by each SIMD vector lane, otherwise data is linearly stored.
+		//
+		// A 'lane' is a component of a SIMD vector register.
+		// Given 4 consecutive loads/stores of 4 SIMD vector registers:
+		//
+		// "StorageInterleavedByLane":
+		//
+		//  Ptr+0:Reg0.x | Ptr+1:Reg0.y | Ptr+2:Reg0.z | Ptr+3:Reg0.w
+		// --------------+--------------+--------------+--------------
+		//  Ptr+4:Reg1.x | Ptr+5:Reg1.y | Ptr+6:Reg1.z | Ptr+7:Reg1.w
+		// --------------+--------------+--------------+--------------
+		//  Ptr+8:Reg2.x | Ptr+9:Reg2.y | Ptr+a:Reg2.z | Ptr+b:Reg2.w
+		// --------------+--------------+--------------+--------------
+		//  Ptr+c:Reg3.x | Ptr+d:Reg3.y | Ptr+e:Reg3.z | Ptr+f:Reg3.w
+		//
+		// Not "StorageInterleavedByLane":
+		//
+		//  Ptr+0:Reg0.x | Ptr+0:Reg0.y | Ptr+0:Reg0.z | Ptr+0:Reg0.w
+		// --------------+--------------+--------------+--------------
+		//  Ptr+1:Reg1.x | Ptr+1:Reg1.y | Ptr+1:Reg1.z | Ptr+1:Reg1.w
+		// --------------+--------------+--------------+--------------
+		//  Ptr+2:Reg2.x | Ptr+2:Reg2.y | Ptr+2:Reg2.z | Ptr+2:Reg2.w
+		// --------------+--------------+--------------+--------------
+		//  Ptr+3:Reg3.x | Ptr+3:Reg3.y | Ptr+3:Reg3.z | Ptr+3:Reg3.w
+		//
+		static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
+
 		template<typename F>
 		int VisitInterfaceInner(TypeID id, Decorations d, F f) const;
 
@@ -430,6 +459,8 @@
 
 		std::unordered_map<SpirvShader::ObjectID, Intermediate> intermediates;
 
+		std::unordered_map<SpirvShader::ObjectID, Pointer<Byte> > physicalPointers;
+
 		Value inputs = Value{MAX_INTERFACE_COMPONENTS};
 		Value outputs = Value{MAX_INTERFACE_COMPONENTS};
 
@@ -461,6 +492,13 @@
 			ASSERT(it != intermediates.end());
 			return it->second;
 		}
+
+		Pointer<Byte>& getPhysicalPointer(SpirvShader::ObjectID id) // Returns the entry of physicalPointers keyed by id.
+		{
+			auto it = physicalPointers.find(id);
+			ASSERT(it != physicalPointers.end()); // id must already have an entry; uses ASSERT like the intermediates lookup above.
+			return it->second;
+		}
 	};
 
 	class GenericValue