Separate intermediate values from lvalues

The vast majority of values in a SPIR-V program are intermediates --
they are guaranteed to be written exactly once, by the instruction which
defines them.

Initially we treated these the same as mutable (stack) variables, but
that produces wasteful code full of loads and stores.

Instead, represent each intermediate value as a bundle of RValue<Float4>
objects, each holding a float-sized rvalue for every SIMD lane. Introduce
the new type Intermediate to hold these bundles; it allows the individual
RValue<Float4> objects within a bundle to be constructed incrementally.

Bug: b/124534397
Change-Id: Ibb663773100d017de117111705b530b092f87ea2
Reviewed-on: https://swiftshader-review.googlesource.com/c/24968
Tested-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 98f4b912..4cb7c4c 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -488,7 +488,6 @@
 
 	Int4 SpirvShader::WalkAccessChain(uint32_t id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
 	{
-		// TODO: think about decorations, to make this work on location based interfaces
 		// TODO: think about explicit layout (UBO/SSBO) storage classes
 		// TODO: avoid doing per-lane work in some cases if we can?
 
@@ -497,8 +496,10 @@
 		auto & baseObject = getObject(id);
 		auto typeId = baseObject.definition.word(1);
 
+		// The <base> operand is an intermediate value itself, i.e. produced by a previous OpAccessChain.
+		// Start with its offset and build from there.
 		if (baseObject.kind == Object::Kind::Value)
-			dynamicOffset += As<Int4>(routine->getValue(id)[0]);
+			dynamicOffset += As<Int4>(routine->getIntermediate(id)[0]);
 
 		for (auto i = 0u; i < numIndexes; i++)
 		{
@@ -525,7 +526,7 @@
 				if (obj.kind == Object::Kind::Constant)
 					constantOffset += stride * GetConstantInt(indexIds[i]);
 				else
-					dynamicOffset += Int4(stride) * As<Int4>(routine->getValue(indexIds[i])[0]);
+					dynamicOffset += Int4(stride) * As<Int4>(routine->getIntermediate(indexIds[i])[0]);
 				break;
 			}
 
@@ -685,7 +686,7 @@
 				auto &object = getObject(insn.word(2));
 				auto &type = getType(insn.word(1));
 				auto &pointer = getObject(insn.word(3));
-				routine->createLvalue(insn.word(2), type.sizeInComponents);		// TODO: this should be an ssavalue!
+				routine->createIntermediate(insn.word(2), type.sizeInComponents);
 				auto &pointerBase = getObject(pointer.pointerBase);
 
 				if (pointerBase.storageClass == spv::StorageClassImage ||
@@ -696,18 +697,18 @@
 				}
 
 				SpirvRoutine::Value& ptrBase = routine->getValue(pointer.pointerBase);
-				auto & dst = routine->getValue(insn.word(2));
+				auto & dst = routine->getIntermediate(insn.word(2));
 
 				if (pointer.kind == Object::Kind::Value)
 				{
-					auto offsets = As<Int4>(routine->getValue(insn.word(3)));
+					auto offsets = As<Int4>(routine->getIntermediate(insn.word(3))[0]);
 					for (auto i = 0u; i < object.sizeInComponents; i++)
 					{
 						// i wish i had a Float,Float,Float,Float constructor here..
 						Float4 v;
 						for (int j = 0; j < 4; j++)
 							v = Insert(v, Extract(ptrBase[Int(i) + Extract(offsets, j)], j), j);
-						dst[i] = v;
+						dst.emplace(i, v);
 					}
 				}
 				else
@@ -715,7 +716,7 @@
 					// no divergent offsets to worry about
 					for (auto i = 0u; i < object.sizeInComponents; i++)
 					{
-						dst[i] = ptrBase[i];
+						dst.emplace(i, ptrBase[i]);
 					}
 				}
 				break;
@@ -725,7 +726,7 @@
 				auto &object = getObject(insn.word(2));
 				auto &type = getType(insn.word(1));
 				auto &base = getObject(insn.word(3));
-				routine->createLvalue(insn.word(2), type.sizeInComponents);		// TODO: this should be an ssavalue!
+				routine->createIntermediate(insn.word(2), type.sizeInComponents);
 				auto &pointerBase = getObject(object.pointerBase);
 				assert(type.sizeInComponents == 1);
 				assert(base.pointerBase == object.pointerBase);
@@ -737,8 +738,8 @@
 					UNIMPLEMENTED("Descriptor-backed OpAccessChain not yet implemented");
 				}
 
-				auto & dst = routine->getValue(insn.word(2));
-				dst[0] = As<Float4>(WalkAccessChain(insn.word(3), insn.wordCount() - 4, insn.wordPointer(4), routine));
+				auto & dst = routine->getIntermediate(insn.word(2));
+				dst.emplace(0, As<Float4>(WalkAccessChain(insn.word(3), insn.wordCount() - 4, insn.wordPointer(4), routine)));
 				break;
 			}
 			case spv::OpStore:
@@ -755,11 +756,11 @@
 				}
 
 				SpirvRoutine::Value& ptrBase = routine->getValue(pointer.pointerBase);
-				auto & src = routine->getValue(insn.word(2));;
+				auto & src = routine->getIntermediate(insn.word(2));;
 
 				if (pointer.kind == Object::Kind::Value)
 				{
-					auto offsets = As<Int4>(routine->getValue(insn.word(1)));
+					auto offsets = As<Int4>(routine->getIntermediate(insn.word(1))[0]);
 					for (auto i = 0u; i < object.sizeInComponents; i++)
 					{
 						// Scattered store
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 9611403..9fe4745 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -30,24 +30,85 @@
 
 namespace sw
 {
+	// Incrementally constructed bundle of rvalues (Scalar = RValue<Float4>).
+	// Effectively a restricted vector, supporting only:
+	// - allocation to a (runtime-known) fixed size
+	// - in-place construction of elements via emplace(); const operator[] access
+	// NOTE(review): ~Intermediate() destroys every slot, so all must be emplaced -- confirm.
+	class Intermediate
+	{
+	public:
+		using Scalar = RValue<Float4>;
+
+		Intermediate(uint32_t size) : contents(new ContentsType[size]), size(size) {}
+
+		~Intermediate()
+		{
+			for (auto i = 0u; i < size; i++)
+				reinterpret_cast<Scalar *>(&contents[i])->~Scalar();
+			delete [] contents;
+		}
+
+		void emplace(uint32_t n, Scalar&& value)
+		{
+			assert(n < size);
+			new (&contents[n]) Scalar(value);
+		}
+
+		Scalar const & operator[](uint32_t n) const
+		{
+			assert(n < size);
+			return *reinterpret_cast<Scalar const *>(&contents[n]);
+		}
+
+		// No copy/move construction or assignment
+		Intermediate(Intermediate const &) = delete;
+		Intermediate(Intermediate &&) = delete;
+		Intermediate & operator=(Intermediate const &) = delete;
+		Intermediate & operator=(Intermediate &&) = delete;
+
+	private:
+		using ContentsType = std::aligned_storage<sizeof(Scalar), alignof(Scalar)>::type;
+
+		ContentsType *contents;
+		uint32_t size;
+	};
+
 	class SpirvRoutine
 	{
 	public:
 		using Value = Array<Float4>;
-		std::unordered_map<uint32_t, std::unique_ptr<Value>> lvalues;
-		std::unique_ptr<Value> inputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS));
-		std::unique_ptr<Value> outputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS));
+		std::unordered_map<uint32_t, Value> lvalues;
+
+		std::unordered_map<uint32_t, Intermediate> intermediates;
+
+		std::unique_ptr<Value> const inputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS));
+		std::unique_ptr<Value> const outputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS));
 
 		void createLvalue(uint32_t id, uint32_t size)
 		{
-			lvalues.emplace(id, std::unique_ptr<Value>(new Value(size)));
+			lvalues.emplace(id, Value(size));
+		}
+
+		void createIntermediate(uint32_t id, uint32_t size)
+		{
+			intermediates.emplace(std::piecewise_construct,
+					std::forward_as_tuple(id),
+					std::forward_as_tuple(size));
 		}
 
 		Value& getValue(uint32_t id)
 		{
 			auto it = lvalues.find(id);
 			assert(it != lvalues.end());
-			return *it->second;
+			return it->second;
+		}
+
+		Intermediate& getIntermediate(uint32_t id)
+		{
+			auto it = intermediates.find(id);
+			assert(it != intermediates.end());
+			return it->second;
 		}
 	};