SpirvShaderDebugger: Implement DebugValue

This debug instruction partially updates a local variable with SSA values.

This is typically used to update a `DebugLocalVariable` of a composite type, which holds structure member offsets from a base address.
To handle these, we allocate shadow memory to hold a copy of the entire variable in contiguous memory and have the `DebugLocalVariable` point to this memory. Whenever we encounter a `DebugValue`, we copy the necessary fields to the shadow memory.

This approach is simpler and more performant than attempting to fiddle with `vk::dbg::Value`s for each `DebugValue`.

Bug: b/148401179
Change-Id: I6aafce1f7553f4a3a97bbf84e6e9dac97ce71a68
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/48429
Reviewed-by: Jaebaek Seo <jaebaek@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/SpirvShaderDebugger.cpp b/src/Pipeline/SpirvShaderDebugger.cpp
index 06f75c9..ea2adc8 100644
--- a/src/Pipeline/SpirvShaderDebugger.cpp
+++ b/src/Pipeline/SpirvShaderDebugger.cpp
@@ -33,6 +33,7 @@
 #	include "spirv-tools/libspirv.h"
 
 #	include <algorithm>
+#	include <queue>
 
 namespace {
 
@@ -52,6 +53,14 @@
 template<typename T>
 using ArgTyT = typename ArgTy<T>::type;
 
+template<typename T>
+T take(std::queue<T> &queue)
+{
+	auto v = queue.front();
+	queue.pop();
+	return v;
+}
+
 }  // anonymous namespace
 
 namespace spvtools {
@@ -256,12 +265,26 @@
 		       kind == Kind::TemplateType;
 	}
 
+	std::pair<const Type *, uint32_t> index(std::queue<uint32_t> &&indices) const
+	{
+		if(indices.size() == 0)
+		{
+			return { this, 0 };
+		}
+		return indexMember(std::move(indices));
+	}
+
 	// sizeInBytes() returns the number of bytes of the given debug type.
 	virtual uint32_t sizeInBytes() const = 0;
 
 	// value() returns a shared pointer to a vk::dbg::Value that views the data
 	// at ptr of this type.
 	virtual std::shared_ptr<vk::dbg::Value> value(void *ptr, bool interleaved) const = 0;
+
+protected:
+	// indexMember() returns the nested inner element debug type and byte offset
+	// from the base of this type, using the list of indices.
+	virtual std::pair<const Type *, uint32_t> indexMember(std::queue<uint32_t> &&) const = 0;
 };
 
 struct CompilationUnit : ObjectImpl<CompilationUnit, Scope, Object::Kind::CompilationUnit>
@@ -286,6 +309,12 @@
 
 	uint32_t sizeInBytes() const override { return size / 8; }
 
+	std::pair<const Type *, uint32_t> indexMember(std::queue<uint32_t> &&) const override
+	{
+		DABORT("indexMember() called on BasicType %s", name.c_str());
+		return {};
+	}
+
 	std::shared_ptr<vk::dbg::Value> value(void *ptr, bool interleaved) const override
 	{
 		switch(encoding)
@@ -397,6 +426,24 @@
 		return numBytes;
 	}
 
+	std::pair<const Type *, uint32_t> indexMember(std::queue<uint32_t> &&indices) const override
+	{
+		std::vector<uint32_t> arrIndices(dimensions.size());
+		for(size_t i = 0; i < dimensions.size(); i++)
+		{
+			arrIndices[i] = take(indices);
+		}
+
+		auto out = base->index(std::move(indices));
+		auto stride = base->sizeInBytes();
+		for(int i = static_cast<int>(dimensions.size()) - 1; i >= 0; i--)
+		{
+			out.second += arrIndices[i] * stride;
+			stride *= dimensions[i];
+		}
+		return out;
+	}
+
 	std::shared_ptr<vk::dbg::Value> value(void *ptr, bool interleaved) const override
 	{
 		auto vc = std::make_shared<vk::dbg::VariableContainer>();
@@ -431,6 +478,14 @@
 		return base->sizeInBytes() * components;
 	}
 
+	std::pair<const Type *, uint32_t> indexMember(std::queue<uint32_t> &&indices) const override
+	{
+		auto idx = take(indices);
+		auto out = base->index(std::move(indices));
+		out.second += base->sizeInBytes() * idx;
+		return out;
+	}
+
 	std::shared_ptr<vk::dbg::Value> value(void *ptr, bool interleaved) const override
 	{
 		const auto elSize = base->sizeInBytes();
@@ -456,6 +511,11 @@
 	std::vector<Type *> paramTys;
 
 	uint32_t sizeInBytes() const override { return 0; }
+	std::pair<const Type *, uint32_t> indexMember(std::queue<uint32_t> &&indices) const override
+	{
+		DABORT("indexMember() called on FunctionType");
+		return {};
+	}
 	std::shared_ptr<vk::dbg::Value> value(void *ptr, bool interleaved) const override { return nullptr; }
 };
 
@@ -487,6 +547,15 @@
 
 	uint32_t sizeInBytes() const override { return size / 8; }
 
+	std::pair<const Type *, uint32_t> indexMember(std::queue<uint32_t> &&indices) const override
+	{
+		auto idx = take(indices);
+		auto member = members[idx];
+		auto out = member->type->index(std::move(indices));
+		out.second += member->offset / 8;
+		return out;
+	}
+
 	std::shared_ptr<vk::dbg::Value> value(void *ptr, bool interleaved) const override
 	{
 		auto vc = std::make_shared<vk::dbg::VariableContainer>();
@@ -520,6 +589,10 @@
 	std::vector<TemplateParameter *> parameters;
 
 	uint32_t sizeInBytes() const override { return target->sizeInBytes(); }
+	std::pair<const Type *, uint32_t> indexMember(std::queue<uint32_t> &&indices) const override
+	{
+		return target->index(std::move(indices));
+	}
 	std::shared_ptr<vk::dbg::Value> value(void *ptr, bool interleaved) const override
 	{
 		return target->value(ptr, interleaved);
@@ -605,7 +678,7 @@
 struct Value : ObjectImpl<Value, Object, Object::Kind::Value>
 {
 	LocalVariable *local = nullptr;
-	sw::SpirvShader::Object::ID variable;
+	sw::SpirvShader::Object::ID value;
 	Expression *expression = nullptr;
 	std::vector<uint32_t> indexes;
 };
@@ -658,6 +731,7 @@
 // sw::SpirvShader::Impl::Debugger
 //
 // Private struct holding debugger information for the SpirvShader.
+// There is an instance of this class per shader program.
 ////////////////////////////////////////////////////////////////////////////////
 struct SpirvShader::Impl::Debugger
 {
@@ -676,6 +750,13 @@
 	void setLocation(EmitState *state, const std::shared_ptr<vk::dbg::File> &, int line, int column);
 	void setLocation(EmitState *state, const std::string &path, int line, int column);
 
+	// foreachLane() calls f for each debugger group representing the SIMD
+	// lanes of execution.
+	// FUNC is a function with the signature:
+	//   (int lane, const Group &group, auto &key)
+	template<typename Key, typename Func>
+	void foreachLane(const Key &key, const debug::Scope *scope, EmitState *state, Func &&f) const;
+
 	// exposeVariable exposes the variable with the given ID to the debugger
 	// using the specified key.
 	template<typename Key>
@@ -705,6 +786,17 @@
 	std::unordered_map<const void *, int> spirvLineMappings;  // instruction pointer to line
 	std::unordered_map<const void *, Object::ID> results;     // instruction pointer to result ID
 
+	// Shadow memory is used to construct a contiguous memory block for local
+	// variables that may be formed from multiple SSA values.
+	struct Shadow
+	{
+		// Offset in the shadow memory allocation for the given local variable.
+		std::unordered_map<debug::LocalVariable *, uint32_t> offsets;
+
+		// Total size of the shadow memory in bytes; grows as variables are allocated.
+		uint32_t size = 0;
+	} shadow;
+
 private:
 	// add() registers the debug object with the given id.
 	template<typename ID>
@@ -754,6 +846,7 @@
 // sw::SpirvShader::Impl::Debugger::State
 //
 // State holds the runtime data structures for the shader debug session.
+// There is an instance of this class per shader invocation.
 ////////////////////////////////////////////////////////////////////////////////
 class SpirvShader::Impl::Debugger::State
 {
@@ -800,6 +893,7 @@
 
 	const Debugger *debugger;
 	const std::shared_ptr<vk::dbg::Thread> thread;
+	std::unique_ptr<uint8_t[]> const shadow;
 	std::unordered_map<const debug::Scope *, Scopes> scopes;
 	Scopes globals;                          // Scope for globals.
 	debug::SourceScope *srcScope = nullptr;  // Current source scope.
@@ -821,6 +915,7 @@
 SpirvShader::Impl::Debugger::State::State(const Debugger *debugger, const char *stackBase, vk::dbg::Context::Lock &lock)
     : debugger(debugger)
     , thread(lock.currentThread())
+    , shadow(new uint8_t[debugger->shadow.size])
     , initialThreadDepth(thread->depth())
 {
 	enter(lock, stackBase);
@@ -1371,11 +1466,92 @@
 		case OpenCLDebugInfo100DebugValue:
 			defineOrEmit(insn, pass, [&](debug::Value *value) {
 				value->local = get(debug::LocalVariable::ID(insn.word(5)));
-				value->variable = Object::ID(insn.word(6));
+				value->value = Object::ID(insn.word(6));
 				value->expression = get(debug::Expression::ID(insn.word(7)));
 				for(uint32_t i = 8; i < insn.wordCount(); i++)
 				{
-					value->indexes.push_back(insn.word(i));
+					auto idx = shader->GetConstScalarInt(insn.word(i));
+					value->indexes.push_back(idx);
+				}
+
+				// DebugValue partially updates a DebugLocalVariable with an SSA
+				// value. This is typically used to update a DebugLocalVariable
+				// of a composite type, which holds structure member offsets
+				// from a base address.
+				// To handle these, we allocate shadow memory to hold a copy of
+				// the entire variable in contiguous memory and have the
+				// DebugLocalVariable point to this memory. Whenever we
+				// encounter a DebugValue, we copy the necessary fields to the
+				// shadow memory.
+
+				// type of the full DebugLocalVariable.
+				auto type = value->local->type;
+
+				// base address of the variable.
+				// Start by pointing base to the root of the shadow memory.
+				// This will be offset to the variable, then the member within
+				// the variable below.
+				SIMD::Pointer base(*Pointer<Pointer<Byte>>(state->routine->dbgState + OFFSET(State, shadow)), shadow.size);
+
+				// All variables are considered local, and therefore
+				// interleaved.
+				base = InterleaveByLane(base);
+
+				// Have we already allocated shadow memory for this variable?
+				auto it = shadow.offsets.find(value->local);
+				if(it == shadow.offsets.end())
+				{
+					// No shadow memory has been allocated for this local
+					// variable yet.
+
+					// Allocate the memory for the variable.
+					auto offset = shadow.size;
+					shadow.offsets.emplace(value->local, offset);
+					auto size = type->sizeInBytes() * SIMD::Width;
+					base += offset;
+					shadow.size += size;
+
+					// Expose the variable, using the per-group key supplied by foreachLane().
+					auto name = value->local->name.c_str();
+					auto scope = value->local->parent;
+					auto offsets = base.offsets();
+					foreachLane(name, scope, state, [&](int lane, const Group &group, auto &key) {
+						auto ptr = base.base + Extract(offsets, lane);
+						group.putPtr<const char *>(key, ptr, true, value->local->type);
+					});
+				}
+				else
+				{
+					// Shadow memory already allocated for this variable.
+					// Offset base to point to it.
+					base += it->second;
+				}
+
+				// Find the byte offset on the indexed member of the variable.
+				std::queue<uint32_t> indices;
+				for(auto idx : value->indexes)
+				{
+					indices.emplace(idx);
+				}
+				auto offset = type->index(std::move(indices)).second;
+
+				// Update base to point to the particular member.
+				base += offset;
+
+				// Now copy the updated value into shadow memory representation
+				// of the variable.
+				// TODO(b/148401179): This assumes tight packing of all
+				// components, which may not match with the debug structure
+				// layout.
+				auto &valObject = shader->getObject(value->value);
+				auto &valType = shader->getType(valObject);
+				for(auto i = 0u; i < valType.componentCount; i++)
+				{
+					auto val = Operand(shader, state, value->value).Int(i);
+					auto dst = base + i * sizeof(uint32_t) * SIMD::Width;
+					// Use RobustBufferAccess as the size as described by the
+					// debug type may be smaller than the true SSA size.
+					dst.Store(val, sw::OutOfBoundsBehavior::RobustBufferAccess, state->activeLaneMask());
 				}
 			});
 			break;
@@ -1505,6 +1681,22 @@
 	return ptr;
 }
 
+template<typename Key, typename Func>
+void SpirvShader::Impl::Debugger::foreachLane(
+    const Key &key,
+    const debug::Scope *scope,
+    EmitState *state,
+    Func &&f) const
+{
+	auto dbgState = state->routine->dbgState;
+	auto hover = Group::hovers(dbgState, scope).group<Key>(key);
+	for(int lane = 0; lane < SIMD::Width; lane++)
+	{
+		f(lane, Group::localsLane(dbgState, scope, lane), key);
+		f(lane, hover, laneNames[lane]);
+	}
+}
+
 template<typename Key>
 void SpirvShader::Impl::Debugger::exposeVariable(
     const SpirvShader *shader,
@@ -1514,13 +1706,9 @@
     Object::ID id,
     EmitState *state) const
 {
-	auto dbgState = state->routine->dbgState;
-	auto hover = Group::hovers(dbgState, scope).group<Key>(key);
-	for(int lane = 0; lane < SIMD::Width; lane++)
-	{
-		exposeVariable(shader, Group::localsLane(dbgState, scope, lane), lane, key, type, id, state);
-		exposeVariable(shader, hover, lane, laneNames[lane], type, id, state);
-	}
+	foreachLane(key, scope, state, [&](int lane, const Group &group, auto &key) {
+		exposeVariable(shader, group, lane, key, type, id, state);  // key: caller's key for locals, lane name for hovers
+	});
 }
 
 template<typename Key>