SpirvShader: Rework CFG traversal.

The previous traversal could create huge pending lists of duplicate-blocks, to just skip over them.
This was due a couple of factors, including BFS traversal and always appending the downstream blocks to the queue.

There's no particular reason to do BFS traversal, so perform DFS traversal instead, and check the downstream blocks haven't been processed already before enquing.

Fixes pending lists reaching counts as high as 1e7 for tests such as dEQP-VK.spirv_assembly.instruction.compute.opphi.nested -
making these tests much faster to pass and drastically reducing peak memory usage.

Bug: b/135512559
Change-Id: Ibbfa03e582f08d6b41e6c8496d05b8b4027b31b6
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/32951
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 5d2b2da..80d951d 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -29,6 +29,8 @@
 #include <spirv/unified1/spirv.hpp>
 #include <spirv/unified1/GLSL.std.450.h>
 
+#include <queue>
+
 namespace
 {
 	constexpr float PI = 3.141592653589793f;
@@ -1980,20 +1982,38 @@
 	{
 		auto oldPending = state->pending;
 
-		std::queue<Block::ID> pending;
+		std::deque<Block::ID> pending;
 		state->pending = &pending;
-		pending.push(id);
+		pending.push_front(id);
 		while (pending.size() > 0)
 		{
 			auto id = pending.front();
-			pending.pop();
 
 			auto const &block = getBlock(id);
 			if (id == ignore)
 			{
+				pending.pop_front();
 				continue;
 			}
 
+			// Ensure all dependency blocks have been generated.
+			auto depsDone = true;
+			ForeachBlockDependency(id, [&](Block::ID dep)
+			{
+				if (state->visited.count(dep) == 0)
+				{
+					state->pending->push_front(dep);
+					depsDone = false;
+				}
+			});
+
+			if (!depsDone)
+			{
+				continue;
+			}
+
+			pending.pop_front();
+
 			state->currentBlock = id;
 
 			switch (block.kind)
@@ -2036,29 +2056,24 @@
 		}
 	}
 
+	void SpirvShader::ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const
+	{
+		auto block = getBlock(blockId);
+		for (auto dep : block.ins)
+		{
+			if (block.kind != Block::Loop ||                 // if not a loop...
+				!existsPath(blockId, dep, block.mergeBlock)) // or a loop and not a loop back edge
+			{
+				f(dep);
+			}
+		}
+	}
+
 	void SpirvShader::EmitNonLoop(EmitState *state) const
 	{
 		auto blockId = state->currentBlock;
 		auto block = getBlock(blockId);
 
-		// Ensure all incoming blocks have been generated.
-		auto depsDone = true;
-		for (auto in : block.ins)
-		{
-			if (state->visited.count(in) == 0)
-			{
-				state->pending->emplace(in);
-				depsDone = false;
-			}
-		}
-
-		if (!depsDone)
-		{
-			// come back to this once the dependencies have been generated
-			state->pending->emplace(blockId);
-			return;
-		}
-
 		if (!state->visited.emplace(blockId).second)
 		{
 			return; // Already generated this block.
@@ -2080,7 +2095,10 @@
 
 		for (auto out : block.outs)
 		{
-			state->pending->emplace(out);
+			if (state->visited.count(out) == 0)
+			{
+				state->pending->push_back(out);
+			}
 		}
 	}
 
@@ -2091,27 +2109,6 @@
 		auto mergeBlockId = block.mergeBlock;
 		auto &mergeBlock = getBlock(mergeBlockId);
 
-		// Ensure all incoming non-back edge blocks have been generated.
-		auto depsDone = true;
-		for (auto in : block.ins)
-		{
-			if (state->visited.count(in) == 0)
-			{
-				if (!existsPath(blockId, in, mergeBlockId)) // if not a loop back edge
-				{
-					state->pending->emplace(in);
-					depsDone = false;
-				}
-			}
-		}
-
-		if (!depsDone)
-		{
-			// come back to this once the dependencies have been generated
-			state->pending->emplace(blockId);
-			return;
-		}
-
 		if (!state->visited.emplace(blockId).second)
 		{
 			return; // Already emitted this loop.
@@ -2260,7 +2257,7 @@
 
 		// Continue emitting from the merge block.
 		Nucleus::setInsertBlock(mergeBasicBlock);
-		state->pending->emplace(mergeBlockId);
+		state->pending->push_back(mergeBlockId);
 		for (auto it : mergeActiveLaneMasks)
 		{
 			state->addActiveLaneMaskEdge(it.first, mergeBlockId, it.second);
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 67ce732..b692254 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -34,7 +34,7 @@
 #include <cstring>
 #include <functional>
 #include <memory>
-#include <queue>
+#include <deque>
 #include <string>
 #include <type_traits>
 #include <unordered_map>
@@ -888,7 +888,7 @@
 			Block::ID currentBlock; // The current block being built.
 			Block::Set visited; // Blocks already built.
 			std::unordered_map<Block::Edge, RValue<SIMD::Int>, Block::Edge::Hash> edgeActiveLaneMasks;
-			std::queue<Block::ID> *pending;
+			std::deque<Block::ID> *pending;
 
 			const vk::DescriptorSet::Bindings &descriptorSets;
 		};
@@ -910,7 +910,12 @@
 		// Asserts if from is reachable and the edge does not exist.
 		RValue<SIMD::Int> GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const;
 
-		// Emit all the unvisited blocks (except for ignore) in BFS order,
+		// ForeachBlockDependency calls f with each dependency of the given
+		// block. A dependency is an incoming block that is not a loop-back
+		// edge.
+		void ForeachBlockDependency(Block::ID blockId, std::function<void(Block::ID)> f) const;
+
+		// Emit all the unvisited blocks (except for ignore) in DFS order,
 		// starting with id.
 		void EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore = 0) const;
 		void EmitNonLoop(EmitState *state) const;