GLES: Remove hardcoded shader limits
Perform whole-shader analysis to determine usage limits.
Use these limits to allocate compile time and runtime arrays.
Removes the constants:
MAX_SHADER_NESTED_LOOPS
MAX_SHADER_NESTED_IFS
MAX_SHADER_CALL_STACK_SIZE
Also switched to using dynamic containers to remove the MAX_SHADER_CALL_SITES limit, which I believe to have been buggy and broken.
Bug: b/123587120
Change-Id: I89be80072183ac2aac28124df236888309e7207c
Reviewed-on: https://swiftshader-review.googlesource.com/c/24668
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
diff --git a/src/Common/Types.hpp b/src/Common/Types.hpp
index fac6d36..7259b20 100644
--- a/src/Common/Types.hpp
+++ b/src/Common/Types.hpp
@@ -152,11 +152,13 @@
return v;
}
- template <int limit> class BoundedIndex
+ class BoundedIndex
{
public:
BoundedIndex(int index) : index(index) {}
+ inline void setLimit(int limit) { this->limit = limit; }
+
inline int operator++(int) { return index++; }
inline int operator--(int) { return index--; }
inline void operator=(int i) { index = i; }
@@ -190,6 +192,7 @@
private:
int index = 0;
+ int limit = 0;
};
// The OFFSET macro is a generalization of the offsetof() macro defined in <cstddef>.
diff --git a/src/Main/Config.hpp b/src/Main/Config.hpp
index f0e9563..46ba961 100644
--- a/src/Main/Config.hpp
+++ b/src/Main/Config.hpp
@@ -97,9 +97,6 @@
MAX_TEXTURE_LOD = MIPMAP_LEVELS - 2, // Trilinear accesses lod+1
RENDERTARGETS = 8,
NUM_TEMPORARY_REGISTERS = 4096,
- MAX_SHADER_CALL_SITES = 2048,
- MAX_SHADER_NESTED_LOOPS = 4,
- MAX_SHADER_NESTED_IFS = 24 + 24,
MAX_SHADER_CALL_STACK_SIZE = 64,
MAX_SHADER_ENABLE_STACK_SIZE = 1 + 24,
};
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index ca44d19..843ba32 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -25,6 +25,39 @@
extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
extern bool fullPixelPositionRegister;
+ PixelProgram::PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) :
+ PixelRoutine(state, shader),
+ r(shader->indirectAddressableTemporaries),
+ aL(shader->getLimits().loops),
+ increment(shader->getLimits().loops),
+ iteration(shader->getLimits().loops),
+ callStack(shader->getLimits().stack)
+ {
+ auto limits = shader->getLimits();
+ ifDepth.setLimit(limits.ifs);
+ loopRepDepth.setLimit(limits.loops);
+ currentLabel.setLimit(limits.functions);
+
+ ifFalseBlock.resize(limits.ifs);
+ loopRepTestBlock.resize(limits.loops);
+ loopRepEndBlock.resize(limits.loops);
+ labelBlock.resize(limits.functions);
+ isConditionalIf.resize(limits.ifs);
+
+ loopDepth = -1;
+ enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+
+ if(shader->containsBreakInstruction())
+ {
+ enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ }
+
+ if(shader->containsContinueInstruction())
+ {
+ enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+ }
+ }
+
void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
{
if(shader->getShaderModel() >= 0x0300)
diff --git a/src/Shader/PixelProgram.hpp b/src/Shader/PixelProgram.hpp
index b8548d7..44b5d21 100644
--- a/src/Shader/PixelProgram.hpp
+++ b/src/Shader/PixelProgram.hpp
@@ -18,33 +18,14 @@
#include "PixelRoutine.hpp"
#include "SamplerCore.hpp"
+#include <unordered_map>
+
namespace sw
{
class PixelProgram : public PixelRoutine
{
public:
- PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) :
- PixelRoutine(state, shader), r(shader->indirectAddressableTemporaries)
- {
- for(int i = 0; i < MAX_SHADER_CALL_SITES; ++i)
- {
- labelBlock[i] = 0;
- }
-
- loopDepth = -1;
- enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
-
- if(shader->containsBreakInstruction())
- {
- enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
- }
-
- if(shader->containsContinueInstruction())
- {
- enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
- }
- }
-
+ PixelProgram(const PixelProcessor::State &state, const PixelShader *shader);
virtual ~PixelProgram() {}
protected:
@@ -67,13 +48,13 @@
// DX9 specific variables
Vector4f p0;
- Array<Int, MAX_SHADER_NESTED_LOOPS> aL;
- Array<Int, MAX_SHADER_NESTED_LOOPS> increment;
- Array<Int, MAX_SHADER_NESTED_LOOPS> iteration;
+ Array<Int> aL; // loop counter register
+ Array<Int> increment; // increment value per loop
+ Array<Int> iteration; // iteration count
Int loopDepth; // FIXME: Add support for switch
Int stackIndex; // FIXME: Inc/decrement callStack
- Array<UInt, MAX_SHADER_CALL_STACK_SIZE> callStack;
+ Array<UInt> callStack;
// Per pixel based on conditions reached
Int enableIndex;
@@ -153,18 +134,18 @@
void RET();
void LEAVE();
- BoundedIndex<MAX_SHADER_NESTED_IFS> ifDepth = 0;
- BoundedIndex<MAX_SHADER_NESTED_LOOPS> loopRepDepth = 0;
- BoundedIndex<MAX_SHADER_CALL_SITES> currentLabel = -1;
+ BoundedIndex ifDepth = 0;
+ BoundedIndex loopRepDepth = 0;
+ BoundedIndex currentLabel = -1;
bool scalar = false;
- BasicBlock *ifFalseBlock[MAX_SHADER_NESTED_IFS];
- BasicBlock *loopRepTestBlock[MAX_SHADER_NESTED_LOOPS];
- BasicBlock *loopRepEndBlock[MAX_SHADER_NESTED_LOOPS];
- BasicBlock *labelBlock[MAX_SHADER_CALL_SITES];
- std::vector<BasicBlock*> callRetBlock[MAX_SHADER_CALL_SITES];
+ std::vector<BasicBlock*> ifFalseBlock;
+ std::vector<BasicBlock*> loopRepTestBlock;
+ std::vector<BasicBlock*> loopRepEndBlock;
+ std::vector<BasicBlock*> labelBlock;
+ std::unordered_map<unsigned int, std::vector<BasicBlock*>> callRetBlock; // label -> list of call sites
BasicBlock *returnBlock;
- bool isConditionalIf[MAX_SHADER_NESTED_IFS];
+ std::vector<bool> isConditionalIf;
std::vector<Int4> restoreContinue;
};
}
diff --git a/src/Shader/PixelShader.cpp b/src/Shader/PixelShader.cpp
index d24e7c2..1af50d8 100644
--- a/src/Shader/PixelShader.cpp
+++ b/src/Shader/PixelShader.cpp
@@ -161,6 +161,7 @@
analyzeSamplers();
analyzeCallSites();
analyzeIndirectAddressing();
+ analyzeLimits();
}
void PixelShader::analyzeZOverride()
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp
index db2b2bc..3528b1e 100644
--- a/src/Shader/Shader.cpp
+++ b/src/Shader/Shader.cpp
@@ -19,10 +19,14 @@
#include "Common/Math.hpp"
#include "Common/Debug.hpp"
+#include <algorithm>
#include <set>
#include <fstream>
+#include <functional>
#include <sstream>
#include <stdarg.h>
+#include <unordered_map>
+#include <unordered_set>
namespace sw
{
@@ -1877,15 +1881,13 @@
// This is used to know what basic block to return to.
void Shader::analyzeCallSites()
{
- int callSiteIndex[MAX_SHADER_CALL_SITES] = {0};
+ std::unordered_map<int, int> callSiteIndices;
for(auto &inst : instruction)
{
if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
{
- int label = sw::min(inst->dst.label, static_cast<unsigned int>(MAX_SHADER_CALL_SITES));
-
- inst->dst.callSite = callSiteIndex[label]++;
+ inst->dst.callSite = callSiteIndices[inst->dst.label]++;
}
}
}
@@ -1924,4 +1926,177 @@
}
}
}
+
+ // analyzeLimits analyzes the whole shader program to determine the deepest
+ // nesting of control flow blocks and function calls. These calculations
+ // are stored into the limits member, and is used by the programs to
+ // allocate stack storage variables.
+ void Shader::analyzeLimits()
+ {
+ typedef unsigned int FunctionID;
+
+ // Identifier of the function with the main entry point.
+ constexpr FunctionID MAIN_ID = 0xF0000000;
+
+ // Invalid function identifier.
+ constexpr FunctionID INVALID_ID = ~0U;
+
+ // Limits on a single function.
+ struct FunctionLimits
+ {
+ uint32_t loops = 0; // maximum nested loop and reps.
+ uint32_t ifs = 0; // maximum nested if statements.
+ uint32_t stack = 0; // maximum call depth.
+ };
+
+ // Information about a single function in the shader.
+ struct FunctionInfo
+ {
+ FunctionLimits limits;
+ std::unordered_set<FunctionID> calls; // What this function calls.
+ bool reachable; // Is this function reachable?
+ };
+
+ // Begin by doing a pass over the instructions to identify all the
+ // labels that denote the start of a function.
+ std::unordered_map<FunctionID, FunctionInfo> functions;
+
+ for(auto &inst : instruction)
+ {
+ if (inst->isCall())
+ {
+ FunctionID id = inst->dst.label;
+ ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else.
+ functions[id] = FunctionInfo();
+ }
+ }
+
+ // Add a definition for the main entry point.
+ // This starts at the beginning of the instructions and does not have
+ // its own label.
+ functions[MAIN_ID] = FunctionInfo();
+ functions[MAIN_ID].reachable = true;
+ FunctionID currentFunc = MAIN_ID;
+
+ // Limits for the currently analyzed function.
+ FunctionLimits currentLimits;
+
+ // Now loop over the instructions gathering the limits of each of the
+ // functions.
+ for(size_t i = 0; i < instruction.size(); i++)
+ {
+ const auto& inst = instruction[i];
+ switch (inst->opcode)
+ {
+ case OPCODE_LABEL:
+ {
+ FunctionID id = inst->dst.label;
+ auto it = functions.find(id);
+ if (it == functions.end())
+ {
+ // Label does not start a new function.
+ continue;
+ }
+ ASSERT(currentFunc == INVALID_ID); // Functions overlap
+ currentFunc = id;
+ break;
+ }
+ case OPCODE_CALL:
+ case OPCODE_CALLNZ:
+ {
+ ASSERT(currentFunc != INVALID_ID);
+ FunctionID id = inst->dst.label;
+ functions[currentFunc].calls.emplace(id);
+ functions[id].reachable = true;
+ break;
+ }
+ case OPCODE_LOOP:
+ case OPCODE_REP:
+ case OPCODE_WHILE:
+ case OPCODE_SWITCH: // Not a mistake - switches share loopReps.
+ {
+ ASSERT(currentFunc != INVALID_ID);
+ ++currentLimits.loops;
+ auto& func = functions[currentFunc];
+ func.limits.loops = std::max(func.limits.loops, currentLimits.loops);
+ break;
+ }
+ case OPCODE_ENDLOOP:
+ case OPCODE_ENDREP:
+ case OPCODE_ENDWHILE:
+ case OPCODE_ENDSWITCH:
+ {
+ ASSERT(currentLimits.loops > 0);
+ --currentLimits.loops;
+ break;
+ }
+ case OPCODE_IF:
+ case OPCODE_IFC:
+ {
+ ASSERT(currentFunc != INVALID_ID);
+ ++currentLimits.ifs;
+ auto& func = functions[currentFunc];
+ func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs);
+ break;
+ }
+ case OPCODE_ENDIF:
+ {
+ ASSERT(currentLimits.ifs > 0);
+ currentLimits.ifs--;
+ break;
+ }
+ case OPCODE_RET:
+ {
+ // Must be in a function to return.
+ ASSERT(currentFunc != INVALID_ID);
+
+ // All stacks should be popped before returning.
+ ASSERT(currentLimits.ifs == 0);
+ ASSERT(currentLimits.loops == 0);
+
+ currentFunc = INVALID_ID;
+ currentLimits = FunctionLimits();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ // Assert that every function is reachable (these should have been
+ // stripped in earlier stages). Unreachable functions may be code
+ // generated, but their own limits are not considered below, potentially
+ // causing OOB indexing in later stages.
+ for (auto it : functions) { ASSERT(it.second.reachable); }
+
+ // We have now gathered all the information about each of the functions
+ // in the shader. Traverse these functions starting from the main
+ // function to calculate the maximum limits across the entire shader.
+
+ std::unordered_set<FunctionID> visited;
+ std::function<Limits(FunctionID)> traverse;
+ traverse = [&](FunctionID id) -> Limits
+ {
+ const auto& func = functions[id];
+ ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed.
+ visited.insert(id);
+ Limits limits;
+ limits.stack = 1;
+ for (auto callee : func.calls)
+ {
+ auto calleeLimits = traverse(callee);
+ limits.loops = std::max(limits.loops, calleeLimits.loops);
+ limits.ifs = std::max(limits.ifs, calleeLimits.ifs);
+ limits.stack = std::max(limits.stack, calleeLimits.stack + 1);
+ }
+ visited.erase(id);
+
+ limits.loops += func.limits.loops;
+ limits.ifs += func.limits.ifs;
+ return limits;
+ };
+
+ limits = traverse(MAIN_ID);
+ limits.functions = (uint32_t)functions.size();
+ }
}
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp
index ada88ba..64a86cd 100644
--- a/src/Shader/Shader.hpp
+++ b/src/Shader/Shader.hpp
@@ -35,6 +35,7 @@
enum Opcode
{
// Matches order in d3d9types.h
+ // See https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/d3d9types/ne-d3d9types-_d3dshader_instruction_opcode_type
OPCODE_NOP = 0,
OPCODE_MOV,
OPCODE_ADD,
@@ -554,6 +555,15 @@
};
};
+ // Limits holds the maximum nested counts for the shader.
+ struct Limits
+ {
+ uint32_t loops = 0; // maximum nested loop and reps.
+ uint32_t ifs = 0; // maximum nested if statements.
+ uint32_t stack = 0; // maximum call depth.
+ uint32_t functions = 0; // total number of functions.
+ };
+
Shader();
virtual ~Shader();
@@ -562,6 +572,7 @@
size_t getLength() const;
ShaderType getShaderType() const;
unsigned short getShaderModel() const;
+ inline const Limits& getLimits() const { return limits; }
void append(Instruction *instruction);
void declareSampler(int i);
@@ -629,8 +640,11 @@
void analyzeSamplers();
void analyzeCallSites();
void analyzeIndirectAddressing();
+ void analyzeLimits();
void markFunctionAnalysis(unsigned int functionLabel, Analysis flag);
+ Limits limits; // Calculated in analyzeLimits().
+
ShaderType shaderType;
union
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 8bae9cf..0463ea6 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -24,12 +24,24 @@
namespace sw
{
VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
- : VertexRoutine(state, shader), shader(shader), r(shader->indirectAddressableTemporaries)
+ : VertexRoutine(state, shader),
+ shader(shader),
+ r(shader->indirectAddressableTemporaries),
+ aL(shader->getLimits().loops),
+ increment(shader->getLimits().loops),
+ iteration(shader->getLimits().loops),
+ callStack(shader->getLimits().stack)
{
- for(int i = 0; i < MAX_SHADER_CALL_SITES; i++)
- {
- labelBlock[i] = 0;
- }
+ auto limits = shader->getLimits();
+ ifDepth.setLimit(limits.ifs);
+ loopRepDepth.setLimit(limits.loops);
+ currentLabel.setLimit(limits.functions);
+
+ ifFalseBlock.resize(limits.ifs);
+ loopRepTestBlock.resize(limits.loops);
+ loopRepEndBlock.resize(limits.loops);
+ labelBlock.resize(limits.functions);
+ isConditionalIf.resize(limits.ifs);
loopDepth = -1;
enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
diff --git a/src/Shader/VertexProgram.hpp b/src/Shader/VertexProgram.hpp
index 379abce..3f794fa 100644
--- a/src/Shader/VertexProgram.hpp
+++ b/src/Shader/VertexProgram.hpp
@@ -22,6 +22,8 @@
#include "Renderer/Stream.hpp"
#include "Common/Types.hpp"
+#include <unordered_map>
+
namespace sw
{
struct Stream;
@@ -39,15 +41,15 @@
RegisterArray<NUM_TEMPORARY_REGISTERS> r; // Temporary registers
Vector4f a0;
- Array<Int, MAX_SHADER_NESTED_LOOPS> aL;
+ Array<Int> aL; // loop counter register
Vector4f p0;
- Array<Int, MAX_SHADER_NESTED_LOOPS> increment;
- Array<Int, MAX_SHADER_NESTED_LOOPS> iteration;
+ Array<Int> increment;
+ Array<Int> iteration;
Int loopDepth;
Int stackIndex; // FIXME: Inc/decrement callStack
- Array<UInt, MAX_SHADER_CALL_STACK_SIZE> callStack;
+ Array<UInt> callStack;
Int enableIndex;
Array<Int4, MAX_SHADER_ENABLE_STACK_SIZE> enableStack;
@@ -122,18 +124,18 @@
Vector4f sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
Vector4f sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function);
- BoundedIndex<MAX_SHADER_NESTED_IFS> ifDepth = 0;
- BoundedIndex<MAX_SHADER_NESTED_LOOPS> loopRepDepth = 0;
- BoundedIndex<MAX_SHADER_CALL_SITES> currentLabel = -1;
+ BoundedIndex ifDepth = 0;
+ BoundedIndex loopRepDepth = 0;
+ BoundedIndex currentLabel = -1;
bool scalar = false;
- BasicBlock *ifFalseBlock[MAX_SHADER_NESTED_IFS];
- BasicBlock *loopRepTestBlock[MAX_SHADER_NESTED_LOOPS];
- BasicBlock *loopRepEndBlock[MAX_SHADER_NESTED_LOOPS];
- BasicBlock *labelBlock[MAX_SHADER_CALL_SITES];
- std::vector<BasicBlock*> callRetBlock[MAX_SHADER_CALL_SITES];
+ std::vector<BasicBlock*> ifFalseBlock;
+ std::vector<BasicBlock*> loopRepTestBlock;
+ std::vector<BasicBlock*> loopRepEndBlock;
+ std::vector<BasicBlock*> labelBlock;
+ std::unordered_map<unsigned int, std::vector<BasicBlock*>> callRetBlock; // label -> list of call sites
BasicBlock *returnBlock;
- bool isConditionalIf[MAX_SHADER_NESTED_IFS];
+ std::vector<bool> isConditionalIf;
std::vector<Int4> restoreContinue;
};
}
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp
index 8f1c4f8..7b2455c 100644
--- a/src/Shader/VertexShader.cpp
+++ b/src/Shader/VertexShader.cpp
@@ -208,6 +208,7 @@
analyzeSamplers();
analyzeCallSites();
analyzeIndirectAddressing();
+ analyzeLimits();
}
void VertexShader::analyzeInput()