Implement gather/scatter operations for shader register files. This allows the registers to be addressed with a vector of indices. Also rename 'dynamic' register files to 'indirect addressable', to disambiguate from 'dynamic indexing' at the shader level. Indexing with a uniform does not require gather/scatter operations, but does require indirect addressing. Bug chromium:845103 Bug skia:7846 Change-Id: I3c42be33def66328688f2900c61c80246bf1e584 Reviewed-on: https://swiftshader-review.googlesource.com/18989 Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Shader/PixelProgram.hpp b/src/Shader/PixelProgram.hpp index 1f60bde..ef6c2c0 100644 --- a/src/Shader/PixelProgram.hpp +++ b/src/Shader/PixelProgram.hpp
@@ -24,7 +24,7 @@ { public: PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) : - PixelRoutine(state, shader), r(shader->dynamicallyIndexedTemporaries), + PixelRoutine(state, shader), r(shader->indirectAddressableTemporaries), loopDepth(-1), ifDepth(0), loopRepDepth(0), currentLabel(-1), whileTest(false) { for(int i = 0; i < 2048; ++i)
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp index 1c300b0..146e42d 100644 --- a/src/Shader/PixelRoutine.cpp +++ b/src/Shader/PixelRoutine.cpp
@@ -29,7 +29,8 @@ extern bool exactColorRounding; extern bool forceClearRegisters; - PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput) + PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) + : QuadRasterizer(state, shader), v(shader && shader->indirectAddressableInput) { if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters) {
diff --git a/src/Shader/PixelShader.cpp b/src/Shader/PixelShader.cpp index 9e281d9..d24e7c2 100644 --- a/src/Shader/PixelShader.cpp +++ b/src/Shader/PixelShader.cpp
@@ -160,7 +160,7 @@ analyzeDynamicBranching(); analyzeSamplers(); analyzeCallSites(); - analyzeDynamicIndexing(); + analyzeIndirectAddressing(); } void PixelShader::analyzeZOverride()
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp index 6874051..36192c9 100644 --- a/src/Shader/Shader.cpp +++ b/src/Shader/Shader.cpp
@@ -1890,40 +1890,34 @@ } } - void Shader::analyzeDynamicIndexing() + void Shader::analyzeIndirectAddressing() { - dynamicallyIndexedTemporaries = false; - dynamicallyIndexedInput = false; - dynamicallyIndexedOutput = false; + indirectAddressableTemporaries = false; + indirectAddressableInput = false; + indirectAddressableOutput = false; for(const auto &inst : instruction) { - if(inst->dst.rel.type == PARAMETER_ADDR || - inst->dst.rel.type == PARAMETER_LOOP || - inst->dst.rel.type == PARAMETER_TEMP || - inst->dst.rel.type == PARAMETER_CONST) + if(inst->dst.rel.type != PARAMETER_VOID) { switch(inst->dst.type) { - case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; - case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; - case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; + case PARAMETER_TEMP: indirectAddressableTemporaries = true; break; + case PARAMETER_INPUT: indirectAddressableInput = true; break; + case PARAMETER_OUTPUT: indirectAddressableOutput = true; break; default: break; } } for(int j = 0; j < 3; j++) { - if(inst->src[j].rel.type == PARAMETER_ADDR || - inst->src[j].rel.type == PARAMETER_LOOP || - inst->src[j].rel.type == PARAMETER_TEMP || - inst->src[j].rel.type == PARAMETER_CONST) + if(inst->src[j].rel.type != PARAMETER_VOID) { switch(inst->src[j].type) { - case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break; - case PARAMETER_INPUT: dynamicallyIndexedInput = true; break; - case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break; + case PARAMETER_TEMP: indirectAddressableTemporaries = true; break; + case PARAMETER_INPUT: indirectAddressableInput = true; break; + case PARAMETER_OUTPUT: indirectAddressableOutput = true; break; default: break; } }
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp index 6755cd4..6d431f5 100644 --- a/src/Shader/Shader.hpp +++ b/src/Shader/Shader.hpp
@@ -612,9 +612,9 @@ unsigned int dirtyConstantsI; unsigned int dirtyConstantsB; - bool dynamicallyIndexedTemporaries; - bool dynamicallyIndexedInput; - bool dynamicallyIndexedOutput; + bool indirectAddressableTemporaries; + bool indirectAddressableInput; + bool indirectAddressableOutput; protected: void parse(const unsigned long *token); @@ -627,7 +627,7 @@ void analyzeDynamicBranching(); void analyzeSamplers(); void analyzeCallSites(); - void analyzeDynamicIndexing(); + void analyzeIndirectAddressing(); void markFunctionAnalysis(unsigned int functionLabel, Analysis flag); ShaderType shaderType;
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp index 338605c..4ea3260 100644 --- a/src/Shader/ShaderCore.cpp +++ b/src/Shader/ShaderCore.cpp
@@ -560,6 +560,100 @@ } } + const Vector4f RegisterFile::operator[](RValue<Int4> index) + { + ASSERT(indirectAddressable); + + Int index0 = Extract(index, 0); + Int index1 = Extract(index, 1); + Int index2 = Extract(index, 2); + Int index3 = Extract(index, 3); + + Vector4f r; + + r.x.x = Extract(x[0][index0], 0); + r.x.y = Extract(x[0][index1], 1); + r.x.z = Extract(x[0][index2], 2); + r.x.w = Extract(x[0][index3], 3); + + r.y.x = Extract(y[0][index0], 0); + r.y.y = Extract(y[0][index1], 1); + r.y.z = Extract(y[0][index2], 2); + r.y.w = Extract(y[0][index3], 3); + + r.z.x = Extract(z[0][index0], 0); + r.z.y = Extract(z[0][index1], 1); + r.z.z = Extract(z[0][index2], 2); + r.z.w = Extract(z[0][index3], 3); + + r.w.x = Extract(w[0][index0], 0); + r.w.y = Extract(w[0][index1], 1); + r.w.z = Extract(w[0][index2], 2); + r.w.w = Extract(w[0][index3], 3); + + return r; + } + + void RegisterFile::scatter_x(Int4 index, RValue<Float4> r) + { + ASSERT(indirectAddressable); + + Int index0 = Extract(index, 0); + Int index1 = Extract(index, 1); + Int index2 = Extract(index, 2); + Int index3 = Extract(index, 3); + + x[0][index0] = Insert(x[0][index0], Extract(r, 0), 0); + x[0][index1] = Insert(x[0][index1], Extract(r, 1), 1); + x[0][index2] = Insert(x[0][index2], Extract(r, 2), 2); + x[0][index3] = Insert(x[0][index3], Extract(r, 3), 3); + } + + void RegisterFile::scatter_y(Int4 index, RValue<Float4> r) + { + ASSERT(indirectAddressable); + + Int index0 = Extract(index, 0); + Int index1 = Extract(index, 1); + Int index2 = Extract(index, 2); + Int index3 = Extract(index, 3); + + y[0][index0] = Insert(y[0][index0], Extract(r, 0), 0); + y[0][index1] = Insert(y[0][index1], Extract(r, 1), 1); + y[0][index2] = Insert(y[0][index2], Extract(r, 2), 2); + y[0][index3] = Insert(y[0][index3], Extract(r, 3), 3); + } + + void RegisterFile::scatter_z(Int4 index, RValue<Float4> r) + { + ASSERT(indirectAddressable); + + Int index0 = Extract(index, 0); + Int index1 = Extract(index, 1); + Int index2 
= Extract(index, 2); + Int index3 = Extract(index, 3); + + z[0][index0] = Insert(z[0][index0], Extract(r, 0), 0); + z[0][index1] = Insert(z[0][index1], Extract(r, 1), 1); + z[0][index2] = Insert(z[0][index2], Extract(r, 2), 2); + z[0][index3] = Insert(z[0][index3], Extract(r, 3), 3); + } + + void RegisterFile::scatter_w(Int4 index, RValue<Float4> r) + { + ASSERT(indirectAddressable); + + Int index0 = Extract(index, 0); + Int index1 = Extract(index, 1); + Int index2 = Extract(index, 2); + Int index3 = Extract(index, 3); + + w[0][index0] = Insert(w[0][index0], Extract(r, 0), 0); + w[0][index1] = Insert(w[0][index1], Extract(r, 1), 1); + w[0][index2] = Insert(w[0][index2], Extract(r, 2), 2); + w[0][index3] = Insert(w[0][index3], Extract(r, 3), 3); + } + void ShaderCore::mov(Vector4f &dst, const Vector4f &src, bool integerDestination) { if(integerDestination)
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp index 249e058..4dc109f 100644 --- a/src/Shader/ShaderCore.hpp +++ b/src/Shader/ShaderCore.hpp
@@ -147,31 +147,30 @@ Reference<Float4> w; }; - template<int S, bool D = false> - class RegisterArray + class RegisterFile { public: - RegisterArray(bool dynamic = D) : dynamic(dynamic) + RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable) { - if(dynamic) + if(indirectAddressable) { - x = new Array<Float4>(S); - y = new Array<Float4>(S); - z = new Array<Float4>(S); - w = new Array<Float4>(S); + x = new Array<Float4>(size); + y = new Array<Float4>(size); + z = new Array<Float4>(size); + w = new Array<Float4>(size); } else { - x = new Array<Float4>[S]; - y = new Array<Float4>[S]; - z = new Array<Float4>[S]; - w = new Array<Float4>[S]; + x = new Array<Float4>[size]; + y = new Array<Float4>[size]; + z = new Array<Float4>[size]; + w = new Array<Float4>[size]; } } - ~RegisterArray() + ~RegisterFile() { - if(dynamic) + if(indirectAddressable) { delete x; delete y; @@ -189,7 +188,7 @@ Register operator[](int i) { - if(dynamic) + if(indirectAddressable) { return Register(x[0][i], y[0][i], z[0][i], w[0][i]); } @@ -201,19 +200,36 @@ Register operator[](RValue<Int> i) { - ASSERT(dynamic); + ASSERT(indirectAddressable); return Register(x[0][i], y[0][i], z[0][i], w[0][i]); } - private: - const bool dynamic; + const Vector4f operator[](RValue<Int4> i); // Gather operation (read only). + + void scatter_x(Int4 i, RValue<Float4> r); + void scatter_y(Int4 i, RValue<Float4> r); + void scatter_z(Int4 i, RValue<Float4> r); + void scatter_w(Int4 i, RValue<Float4> r); + + protected: + const int size; + const bool indirectAddressable; Array<Float4> *x; Array<Float4> *y; Array<Float4> *z; Array<Float4> *w; }; + template<int S, bool I = false> + class RegisterArray : public RegisterFile + { + public: + RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable) + { + } + }; + class ShaderCore { typedef Shader::Control Control;
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp index 4f8ba1a..8dbd600 100644 --- a/src/Shader/VertexProgram.cpp +++ b/src/Shader/VertexProgram.cpp
@@ -24,7 +24,7 @@ namespace sw { VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) - : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries) + : VertexRoutine(state, shader), shader(shader), r(shader->indirectAddressableTemporaries) { ifDepth = 0; loopRepDepth = 0;
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp index 2d7c2c6..9b8d336 100644 --- a/src/Shader/VertexRoutine.cpp +++ b/src/Shader/VertexRoutine.cpp
@@ -27,8 +27,8 @@ extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) - : v(shader && shader->dynamicallyIndexedInput), - o(shader && shader->dynamicallyIndexedOutput), + : v(shader && shader->indirectAddressableInput), + o(shader && shader->indirectAddressableOutput), state(state) { }
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp index 33c2241..8f1c4f8 100644 --- a/src/Shader/VertexShader.cpp +++ b/src/Shader/VertexShader.cpp
@@ -176,7 +176,7 @@ setOutput(posReg, 4, sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0)); positionRegister = posReg; } - + void VertexShader::setPointSizeRegister(int ptSizeReg) { setOutput(ptSizeReg, 4, sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0)); @@ -207,7 +207,7 @@ analyzeDynamicBranching(); analyzeSamplers(); analyzeCallSites(); - analyzeDynamicIndexing(); + analyzeIndirectAddressing(); } void VertexShader::analyzeInput()