Implement gather/scatter operations for shader register files.
This allows the registers to be addressed with a vector of indices.
Also rename 'dynamic' register files to 'indirect addressable', to
disambiguate from 'dynamic indexing' at the shader level. Indexing with
a uniform does not require gather/scatter operations, but does require
indirect addressing.
Bug chromium:845103
Bug skia:7846
Change-Id: I3c42be33def66328688f2900c61c80246bf1e584
Reviewed-on: https://swiftshader-review.googlesource.com/18989
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Shader/PixelProgram.hpp b/src/Shader/PixelProgram.hpp
index 1f60bde..ef6c2c0 100644
--- a/src/Shader/PixelProgram.hpp
+++ b/src/Shader/PixelProgram.hpp
@@ -24,7 +24,7 @@
{
public:
PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) :
- PixelRoutine(state, shader), r(shader->dynamicallyIndexedTemporaries),
+ PixelRoutine(state, shader), r(shader->indirectAddressableTemporaries),
loopDepth(-1), ifDepth(0), loopRepDepth(0), currentLabel(-1), whileTest(false)
{
for(int i = 0; i < 2048; ++i)
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index 1c300b0..146e42d 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -29,7 +29,8 @@
extern bool exactColorRounding;
extern bool forceClearRegisters;
- PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput)
+ PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader)
+ : QuadRasterizer(state, shader), v(shader && shader->indirectAddressableInput)
{
if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters)
{
diff --git a/src/Shader/PixelShader.cpp b/src/Shader/PixelShader.cpp
index 9e281d9..d24e7c2 100644
--- a/src/Shader/PixelShader.cpp
+++ b/src/Shader/PixelShader.cpp
@@ -160,7 +160,7 @@
analyzeDynamicBranching();
analyzeSamplers();
analyzeCallSites();
- analyzeDynamicIndexing();
+ analyzeIndirectAddressing();
}
void PixelShader::analyzeZOverride()
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp
index 6874051..36192c9 100644
--- a/src/Shader/Shader.cpp
+++ b/src/Shader/Shader.cpp
@@ -1890,40 +1890,34 @@
}
}
- void Shader::analyzeDynamicIndexing()
+ void Shader::analyzeIndirectAddressing()
{
- dynamicallyIndexedTemporaries = false;
- dynamicallyIndexedInput = false;
- dynamicallyIndexedOutput = false;
+ indirectAddressableTemporaries = false;
+ indirectAddressableInput = false;
+ indirectAddressableOutput = false;
for(const auto &inst : instruction)
{
- if(inst->dst.rel.type == PARAMETER_ADDR ||
- inst->dst.rel.type == PARAMETER_LOOP ||
- inst->dst.rel.type == PARAMETER_TEMP ||
- inst->dst.rel.type == PARAMETER_CONST)
+ if(inst->dst.rel.type != PARAMETER_VOID)
{
switch(inst->dst.type)
{
- case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break;
- case PARAMETER_INPUT: dynamicallyIndexedInput = true; break;
- case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break;
+ case PARAMETER_TEMP: indirectAddressableTemporaries = true; break;
+ case PARAMETER_INPUT: indirectAddressableInput = true; break;
+ case PARAMETER_OUTPUT: indirectAddressableOutput = true; break;
default: break;
}
}
for(int j = 0; j < 3; j++)
{
- if(inst->src[j].rel.type == PARAMETER_ADDR ||
- inst->src[j].rel.type == PARAMETER_LOOP ||
- inst->src[j].rel.type == PARAMETER_TEMP ||
- inst->src[j].rel.type == PARAMETER_CONST)
+ if(inst->src[j].rel.type != PARAMETER_VOID)
{
switch(inst->src[j].type)
{
- case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break;
- case PARAMETER_INPUT: dynamicallyIndexedInput = true; break;
- case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break;
+ case PARAMETER_TEMP: indirectAddressableTemporaries = true; break;
+ case PARAMETER_INPUT: indirectAddressableInput = true; break;
+ case PARAMETER_OUTPUT: indirectAddressableOutput = true; break;
default: break;
}
}
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp
index 6755cd4..6d431f5 100644
--- a/src/Shader/Shader.hpp
+++ b/src/Shader/Shader.hpp
@@ -612,9 +612,9 @@
unsigned int dirtyConstantsI;
unsigned int dirtyConstantsB;
- bool dynamicallyIndexedTemporaries;
- bool dynamicallyIndexedInput;
- bool dynamicallyIndexedOutput;
+ bool indirectAddressableTemporaries;
+ bool indirectAddressableInput;
+ bool indirectAddressableOutput;
protected:
void parse(const unsigned long *token);
@@ -627,7 +627,7 @@
void analyzeDynamicBranching();
void analyzeSamplers();
void analyzeCallSites();
- void analyzeDynamicIndexing();
+ void analyzeIndirectAddressing();
void markFunctionAnalysis(unsigned int functionLabel, Analysis flag);
ShaderType shaderType;
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index 338605c..4ea3260 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp
@@ -560,6 +560,100 @@
}
}
+ const Vector4f RegisterFile::operator[](RValue<Int4> index) // Gather: lane k of every result component is read from the register selected by lane k of 'index'.
+ {
+ ASSERT(indirectAddressable); // The x[0][i] indexing used below is the indirect-addressable layout.
+
+ Int index0 = Extract(index, 0); // Scalar register index for lane 0.
+ Int index1 = Extract(index, 1); // Scalar register index for lane 1.
+ Int index2 = Extract(index, 2); // Scalar register index for lane 2.
+ Int index3 = Extract(index, 3); // Scalar register index for lane 3.
+
+ Vector4f r; // Result built lane by lane and returned by value; the register file is only read here.
+
+ r.x.x = Extract(x[0][index0], 0); // x component: lane 0 comes from register index0, ...
+ r.x.y = Extract(x[0][index1], 1); // ... lane 1 from register index1, ...
+ r.x.z = Extract(x[0][index2], 2); // ... lane 2 from register index2, ...
+ r.x.w = Extract(x[0][index3], 3); // ... lane 3 from register index3.
+
+ r.y.x = Extract(y[0][index0], 0); // Same per-lane pattern for the y component.
+ r.y.y = Extract(y[0][index1], 1);
+ r.y.z = Extract(y[0][index2], 2);
+ r.y.w = Extract(y[0][index3], 3);
+
+ r.z.x = Extract(z[0][index0], 0); // Same per-lane pattern for the z component.
+ r.z.y = Extract(z[0][index1], 1);
+ r.z.z = Extract(z[0][index2], 2);
+ r.z.w = Extract(z[0][index3], 3);
+
+ r.w.x = Extract(w[0][index0], 0); // Same per-lane pattern for the w component.
+ r.w.y = Extract(w[0][index1], 1);
+ r.w.z = Extract(w[0][index2], 2);
+ r.w.w = Extract(w[0][index3], 3);
+
+ return r;
+ }
+
+ void RegisterFile::scatter_x(Int4 index, RValue<Float4> r) // Scatter: lane k of 'r' is stored into lane k of the x component of the register selected by lane k of 'index'.
+ {
+ ASSERT(indirectAddressable); // The x[0][i] indexing used below is the indirect-addressable layout.
+
+ Int index0 = Extract(index, 0); // Scalar register index for lane 0.
+ Int index1 = Extract(index, 1); // Scalar register index for lane 1.
+ Int index2 = Extract(index, 2); // Scalar register index for lane 2.
+ Int index3 = Extract(index, 3); // Scalar register index for lane 3.
+
+ x[0][index0] = Insert(x[0][index0], Extract(r, 0), 0); // Read-modify-write of register index0, inserting lane 0 of r at lane 0.
+ x[0][index1] = Insert(x[0][index1], Extract(r, 1), 1); // Each statement re-reads its target, so lanes sharing an index see the earlier store.
+ x[0][index2] = Insert(x[0][index2], Extract(r, 2), 2);
+ x[0][index3] = Insert(x[0][index3], Extract(r, 3), 3);
+ }
+
+ void RegisterFile::scatter_y(Int4 index, RValue<Float4> r) // Scatter: lane k of 'r' is stored into lane k of the y component of the register selected by lane k of 'index'.
+ {
+ ASSERT(indirectAddressable); // The y[0][i] indexing used below is the indirect-addressable layout.
+
+ Int index0 = Extract(index, 0); // Scalar register index for lane 0.
+ Int index1 = Extract(index, 1); // Scalar register index for lane 1.
+ Int index2 = Extract(index, 2); // Scalar register index for lane 2.
+ Int index3 = Extract(index, 3); // Scalar register index for lane 3.
+
+ y[0][index0] = Insert(y[0][index0], Extract(r, 0), 0); // Read-modify-write of register index0, inserting lane 0 of r at lane 0.
+ y[0][index1] = Insert(y[0][index1], Extract(r, 1), 1); // Each statement re-reads its target, so lanes sharing an index see the earlier store.
+ y[0][index2] = Insert(y[0][index2], Extract(r, 2), 2);
+ y[0][index3] = Insert(y[0][index3], Extract(r, 3), 3);
+ }
+
+ void RegisterFile::scatter_z(Int4 index, RValue<Float4> r) // Scatter: lane k of 'r' is stored into lane k of the z component of the register selected by lane k of 'index'.
+ {
+ ASSERT(indirectAddressable); // The z[0][i] indexing used below is the indirect-addressable layout.
+
+ Int index0 = Extract(index, 0); // Scalar register index for lane 0.
+ Int index1 = Extract(index, 1); // Scalar register index for lane 1.
+ Int index2 = Extract(index, 2); // Scalar register index for lane 2.
+ Int index3 = Extract(index, 3); // Scalar register index for lane 3.
+
+ z[0][index0] = Insert(z[0][index0], Extract(r, 0), 0); // Read-modify-write of register index0, inserting lane 0 of r at lane 0.
+ z[0][index1] = Insert(z[0][index1], Extract(r, 1), 1); // Each statement re-reads its target, so lanes sharing an index see the earlier store.
+ z[0][index2] = Insert(z[0][index2], Extract(r, 2), 2);
+ z[0][index3] = Insert(z[0][index3], Extract(r, 3), 3);
+ }
+
+ void RegisterFile::scatter_w(Int4 index, RValue<Float4> r) // Scatter: lane k of 'r' is stored into lane k of the w component of the register selected by lane k of 'index'.
+ {
+ ASSERT(indirectAddressable); // The w[0][i] indexing used below is the indirect-addressable layout.
+
+ Int index0 = Extract(index, 0); // Scalar register index for lane 0.
+ Int index1 = Extract(index, 1); // Scalar register index for lane 1.
+ Int index2 = Extract(index, 2); // Scalar register index for lane 2.
+ Int index3 = Extract(index, 3); // Scalar register index for lane 3.
+
+ w[0][index0] = Insert(w[0][index0], Extract(r, 0), 0); // Read-modify-write of register index0, inserting lane 0 of r at lane 0.
+ w[0][index1] = Insert(w[0][index1], Extract(r, 1), 1); // Each statement re-reads its target, so lanes sharing an index see the earlier store.
+ w[0][index2] = Insert(w[0][index2], Extract(r, 2), 2);
+ w[0][index3] = Insert(w[0][index3], Extract(r, 3), 3);
+ }
+
void ShaderCore::mov(Vector4f &dst, const Vector4f &src, bool integerDestination)
{
if(integerDestination)
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp
index 249e058..4dc109f 100644
--- a/src/Shader/ShaderCore.hpp
+++ b/src/Shader/ShaderCore.hpp
@@ -147,31 +147,30 @@
Reference<Float4> w;
};
- template<int S, bool D = false>
- class RegisterArray
+ class RegisterFile
{
public:
- RegisterArray(bool dynamic = D) : dynamic(dynamic)
+ RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable)
{
- if(dynamic)
+ if(indirectAddressable)
{
- x = new Array<Float4>(S);
- y = new Array<Float4>(S);
- z = new Array<Float4>(S);
- w = new Array<Float4>(S);
+ x = new Array<Float4>(size);
+ y = new Array<Float4>(size);
+ z = new Array<Float4>(size);
+ w = new Array<Float4>(size);
}
else
{
- x = new Array<Float4>[S];
- y = new Array<Float4>[S];
- z = new Array<Float4>[S];
- w = new Array<Float4>[S];
+ x = new Array<Float4>[size];
+ y = new Array<Float4>[size];
+ z = new Array<Float4>[size];
+ w = new Array<Float4>[size];
}
}
- ~RegisterArray()
+ ~RegisterFile()
{
- if(dynamic)
+ if(indirectAddressable)
{
delete x;
delete y;
@@ -189,7 +188,7 @@
Register operator[](int i)
{
- if(dynamic)
+ if(indirectAddressable)
{
return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
}
@@ -201,19 +200,36 @@
Register operator[](RValue<Int> i)
{
- ASSERT(dynamic);
+ ASSERT(indirectAddressable);
return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
}
- private:
- const bool dynamic;
+ const Vector4f operator[](RValue<Int4> i); // Gather operation (read only).
+
+ void scatter_x(Int4 i, RValue<Float4> r);
+ void scatter_y(Int4 i, RValue<Float4> r);
+ void scatter_z(Int4 i, RValue<Float4> r);
+ void scatter_w(Int4 i, RValue<Float4> r);
+
+ protected:
+ const int size;
+ const bool indirectAddressable;
Array<Float4> *x;
Array<Float4> *y;
Array<Float4> *z;
Array<Float4> *w;
};
+ template<int S, bool I = false> // S: value passed as RegisterFile's size; I: default for the constructor's indirectAddressable argument.
+ class RegisterArray : public RegisterFile // Compile-time-sized wrapper over RegisterFile; declares no members of its own.
+ {
+ public:
+ RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable) // Forwards S and the (possibly overridden) flag to the base class.
+ {
+ }
+ };
+
class ShaderCore
{
typedef Shader::Control Control;
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 4f8ba1a..8dbd600 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -24,7 +24,7 @@
namespace sw
{
VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
- : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)
+ : VertexRoutine(state, shader), shader(shader), r(shader->indirectAddressableTemporaries)
{
ifDepth = 0;
loopRepDepth = 0;
diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp
index 2d7c2c6..9b8d336 100644
--- a/src/Shader/VertexRoutine.cpp
+++ b/src/Shader/VertexRoutine.cpp
@@ -27,8 +27,8 @@
extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader)
- : v(shader && shader->dynamicallyIndexedInput),
- o(shader && shader->dynamicallyIndexedOutput),
+ : v(shader && shader->indirectAddressableInput),
+ o(shader && shader->indirectAddressableOutput),
state(state)
{
}
diff --git a/src/Shader/VertexShader.cpp b/src/Shader/VertexShader.cpp
index 33c2241..8f1c4f8 100644
--- a/src/Shader/VertexShader.cpp
+++ b/src/Shader/VertexShader.cpp
@@ -176,7 +176,7 @@
setOutput(posReg, 4, sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0));
positionRegister = posReg;
}
-
+
void VertexShader::setPointSizeRegister(int ptSizeReg)
{
setOutput(ptSizeReg, 4, sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0));
@@ -207,7 +207,7 @@
analyzeDynamicBranching();
analyzeSamplers();
analyzeCallSites();
- analyzeDynamicIndexing();
+ analyzeIndirectAddressing();
}
void VertexShader::analyzeInput()