Support 3-component integer formats natively.
Change-Id: Id48bc7a232c50b753da64cb914e75b5d590ae47d
Reviewed-on: https://swiftshader-review.googlesource.com/14369
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp
index 6bcc657..3147177 100644
--- a/src/Renderer/Surface.cpp
+++ b/src/Renderer/Surface.cpp
@@ -3571,9 +3571,11 @@
case FORMAT_R32UI:
return FORMAT_R32UI;
case FORMAT_X16B16G16R16I:
+ return FORMAT_X16B16G16R16I;
case FORMAT_A16B16G16R16I:
return FORMAT_A16B16G16R16I;
case FORMAT_X16B16G16R16UI:
+ return FORMAT_X16B16G16R16UI;
case FORMAT_A16B16G16R16UI:
return FORMAT_A16B16G16R16UI;
case FORMAT_A2R10G10B10:
@@ -3581,9 +3583,11 @@
case FORMAT_A16B16G16R16:
return FORMAT_A16B16G16R16;
case FORMAT_X32B32G32R32I:
+ return FORMAT_X32B32G32R32I;
case FORMAT_A32B32G32R32I:
return FORMAT_A32B32G32R32I;
case FORMAT_X32B32G32R32UI:
+ return FORMAT_X32B32G32R32UI;
case FORMAT_A32B32G32R32UI:
return FORMAT_A32B32G32R32UI;
case FORMAT_G8R8I:
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index c51dbf9..0c86815 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -2001,6 +2001,13 @@
c.w = Pointer<Short4>(buffer[f3])[index[3]];
transpose4x4(c.x, c.y, c.z, c.w);
break;
+ case 3:
+ c.x = Pointer<Short4>(buffer[f0])[index[0]];
+ c.y = Pointer<Short4>(buffer[f1])[index[1]];
+ c.z = Pointer<Short4>(buffer[f2])[index[2]];
+ c.w = Pointer<Short4>(buffer[f3])[index[3]];
+ transpose4x3(c.x, c.y, c.z, c.w);
+ break;
case 2:
c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
@@ -2159,13 +2166,11 @@
transpose4x4(c.x, c.y, c.z, c.w);
break;
case 3:
- ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
transpose4x3(c.x, c.y, c.z, c.w);
- c.w = Float4(1.0f);
break;
case 2:
// FIXME: Optimal shuffling?
diff --git a/src/Shader/ShaderCore.cpp b/src/Shader/ShaderCore.cpp
index ec159fd..5b2c1ae 100644
--- a/src/Shader/ShaderCore.cpp
+++ b/src/Shader/ShaderCore.cpp
@@ -490,6 +490,18 @@
row3 = UnpackHigh(tmp0, tmp1);
}
+ void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3)   // Short4 overload: overwrites row0, row1 and row2 in place; row3 is only read, never written.
+ {
+ Int2 tmp0 = UnpackHigh(row0, row1);   // UnpackHigh of input rows 0 and 1; feeds the new row2.
+ Int2 tmp1 = UnpackHigh(row2, row3);   // UnpackHigh of input rows 2 and 3; feeds the new row2.
+ Int2 tmp2 = UnpackLow(row0, row1);   // UnpackLow of input rows 0 and 1; feeds the new row0 and row1.
+ Int2 tmp3 = UnpackLow(row2, row3);   // UnpackLow of input rows 2 and 3; feeds the new row0 and row1.
+
+ row0 = UnpackLow(tmp2, tmp3);   // All four inputs were captured in tmp0..tmp3 above, so overwriting the rows here is safe.
+ row1 = UnpackHigh(tmp2, tmp3);
+ row2 = UnpackLow(tmp0, tmp1);   // No UnpackHigh(tmp0, tmp1) follows, so row3 keeps its input value; NOTE(review): presumably callers ignore row3 afterwards - confirm at call sites.
+ }
+
void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3)
{
Float4 tmp0 = UnpackLow(row0, row1);
diff --git a/src/Shader/ShaderCore.hpp b/src/Shader/ShaderCore.hpp
index e998bcc..b0ad3a0 100644
--- a/src/Shader/ShaderCore.hpp
+++ b/src/Shader/ShaderCore.hpp
@@ -82,6 +82,7 @@
Float4 dot4(const Vector4f &v0, const Vector4f &v1);
void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
+ void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3);
void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);
void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);