Add utility function to transpose BGRA data. The current transpose4x4() function assumes the input and the output channels are in the same order, whereas, when using BGRA, we want to reorder the channels so that the output is: B0G0R0A0, B1G1R1A1, B2G2R2A2, B3G3R3A3. The new transpose4x4zyxw() utility function was added for this purpose. Bug: b/204322086 Change-Id: Ic73118c8fb3ba307620041063863146e5053e263 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/69769 Tested-by: Alexis Hétu <sugoi@google.com> Commit-Queue: Alexis Hétu <sugoi@google.com> Reviewed-by: Jonah Ryan-Davis <jonahr@google.com> Kokoro-Result: kokoro <noreply+kokoro@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp index 28c1e4b..ac9f91f 100644 --- a/src/Pipeline/ShaderCore.cpp +++ b/src/Pipeline/ShaderCore.cpp
@@ -777,6 +777,19 @@ row3 = Float4(tmp2.zw, tmp3.zw); } +void transpose4x4zyxw(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) +{ + Float4 tmp0 = UnpackLow(row0, row1); + Float4 tmp1 = UnpackLow(row2, row3); + Float4 tmp2 = UnpackHigh(row0, row1); + Float4 tmp3 = UnpackHigh(row2, row3); + + row2 = Float4(tmp0.xy, tmp1.xy); + row1 = Float4(tmp0.zw, tmp1.zw); + row0 = Float4(tmp2.xy, tmp3.xy); + row3 = Float4(tmp2.zw, tmp3.zw); +} + void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) { Float4 tmp0 = UnpackLow(row0, row1);
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp index c49b01f..894291a 100644 --- a/src/Pipeline/ShaderCore.hpp +++ b/src/Pipeline/ShaderCore.hpp
@@ -173,6 +173,7 @@ void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); +void transpose4x4zyxw(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3);