Fix unaligned access on depth values
This was tested by using __writeeflags(__readeflags() | 0x40000) to
enable alignment checks on x86.
Bug: b/169957911
Change-Id: Ie97b2fda281548fac94b13abe93213a9a1495b0c
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/48929
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 9fbfe80..d9e50c3 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -410,9 +410,7 @@
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{
- // FIXME: Properly optimizes?
- zValue.xy = *Pointer<Float4>(buffer);
- zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
+ zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
}
Int4 zTest;
@@ -489,9 +487,8 @@
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{
- // FIXME: Properly optimizes?
- zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
- zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
+ zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer), 0));
+ zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer + pitch), 1));
}
Int4 zTest;
@@ -559,9 +556,13 @@
}
if(state.depthFormat == VK_FORMAT_D16_UNORM)
+ {
return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
+ }
else
+ {
return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
+ }
}
void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha)
@@ -603,16 +604,13 @@
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{
- // FIXME: Properly optimizes?
- zValue.xy = *Pointer<Float4>(buffer);
- zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
+ zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
}
Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + zMask * 16, 16));
zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + zMask * 16, 16));
Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
- // FIXME: Properly optimizes?
*Pointer<Float2>(buffer) = Float2(Z.xy);
*Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
}
@@ -638,20 +636,16 @@
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{
- // FIXME: Properly optimizes?
- zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);
- zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));
+ zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer), 0));
+ zValue = As<Short4>(Insert(As<Int2>(zValue), *Pointer<Int>(buffer + pitch), 1));
}
Z = Z & *Pointer<Short4>(constants + OFFSET(Constants, maskW4Q) + zMask * 8, 8);
zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants, invMaskW4Q) + zMask * 8, 8);
Z = Z | zValue;
- // FIXME: Properly optimizes?
- *Pointer<Short>(buffer) = Extract(Z, 0);
- *Pointer<Short>(buffer + 2) = Extract(Z, 1);
- *Pointer<Short>(buffer + pitch) = Extract(Z, 2);
- *Pointer<Short>(buffer + pitch + 2) = Extract(Z, 3);
+ *Pointer<Int>(buffer) = Extract(As<Int2>(Z), 0);
+ *Pointer<Int>(buffer + pitch) = Extract(As<Int2>(Z), 1);
}
void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask)
@@ -662,9 +656,13 @@
}
if(state.depthFormat == VK_FORMAT_D16_UNORM)
+ {
writeDepth16(zBuffer, q, x, z, zMask);
+ }
else
+ {
writeDepth32F(zBuffer, q, x, z, zMask);
+ }
}
void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, const Int &x, const Int &sMask, const Int &zMask, const Int &cMask)
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 97053dc..d342595 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -4083,6 +4083,15 @@
*this = RValue<Float>(rhs.loadValue());
}
+Float4::Float4(RValue<Float2> lo, RValue<Float2> hi)  // Builds a Float4 whose xy lanes come from lo and whose zw lanes come from hi.
+ : XYZW(this)
+{
+ int shuffle[4] = { 0, 1, 4, 5 }; // Lanes 0,1 select lo's pair; 4,5 select hi's pair. Real type is v4i32 (note as found; presumably a 4-wide float vector here - TODO confirm)
+ Value *packed = Nucleus::createShuffleVector(lo.value(), hi.value(), shuffle);  // Single shuffle combining both operands
+
+ storeValue(packed);
+}
+
RValue<Float4> Float4::operator=(float x)
{
return *this = Float4(x, x, x, x);
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 1e08698..e301976 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2254,6 +2254,7 @@
Float4(const Swizzle2<Float4, X> &x, const SwizzleMask2<Float4, Y> &y);
template<int X, int Y>
Float4(const SwizzleMask2<Float4, X> &x, const SwizzleMask2<Float4, Y> &y);
+ Float4(RValue<Float2> lo, RValue<Float2> hi);  // lo supplies the xy lanes, hi supplies the zw lanes
RValue<Float4> operator=(float replicate);
RValue<Float4> operator=(RValue<Float4> rhs);