Correct reciprocal approximation for power-of-two values. Intel's reciprocal approximation instruction is not exact for power-of-two values. It provides 12 bits of mantissa precision and keeps a balance between positive and negative errors, but the result it returns for 2^x is not exactly 2^-x. This affects conformance tests which expect varyings not to be affected by the perspective division. Correct for this by multiplying by the inverse. Bug 27165393 Change-Id: Ie52ec511a14a4f447adc47ce9c875bbad03cd274 Reviewed-on: https://swiftshader-review.googlesource.com/4903 Tested-by: Nicolas Capens <capn@google.com> Reviewed-by: Alexis Hétu <sugoi@google.com> Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp index c43c87c..2b5f087 100644 --- a/src/Shader/PixelRoutine.cpp +++ b/src/Shader/PixelRoutine.cpp
@@ -140,7 +140,7 @@ if(interpolateW()) { w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false); - rhw = reciprocal(w); + rhw = reciprocal(w, false, false, true); if(state.centroid) { @@ -518,7 +518,7 @@ zMask = SignMask(zTest) & cMask; break; } - + if(state.stencilActive) { zMask &= sMask; @@ -687,12 +687,12 @@ Int pitch; if(!state.quadLayoutDepthBuffer) - { + { buffer = zBuffer + 4 * x; pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); } else - { + { buffer = zBuffer + 8 * x; } @@ -761,7 +761,7 @@ } Byte8 bufferValue = As<Byte8>(Long1(*Pointer<UInt>(buffer))); - + Byte8 newValue; stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); @@ -945,7 +945,7 @@ ASSERT(false); } } - + void PixelRoutine::blendFactorAlpha(const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) { switch(blendFactorAlphaActive) @@ -1170,7 +1170,7 @@ current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y)); current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z)); } - + if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) { pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x)); @@ -1229,7 +1229,7 @@ { current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w)); } - + if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) { pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w)); @@ -1828,7 +1828,7 @@ } } - void PixelRoutine::blendFactor(const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) + void PixelRoutine::blendFactor(const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) { switch(blendFactorActive) { @@ -1899,7 +1899,7 @@ } } - void PixelRoutine::blendFactorAlpha(const Vector4f &blendFactor, 
const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) + void PixelRoutine::blendFactorAlpha(const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) { switch(blendFactorAlphaActive) { @@ -2041,7 +2041,7 @@ oC.y *= sourceFactor.y; oC.z *= sourceFactor.z; } - + if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) { pixel.x *= destFactor.x; @@ -2100,7 +2100,7 @@ { oC.w *= sourceFactor.w; } - + if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) { pixel.w *= destFactor.w; @@ -2118,10 +2118,10 @@ pixel.w -= oC.w; oC.w = pixel.w; break; - case BLENDOP_MIN: + case BLENDOP_MIN: oC.w = Min(oC.w, pixel.w); break; - case BLENDOP_MAX: + case BLENDOP_MAX: oC.w = Max(oC.w, pixel.w); break; case BLENDOP_SOURCE: @@ -2272,7 +2272,7 @@ masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); } - + oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); @@ -2283,7 +2283,7 @@ value = *Pointer<Float4>(buffer + 16, 16); if(rgbaWriteMask != 0x0000000F) - { + { Float4 masked = value; oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));