Use SIMD types throughout graphics and compute pipelines
sw::SIMD::Float currently aliases rr::Float4, so we can replace all of
the latter with the former where we intend to support scaling to wider
SIMD vectors.
Bug: b/237494823
Change-Id: I04593aee136456d509b41ec9a977ee19fea1c268
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/66808
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Device/QuadRasterizer.cpp b/src/Device/QuadRasterizer.cpp
index e2848c3..b69945e 100644
--- a/src/Device/QuadRasterizer.cpp
+++ b/src/Device/QuadRasterizer.cpp
@@ -122,20 +122,20 @@
x1 = Max(x1, Max(x1a, x1b));
}
- Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
+ SIMD::Float yyyy = SIMD::Float(Float(y)) + SIMD::Float(*Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16));
if(interpolateZ())
{
for(unsigned int q = 0; q < state.multiSampleCount; q++)
{
- Float4 y = yyyy;
+ SIMD::Float y = yyyy;
if(state.enableMultiSampling)
{
- y += Float4(*Pointer<Float>(constants + OFFSET(Constants, SampleLocationsY) + q * sizeof(float)));
+ y += SIMD::Float(*Pointer<Float>(constants + OFFSET(Constants, SampleLocationsY) + q * sizeof(float)));
}
- Dz[q] = Float4(*Pointer<Float>(primitive + OFFSET(Primitive, z.C))) + y * Float4(*Pointer<Float>(primitive + OFFSET(Primitive, z.B)));
+ Dz[q] = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, z.C))) + y * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, z.B)));
}
}
@@ -143,7 +143,7 @@
{
if(interpolateW())
{
- Dw = Float4(*Pointer<Float>(primitive + OFFSET(Primitive, w.C))) + yyyy * Float4(*Pointer<Float>(primitive + OFFSET(Primitive, w.B)));
+ Dw = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, w.C))) + yyyy * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, w.B)));
}
if(spirvShader)
@@ -153,11 +153,11 @@
{
if(spirvShader->inputs[interfaceInterpolant].Type != SpirvShader::ATTRIBTYPE_UNUSED)
{
- Dv[interfaceInterpolant] = Float4(*Pointer<Float>(primitive + OFFSET(Primitive, V[packedInterpolant].C)));
+ Dv[interfaceInterpolant] = *Pointer<Float>(primitive + OFFSET(Primitive, V[packedInterpolant].C));
if(!spirvShader->inputs[interfaceInterpolant].Flat)
{
Dv[interfaceInterpolant] +=
- yyyy * Float4(*Pointer<Float>(primitive + OFFSET(Primitive, V[packedInterpolant].B)));
+ yyyy * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, V[packedInterpolant].B)));
}
packedInterpolant++;
}
@@ -165,14 +165,14 @@
for(unsigned int i = 0; i < state.numClipDistances; i++)
{
- DclipDistance[i] = Float4(*Pointer<Float>(primitive + OFFSET(Primitive, clipDistance[i].C))) +
- yyyy * Float4(*Pointer<Float>(primitive + OFFSET(Primitive, clipDistance[i].B)));
+ DclipDistance[i] = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, clipDistance[i].C))) +
+ yyyy * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, clipDistance[i].B)));
}
for(unsigned int i = 0; i < state.numCullDistances; i++)
{
- DcullDistance[i] = Float4(*Pointer<Float>(primitive + OFFSET(Primitive, cullDistance[i].C))) +
- yyyy * Float4(*Pointer<Float>(primitive + OFFSET(Primitive, cullDistance[i].B)));
+ DcullDistance[i] = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, cullDistance[i].C))) +
+ yyyy * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, cullDistance[i].B)));
}
}
@@ -230,14 +230,14 @@
Until(y >= yMax);
}
-Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
+SIMD::Float QuadRasterizer::interpolate(SIMD::Float &x, SIMD::Float &D, SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
{
if(flat)
{
return D;
}
- Float4 interpolant = mulAdd(x, Float4(*Pointer<Float>(planeEquation + OFFSET(PlaneEquation, A))), D);
+ SIMD::Float interpolant = mulAdd(x, SIMD::Float(*Pointer<Float>(planeEquation + OFFSET(PlaneEquation, A))), D);
if(perspective)
{
diff --git a/src/Device/QuadRasterizer.hpp b/src/Device/QuadRasterizer.hpp
index 2f19d1d..9ef2ee1 100644
--- a/src/Device/QuadRasterizer.hpp
+++ b/src/Device/QuadRasterizer.hpp
@@ -33,12 +33,12 @@
protected:
Pointer<Byte> constants;
- Float4 Dz[4];
- Float4 Dw;
- Float4 Dv[MAX_INTERFACE_COMPONENTS];
- Float4 Df;
- Float4 DclipDistance[MAX_CLIP_DISTANCES];
- Float4 DcullDistance[MAX_CULL_DISTANCES];
+ SIMD::Float Dz[4];
+ SIMD::Float Dw;
+ SIMD::Float Dv[MAX_INTERFACE_COMPONENTS];
+ SIMD::Float Df;
+ SIMD::Float DclipDistance[MAX_CLIP_DISTANCES];
+ SIMD::Float DcullDistance[MAX_CULL_DISTANCES];
UInt occlusion;
@@ -46,7 +46,7 @@
bool interpolateZ() const;
bool interpolateW() const;
- Float4 interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
+ SIMD::Float interpolate(SIMD::Float &x, SIMD::Float &D, SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
const PixelProcessor::State &state;
const SpirvShader *const spirvShader;
diff --git a/src/Pipeline/ComputeProgram.cpp b/src/Pipeline/ComputeProgram.cpp
index cbdf4ce..d142bb8 100644
--- a/src/Pipeline/ComputeProgram.cpp
+++ b/src/Pipeline/ComputeProgram.cpp
@@ -106,7 +106,7 @@
localInvocationID[0] = idx;
}
- Int4 wgID = Insert(Insert(Insert(SIMD::Int(0), workgroupID[0], 0), workgroupID[1], 1), workgroupID[2], 2);
+ Int4 wgID = Insert(Insert(Insert(Int4(0), workgroupID[0], 0), workgroupID[1], 1), workgroupID[2], 2);
auto localBase = workgroupSize * wgID;
SIMD::Int globalInvocationID[3];
globalInvocationID[0] = SIMD::Int(Extract(localBase, 0)) + localInvocationID[0];
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index cfdf2d2..3405458 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -31,9 +31,10 @@
{
}
-// Union all cMask and return it as 4 booleans
-Int4 PixelProgram::maskAny(Int cMask[4], const SampleSet &samples)
+// Union all cMask and return it as Booleans
+SIMD::Int PixelProgram::maskAny(Int cMask[4], const SampleSet &samples)
{
+ ASSERT(SIMD::Width == 4);
// See if at least 1 sample is used
Int maskUnion = 0;
for(unsigned int q : samples)
@@ -41,17 +42,18 @@
maskUnion |= cMask[q];
}
- // Convert to 4 booleans
- Int4 laneBits = Int4(1, 2, 4, 8);
- Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
- Int4 mask(maskUnion);
- mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
+ // Convert to Booleans
+ SIMD::Int laneBits = SIMD::Int(1, 2, 4, 8);
+ SIMD::Int laneShiftsToMSB = SIMD::Int(31, 30, 29, 28);
+ SIMD::Int mask(maskUnion);
+ mask = ((mask & laneBits) << laneShiftsToMSB) >> 31;
return mask;
}
-// Union all cMask/sMask/zMask and return it as 4 booleans
-Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
+// Union all cMask/sMask/zMask and return it as Booleans
+SIMD::Int PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
{
+ ASSERT(SIMD::Width == 4);
// See if at least 1 sample is used
Int maskUnion = 0;
for(unsigned int q : samples)
@@ -59,15 +61,15 @@
maskUnion |= (cMask[q] & sMask[q] & zMask[q]);
}
- // Convert to 4 booleans
- Int4 laneBits = Int4(1, 2, 4, 8);
- Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
- Int4 mask(maskUnion);
- mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
+ // Convert to Booleans
+ SIMD::Int laneBits = SIMD::Int(1, 2, 4, 8);
+ SIMD::Int laneShiftsToMSB = SIMD::Int(31, 30, 29, 28);
+ SIMD::Int mask(maskUnion);
+ mask = ((mask & laneBits) << laneShiftsToMSB) >> 31;
return mask;
}
-void PixelProgram::setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples)
+void PixelProgram::setBuiltins(Int &x, Int &y, SIMD::Float (&z)[4], SIMD::Float &w, Int cMask[4], const SampleSet &samples)
{
routine.setImmutableInputBuiltins(spirvShader);
@@ -148,28 +150,28 @@
if(it != spirvShader->inputBuiltins.end())
{
ASSERT(it->second.SizeInComponents == 1);
- auto frontFacing = Int4(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
- routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(frontFacing);
+ auto frontFacing = SIMD::Int(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
+ routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(frontFacing);
}
it = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask);
if(it != spirvShader->inputBuiltins.end())
{
- static_assert(SIMD::Width == 4, "Expects SIMD width to be 4");
- Int4 laneBits = Int4(1, 2, 4, 8);
+ ASSERT(SIMD::Width == 4);
+ SIMD::Int laneBits = SIMD::Int(1, 2, 4, 8);
- Int4 inputSampleMask = 0;
+ SIMD::Int inputSampleMask = 0;
for(unsigned int q : samples)
{
- inputSampleMask |= Int4(1 << q) & CmpNEQ(Int4(cMask[q]) & laneBits, Int4(0));
+ inputSampleMask |= SIMD::Int(1 << q) & CmpNEQ(SIMD::Int(cMask[q]) & laneBits, 0);
}
- routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(inputSampleMask);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(inputSampleMask);
// Sample mask input is an array, as the spec contemplates MSAA levels higher than 32.
// Fill any non-zero indices with 0.
for(auto i = 1u; i < it->second.SizeInComponents; i++)
{
- routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = Float4(0);
+ routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = 0;
}
}
@@ -195,8 +197,8 @@
// Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
// handled separately, through the cMask.
- auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
- auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask, samples);
+ SIMD::Int activeLaneMask = 0xFFFFFFFF;
+ SIMD::Int storesAndAtomicsMask = maskAny(cMask, sMask, zMask, samples);
routine.discardMask = 0;
spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets, state.multiSampleCount);
@@ -395,10 +397,10 @@
case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
- color[index].x = Min(Max(color[index].x, Float4(0.0f)), Float4(1.0f));
- color[index].y = Min(Max(color[index].y, Float4(0.0f)), Float4(1.0f));
- color[index].z = Min(Max(color[index].z, Float4(0.0f)), Float4(1.0f));
- color[index].w = Min(Max(color[index].w, Float4(0.0f)), Float4(1.0f));
+ color[index].x = Min(Max(color[index].x, 0.0f), 1.0f);
+ color[index].y = Min(Max(color[index].y, 0.0f), 1.0f);
+ color[index].z = Min(Max(color[index].z, 0.0f), 1.0f);
+ color[index].w = Min(Max(color[index].w, 0.0f), 1.0f);
break;
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32G32_SFLOAT:
diff --git a/src/Pipeline/PixelProgram.hpp b/src/Pipeline/PixelProgram.hpp
index 465d185..f367fee 100644
--- a/src/Pipeline/PixelProgram.hpp
+++ b/src/Pipeline/PixelProgram.hpp
@@ -31,7 +31,7 @@
virtual ~PixelProgram() {}
protected:
- virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples);
+ virtual void setBuiltins(Int &x, Int &y, SIMD::Float (&z)[4], SIMD::Float &w, Int cMask[4], const SampleSet &samples);
virtual void executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples);
virtual Bool alphaTest(Int cMask[4], const SampleSet &samples);
virtual void blendColor(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples);
@@ -43,8 +43,8 @@
// Raster operations
void clampColor(Vector4f color[MAX_COLOR_BUFFERS]);
- static Int4 maskAny(Int cMask[4], const SampleSet &samples);
- static Int4 maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples);
+ static SIMD::Int maskAny(Int cMask[4], const SampleSet &samples);
+ static SIMD::Int maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples);
};
} // namespace sw
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 74fac88..05cc345 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -73,7 +73,7 @@
Int zMask[4]; // Depth mask
Int sMask[4]; // Stencil mask
- Float4 unclampedZ[4];
+ SIMD::Float unclampedZ[4];
for(int invocation = 0; invocation < invocationCount; invocation++)
{
@@ -92,26 +92,26 @@
stencilTest(sBuffer, x, sMask, samples);
- Float4 rhwCentroid;
+ SIMD::Float rhwCentroid;
- Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16);
+ SIMD::Float xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive, xQuad), 16);
if(interpolateZ())
{
for(unsigned int q : samples)
{
- Float4 x = xxxx;
+ SIMD::Float x = xxxx;
if(state.enableMultiSampling)
{
- x -= Float4(*Pointer<Float>(constants + OFFSET(Constants, SampleLocationsX) + q * sizeof(float)));
+ x -= SIMD::Float(*Pointer<Float>(constants + OFFSET(Constants, SampleLocationsX) + q * sizeof(float)));
}
z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive, z), false, false);
if(state.depthBias)
{
- z[q] += Float4(*Pointer<Float>(primitive + OFFSET(Primitive, zBias)));
+ z[q] += SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, zBias)));
}
unclampedZ[q] = z[q];
@@ -140,21 +140,21 @@
occlusionSampleCount(zMask, sMask, samples);
}
- Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16);
+ SIMD::Float yyyy = SIMD::Float(Float(y)) + SIMD::Float(*Pointer<Float4>(primitive + OFFSET(Primitive, yQuad), 16));
// Centroid locations
- Float4 XXXX = 0.0f;
- Float4 YYYY = 0.0f;
+ SIMD::Float XXXX = 0.0f;
+ SIMD::Float YYYY = 0.0f;
if(state.centroid || shaderContainsInterpolation) // TODO(b/194714095)
{
- Float4 WWWW(1.0e-9f);
+ SIMD::Float WWWW = 1.0e-9f;
for(unsigned int q : samples)
{
- XXXX += *Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]);
- YYYY += *Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]);
- WWWW += *Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]);
+ XXXX += SIMD::Float(*Pointer<Float4>(constants + OFFSET(Constants, sampleX[q]) + 16 * cMask[q]));
+ YYYY += SIMD::Float(*Pointer<Float4>(constants + OFFSET(Constants, sampleY[q]) + 16 * cMask[q]));
+ WWWW += SIMD::Float(*Pointer<Float4>(constants + OFFSET(Constants, weight) + 16 * cMask[q]));
}
WWWW = Rcp(WWWW, true /* relaxedPrecision */);
@@ -417,9 +417,9 @@
}
}
-Bool PixelRoutine::depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
+Bool PixelRoutine::depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask)
{
- Float4 Z = z;
+ SIMD::Float Z = z;
Pointer<Byte> buffer = zBuffer + 4 * x;
Int pitch = *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
@@ -429,14 +429,14 @@
buffer += q * *Pointer<Int>(data + OFFSET(DrawData, depthSliceB));
}
- Float4 zValue;
+ SIMD::Float zValue;
if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
{
zValue = Float4(*Pointer<Float2>(buffer), *Pointer<Float2>(buffer + pitch));
}
- Int4 zTest;
+ SIMD::Int zTest;
switch(state.depthCompareMode)
{
@@ -489,7 +489,7 @@
return zMask != 0;
}
-Bool PixelRoutine::depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
+Bool PixelRoutine::depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask)
{
Short4 Z = convertFixed16(z, true);
@@ -566,7 +566,7 @@
return zMask != 0;
}
-Float4 PixelRoutine::clampDepth(const Float4 &z)
+SIMD::Float PixelRoutine::clampDepth(const SIMD::Float &z)
{
if(!state.depthClamp)
{
@@ -576,7 +576,7 @@
return Min(Max(z, state.minDepthClamp), state.maxDepthClamp);
}
-Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask)
+Bool PixelRoutine::depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask)
{
if(!state.depthTestActive)
{
@@ -663,7 +663,7 @@
}
}
-void PixelRoutine::alphaToCoverage(Int cMask[4], const Float4 &alpha, const SampleSet &samples)
+void PixelRoutine::alphaToCoverage(Int cMask[4], const SIMD::Float &alpha, const SampleSet &samples)
{
static const int a2c[4] = {
OFFSET(DrawData, a2c0),
@@ -674,7 +674,7 @@
for(unsigned int q : samples)
{
- Int4 coverage = CmpNLT(alpha, Float4(*Pointer<Float>(data + a2c[q])));
+ SIMD::Int coverage = CmpNLT(alpha, SIMD::Float(*Pointer<Float>(data + a2c[q])));
Int aMask = SignMask(coverage);
cMask[q] &= aMask;
}
@@ -1919,7 +1919,7 @@
}
}
-void PixelRoutine::blendFactorAlpha(Float4 &blendFactorAlpha, const Float4 &sourceAlpha, const Float4 &destAlpha, VkBlendFactor alphaBlendFactor, vk::Format format)
+void PixelRoutine::blendFactorAlpha(SIMD::Float &blendFactorAlpha, const SIMD::Float &sourceAlpha, const SIMD::Float &destAlpha, VkBlendFactor alphaBlendFactor, vk::Format format)
{
switch(alphaBlendFactor)
{
@@ -1984,103 +1984,103 @@
}
}
-Float4 PixelRoutine::blendOpOverlay(Float4 &src, Float4 &dst)
+SIMD::Float PixelRoutine::blendOpOverlay(SIMD::Float &src, SIMD::Float &dst)
{
- Int4 largeDst = CmpGT(dst, 0.5f);
- return As<Float4>(
- (~largeDst & As<Int4>(2.0f * src * dst)) |
- (largeDst & As<Int4>(1.0f - (2.0f * (1.0f - src) * (1.0f - dst)))));
+ SIMD::Int largeDst = CmpGT(dst, 0.5f);
+ return As<SIMD::Float>(
+ (~largeDst & As<SIMD::Int>(2.0f * src * dst)) |
+ (largeDst & As<SIMD::Int>(1.0f - (2.0f * (1.0f - src) * (1.0f - dst)))));
}
-Float4 PixelRoutine::blendOpColorDodge(Float4 &src, Float4 &dst)
+SIMD::Float PixelRoutine::blendOpColorDodge(SIMD::Float &src, SIMD::Float &dst)
{
- Int4 srcBelowOne = CmpLT(src, 1.0f);
- Int4 positiveDst = CmpGT(dst, 0.0f);
- return As<Float4>(positiveDst & ((~srcBelowOne & As<Int4>(Float4(1.0f))) |
- (srcBelowOne & As<Int4>(Min(1.0f, (dst / (1.0f - src)))))));
+ SIMD::Int srcBelowOne = CmpLT(src, 1.0f);
+ SIMD::Int positiveDst = CmpGT(dst, 0.0f);
+ return As<SIMD::Float>(positiveDst & ((~srcBelowOne & As<SIMD::Int>(SIMD::Float(1.0f))) |
+ (srcBelowOne & As<SIMD::Int>(Min(1.0f, (dst / (1.0f - src)))))));
}
-Float4 PixelRoutine::blendOpColorBurn(Float4 &src, Float4 &dst)
+SIMD::Float PixelRoutine::blendOpColorBurn(SIMD::Float &src, SIMD::Float &dst)
{
- Int4 dstBelowOne = CmpLT(dst, 1.0f);
- Int4 positiveSrc = CmpGT(src, 0.0f);
- return As<Float4>(
- (~dstBelowOne & As<Int4>(Float4(1.0f))) |
- (dstBelowOne & positiveSrc & As<Int4>(1.0f - Min(1.0f, (1.0f - dst) / src))));
+ SIMD::Int dstBelowOne = CmpLT(dst, 1.0f);
+ SIMD::Int positiveSrc = CmpGT(src, 0.0f);
+ return As<SIMD::Float>(
+ (~dstBelowOne & As<SIMD::Int>(SIMD::Float(1.0f))) |
+ (dstBelowOne & positiveSrc & As<SIMD::Int>(1.0f - Min(1.0f, (1.0f - dst) / src))));
}
-Float4 PixelRoutine::blendOpHardlight(Float4 &src, Float4 &dst)
+SIMD::Float PixelRoutine::blendOpHardlight(SIMD::Float &src, SIMD::Float &dst)
{
- Int4 largeSrc = CmpGT(src, 0.5f);
- return As<Float4>(
- (~largeSrc & As<Int4>(2.0f * src * dst)) |
- (largeSrc & As<Int4>(1.0f - (2.0f * (1.0f - src) * (1.0f - dst)))));
+ SIMD::Int largeSrc = CmpGT(src, 0.5f);
+ return As<SIMD::Float>(
+ (~largeSrc & As<SIMD::Int>(2.0f * src * dst)) |
+ (largeSrc & As<SIMD::Int>(1.0f - (2.0f * (1.0f - src) * (1.0f - dst)))));
}
-Float4 PixelRoutine::blendOpSoftlight(Float4 &src, Float4 &dst)
+SIMD::Float PixelRoutine::blendOpSoftlight(SIMD::Float &src, SIMD::Float &dst)
{
- Int4 largeSrc = CmpGT(src, 0.5f);
- Int4 largeDst = CmpGT(dst, 0.25f);
+ SIMD::Int largeSrc = CmpGT(src, 0.5f);
+ SIMD::Int largeDst = CmpGT(dst, 0.25f);
- return As<Float4>(
- (~largeSrc & As<Int4>(dst - ((1.0f - (2.0f * src)) * dst * (1.0f - dst)))) |
- (largeSrc & ((~largeDst & As<Int4>(dst + (((2.0f * src) - 1.0f) * dst * ((((16.0f * dst) - 12.0f) * dst) + 3.0f)))) |
- (largeDst & As<Int4>(dst + (((2.0f * src) - 1.0f) * (Sqrt<Mediump>(dst) - dst)))))));
+ return As<SIMD::Float>(
+ (~largeSrc & As<SIMD::Int>(dst - ((1.0f - (2.0f * src)) * dst * (1.0f - dst)))) |
+ (largeSrc & ((~largeDst & As<SIMD::Int>(dst + (((2.0f * src) - 1.0f) * dst * ((((16.0f * dst) - 12.0f) * dst) + 3.0f)))) |
+ (largeDst & As<SIMD::Int>(dst + (((2.0f * src) - 1.0f) * (Sqrt<Mediump>(dst) - dst)))))));
}
-Float4 PixelRoutine::maxRGB(Vector4f &c)
+SIMD::Float PixelRoutine::maxRGB(Vector4f &c)
{
return Max(Max(c.x, c.y), c.z);
}
-Float4 PixelRoutine::minRGB(Vector4f &c)
+SIMD::Float PixelRoutine::minRGB(Vector4f &c)
{
return Min(Min(c.x, c.y), c.z);
}
-void PixelRoutine::setLumSat(Vector4f &cbase, Vector4f &csat, Vector4f &clum, Float4 &x, Float4 &y, Float4 &z)
+void PixelRoutine::setLumSat(Vector4f &cbase, Vector4f &csat, Vector4f &clum, SIMD::Float &x, SIMD::Float &y, SIMD::Float &z)
{
- Float4 minbase = minRGB(cbase);
- Float4 sbase = maxRGB(cbase) - minbase;
- Float4 ssat = maxRGB(csat) - minRGB(csat);
- Int4 isNonZero = CmpGT(sbase, 0.0f);
+ SIMD::Float minbase = minRGB(cbase);
+ SIMD::Float sbase = maxRGB(cbase) - minbase;
+ SIMD::Float ssat = maxRGB(csat) - minRGB(csat);
+ SIMD::Int isNonZero = CmpGT(sbase, 0.0f);
Vector4f color;
- color.x = As<Float4>(isNonZero & As<Int4>((cbase.x - minbase) * ssat / sbase));
- color.y = As<Float4>(isNonZero & As<Int4>((cbase.y - minbase) * ssat / sbase));
- color.z = As<Float4>(isNonZero & As<Int4>((cbase.z - minbase) * ssat / sbase));
+ color.x = As<SIMD::Float>(isNonZero & As<SIMD::Int>((cbase.x - minbase) * ssat / sbase));
+ color.y = As<SIMD::Float>(isNonZero & As<SIMD::Int>((cbase.y - minbase) * ssat / sbase));
+ color.z = As<SIMD::Float>(isNonZero & As<SIMD::Int>((cbase.z - minbase) * ssat / sbase));
setLum(color, clum, x, y, z);
}
-Float4 PixelRoutine::lumRGB(Vector4f &c)
+SIMD::Float PixelRoutine::lumRGB(Vector4f &c)
{
return c.x * 0.3f + c.y * 0.59f + c.z * 0.11f;
}
-Float4 PixelRoutine::computeLum(Float4 &color, Float4 &lum, Float4 &mincol, Float4 &maxcol, Int4 &negative, Int4 &aboveOne)
+SIMD::Float PixelRoutine::computeLum(SIMD::Float &color, SIMD::Float &lum, SIMD::Float &mincol, SIMD::Float &maxcol, SIMD::Int &negative, SIMD::Int &aboveOne)
{
- return As<Float4>(
- (negative & As<Int4>(lum + ((color - lum) * lum) / (lum - mincol))) |
- (~negative & ((aboveOne & As<Int4>(lum + ((color - lum) * (1.0f - lum)) / (maxcol - lum))) |
- (~aboveOne & As<Int4>(color)))));
+ return As<SIMD::Float>(
+ (negative & As<SIMD::Int>(lum + ((color - lum) * lum) / (lum - mincol))) |
+ (~negative & ((aboveOne & As<SIMD::Int>(lum + ((color - lum) * (1.0f - lum)) / (maxcol - lum))) |
+ (~aboveOne & As<SIMD::Int>(color)))));
}
-void PixelRoutine::setLum(Vector4f &cbase, Vector4f &clum, Float4 &x, Float4 &y, Float4 &z)
+void PixelRoutine::setLum(Vector4f &cbase, Vector4f &clum, SIMD::Float &x, SIMD::Float &y, SIMD::Float &z)
{
- Float4 lbase = lumRGB(cbase);
- Float4 llum = lumRGB(clum);
- Float4 ldiff = llum - lbase;
+ SIMD::Float lbase = lumRGB(cbase);
+ SIMD::Float llum = lumRGB(clum);
+ SIMD::Float ldiff = llum - lbase;
Vector4f color;
color.x = cbase.x + ldiff;
color.y = cbase.y + ldiff;
color.z = cbase.z + ldiff;
- Float4 lum = lumRGB(color);
- Float4 mincol = minRGB(color);
- Float4 maxcol = maxRGB(color);
+ SIMD::Float lum = lumRGB(color);
+ SIMD::Float mincol = minRGB(color);
+ SIMD::Float maxcol = maxRGB(color);
- Int4 negative = CmpLT(mincol, 0.0f);
- Int4 aboveOne = CmpGT(maxcol, 1.0f);
+ SIMD::Int negative = CmpLT(mincol, 0.0f);
+ SIMD::Int aboveOne = CmpGT(maxcol, 1.0f);
x = computeLum(color.x, lum, mincol, maxcol, negative, aboveOne);
y = computeLum(color.y, lum, mincol, maxcol, negative, aboveOne);
@@ -2089,10 +2089,10 @@
void PixelRoutine::premultiply(Vector4f &c)
{
- Int4 nonZeroAlpha = CmpNEQ(c.w, 0.0f);
- c.x = As<Float4>(nonZeroAlpha & As<Int4>(c.x / c.w));
- c.y = As<Float4>(nonZeroAlpha & As<Int4>(c.y / c.w));
- c.z = As<Float4>(nonZeroAlpha & As<Int4>(c.z / c.w));
+ SIMD::Int nonZeroAlpha = CmpNEQ(c.w, 0.0f);
+ c.x = As<SIMD::Float>(nonZeroAlpha & As<SIMD::Int>(c.x / c.w));
+ c.y = As<SIMD::Float>(nonZeroAlpha & As<SIMD::Int>(c.y / c.w));
+ c.z = As<SIMD::Float>(nonZeroAlpha & As<SIMD::Int>(c.z / c.w));
}
Vector4f PixelRoutine::computeAdvancedBlendMode(int index, const Vector4f &src, const Vector4f &dst, const Vector4f &srcFactor, const Vector4f &dstFactor)
@@ -2188,7 +2188,7 @@
break;
}
- Float4 p = srcColor.w * dstColor.w;
+ SIMD::Float p = srcColor.w * dstColor.w;
blendedColor.x *= p;
blendedColor.y *= p;
blendedColor.z *= p;
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index 855021e..229455d 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -37,14 +37,14 @@
protected:
using SampleSet = std::vector<int>;
- Float4 z[4]; // Multisampled z
- Float4 w; // Used as is
- Float4 rhw; // Reciprocal w
+ SIMD::Float z[4]; // Multisampled z
+ SIMD::Float w; // Used as is
+ SIMD::Float rhw; // Reciprocal w
SpirvRoutine routine;
const vk::DescriptorSet::Bindings &descriptorSets;
- virtual void setBuiltins(Int &x, Int &y, Float4 (&z)[4], Float4 &w, Int cMask[4], const SampleSet &samples) = 0;
+ virtual void setBuiltins(Int &x, Int &y, SIMD::Float (&z)[4], SIMD::Float &w, Int cMask[4], const SampleSet &samples) = 0;
virtual void executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples) = 0;
virtual Bool alphaTest(Int cMask[4], const SampleSet &samples) = 0;
virtual void blendColor(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples) = 0;
@@ -52,7 +52,7 @@
void quad(Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y) override;
void alphaTest(Int &aMask, const Short4 &alpha);
- void alphaToCoverage(Int cMask[4], const Float4 &alpha, const SampleSet &samples);
+ void alphaToCoverage(Int cMask[4], const SIMD::Float &alpha, const SampleSet &samples);
void writeColor(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4f &color, const Int &sMask, const Int &zMask, const Int &cMask);
Vector4f alphaBlend(int index, const Pointer<Byte> &cBuffer, const Vector4f &sourceColor, const Int &x);
@@ -70,29 +70,31 @@
void stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool isBack);
Byte8 stencilOperation(const Byte8 &bufferValue, const PixelProcessor::States::StencilOpState &ops, bool isBack, const Int &zMask, const Int &sMask);
Byte8 stencilOperation(const Byte8 &bufferValue, VkStencilOp operation, bool isBack);
- Float4 clampDepth(const Float4 &z);
- Bool depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask);
+ SIMD::Float clampDepth(const SIMD::Float &z);
+ Bool depthTest(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask);
void depthBoundsTest(const Pointer<Byte> &zBuffer, int q, const Int &x, Int &zMask, Int &cMask);
void readPixel(int index, const Pointer<Byte> &cBuffer, const Int &x, Vector4s &pixel);
enum BlendFactorModifier { None, OneMinus };
Float blendConstant(vk::Format format, int component, BlendFactorModifier modifier = None);
void blendFactorRGB(Vector4f &blendFactorRGB, const Vector4f &sourceColor, const Vector4f &destColor, VkBlendFactor colorBlendFactor, vk::Format format);
- void blendFactorAlpha(Float4 &blendFactorAlpha, const Float4 &sourceAlpha, const Float4 &destAlpha, VkBlendFactor alphaBlendFactor, vk::Format format);
+ void blendFactorAlpha(SIMD::Float &blendFactorAlpha, const SIMD::Float &sourceAlpha, const SIMD::Float &destAlpha, VkBlendFactor alphaBlendFactor, vk::Format format);
+
bool blendFactorCanExceedFormatRange(VkBlendFactor blendFactor, vk::Format format);
Vector4f computeAdvancedBlendMode(int index, const Vector4f &src, const Vector4f &dst, const Vector4f &srcFactor, const Vector4f &dstFactor);
- Float4 blendOpOverlay(Float4 &src, Float4 &dst);
- Float4 blendOpColorDodge(Float4 &src, Float4 &dst);
- Float4 blendOpColorBurn(Float4 &src, Float4 &dst);
- Float4 blendOpHardlight(Float4 &src, Float4 &dst);
- Float4 blendOpSoftlight(Float4 &src, Float4 &dst);
- void setLumSat(Vector4f &cbase, Vector4f &csat, Vector4f &clum, Float4 &x, Float4 &y, Float4 &z);
- void setLum(Vector4f &cbase, Vector4f &clum, Float4 &x, Float4 &y, Float4 &z);
- Float4 computeLum(Float4 &color, Float4 &lum, Float4 &mincol, Float4 &maxcol, Int4 &negative, Int4 &aboveOne);
- Float4 maxRGB(Vector4f &c);
- Float4 minRGB(Vector4f &c);
- Float4 lumRGB(Vector4f &c);
+ SIMD::Float blendOpOverlay(SIMD::Float &src, SIMD::Float &dst);
+ SIMD::Float blendOpColorDodge(SIMD::Float &src, SIMD::Float &dst);
+ SIMD::Float blendOpColorBurn(SIMD::Float &src, SIMD::Float &dst);
+ SIMD::Float blendOpHardlight(SIMD::Float &src, SIMD::Float &dst);
+ SIMD::Float blendOpSoftlight(SIMD::Float &src, SIMD::Float &dst);
+ void setLumSat(Vector4f &cbase, Vector4f &csat, Vector4f &clum, SIMD::Float &x, SIMD::Float &y, SIMD::Float &z);
+ void setLum(Vector4f &cbase, Vector4f &clum, SIMD::Float &x, SIMD::Float &y, SIMD::Float &z);
+ SIMD::Float computeLum(SIMD::Float &color, SIMD::Float &lum, SIMD::Float &mincol, SIMD::Float &maxcol, SIMD::Int &negative, SIMD::Int &aboveOne);
+ SIMD::Float maxRGB(Vector4f &c);
+ SIMD::Float minRGB(Vector4f &c);
+ SIMD::Float lumRGB(Vector4f &c);
void premultiply(Vector4f &c);
+
void writeStencil(Pointer<Byte> &sBuffer, const Int &x, const Int sMask[4], const Int zMask[4], const Int cMask[4], const SampleSet &samples);
void writeDepth(Pointer<Byte> &zBuffer, const Int &x, const Int zMask[4], const SampleSet &samples);
void occlusionSampleCount(const Int zMask[4], const Int sMask[4], const SampleSet &samples);
@@ -101,8 +103,8 @@
void linearToSRGB16_12_16(Vector4s &c);
Float4 sRGBtoLinear(const Float4 &x);
- Bool depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask);
- Bool depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &sMask, Int &zMask, const Int &cMask);
+ Bool depthTest32F(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask);
+ Bool depthTest16(const Pointer<Byte> &zBuffer, int q, const Int &x, const SIMD::Float &z, const Int &sMask, Int &zMask, const Int &cMask);
void writeDepth32F(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask);
void writeDepth16(Pointer<Byte> &zBuffer, int q, const Int &x, const Float4 &z, const Int &zMask);
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index d700a5d..3162e29 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -27,7 +27,8 @@
, function(function)
{
}
-Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 uvwa[4], Float4 &dRef, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4i &offset, Int4 &sample)
+
+Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, SIMD::Float uvwa[4], SIMD::Float &dRef, Float &&lodOrBias, SIMD::Float &dsx, SIMD::Float &dsy, Vector4i offset, SIMD::Int &sample)
{
Vector4f c;
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index 757dc7c..632f894 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -61,7 +61,7 @@
public:
SamplerCore(Pointer<Byte> &constants, const Sampler &state, SamplerFunction function);
- Vector4f sampleTexture(Pointer<Byte> &texture, Float4 uvwa[4], Float4 &q, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4i &offset, Int4 &sample);
+ Vector4f sampleTexture(Pointer<Byte> &texture, SIMD::Float uvwa[4], SIMD::Float &dRef, Float &&lodOrBias, SIMD::Float &dsx, SIMD::Float &dsy, Vector4i offset, SIMD::Int &sample);
private:
Float4 applySwizzle(const Vector4f &c, VkComponentSwizzle swizzle, bool integer);
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index a01759a..5d8ddae 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -1027,7 +1027,7 @@
static bool IsStorageInterleavedByLane(spv::StorageClass storageClass);
static bool IsExplicitLayout(spv::StorageClass storageClass);
- static sw::SIMD::Pointer GetElementPointer(sw::SIMD::Pointer structure, uint32_t offset, bool interleavedByLane);
+ static SIMD::Pointer GetElementPointer(SIMD::Pointer structure, uint32_t offset, bool interleavedByLane);
// Output storage buffers and images should not be affected by helper invocations
static bool StoresInHelperInvocation(spv::StorageClass storageClass);
@@ -1419,8 +1419,8 @@
AtSample,
AtOffset,
};
- SIMD::Float Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
- uint32_t component, EmitState *state, InterpolationType type) const;
+ SIMD::Float EmitInterpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
+ uint32_t component, EmitState *state, InterpolationType type) const;
// Helper for implementing OpStore, which doesn't take an InsnIterator so it
// can also store independent operands.
diff --git a/src/Pipeline/SpirvShaderGLSLstd450.cpp b/src/Pipeline/SpirvShaderGLSLstd450.cpp
index e4e341c..bbbd951 100644
--- a/src/Pipeline/SpirvShaderGLSLstd450.cpp
+++ b/src/Pipeline/SpirvShaderGLSLstd450.cpp
@@ -21,20 +21,21 @@
#include <spirv/unified1/GLSL.std.450.h>
#include <spirv/unified1/spirv.hpp>
-namespace {
-constexpr float PI = 3.141592653589793f;
+namespace sw {
-sw::SIMD::Float Interpolate(const sw::SIMD::Float &x, const sw::SIMD::Float &y, const sw::SIMD::Float &rhw,
- const sw::SIMD::Float &A, const sw::SIMD::Float &B, const sw::SIMD::Float &C,
- sw::SpirvRoutine::Interpolation interpolation)
+static constexpr float PI = 3.141592653589793f;
+
+static SIMD::Float Interpolate(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw,
+ const SIMD::Float &A, const SIMD::Float &B, const SIMD::Float &C,
+ SpirvRoutine::Interpolation interpolation)
{
- sw::SIMD::Float interpolant = C;
+ SIMD::Float interpolant = C;
- if(interpolation != sw::SpirvRoutine::Flat)
+ if(interpolation != SpirvRoutine::Flat)
{
interpolant += x * A + y * B;
- if(interpolation == sw::SpirvRoutine::Perspective)
+ if(interpolation == SpirvRoutine::Perspective)
{
interpolant *= rhw;
}
@@ -43,10 +44,6 @@
return interpolant;
}
-} // namespace
-
-namespace sw {
-
SpirvShader::EmitResult SpirvShader::EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const
{
auto &type = getType(insn.resultTypeId());
@@ -135,7 +132,7 @@
auto x = Round(src.Float(i));
// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
- SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
+ SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(SIMD::Int(x) & SIMD::Int(1)));
}
}
break;
@@ -216,11 +213,8 @@
auto x = Operand(this, state, insn.word(7));
for(auto i = 0u; i < type.componentCount; i++)
{
- auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
- (edge1.Float(i) - edge0.Float(i)),
- SIMD::Float(0.0f)),
- SIMD::Float(1.0f));
- dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
+ auto tx = Min(Max((x.Float(i) - edge0.Float(i)) / (edge1.Float(i) - edge0.Float(i)), 0.0f), 1.0f);
+ dst.move(i, tx * tx * (3.0f - 2.0f * tx));
}
}
break;
@@ -602,7 +596,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Sin(radians.Float(i), d.RelaxedPrecision));
+ dst.move(i, Sin(radians.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -613,7 +607,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Cos(radians.Float(i), d.RelaxedPrecision));
+ dst.move(i, Cos(radians.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -624,7 +618,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Tan(radians.Float(i), d.RelaxedPrecision));
+ dst.move(i, Tan(radians.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -635,7 +629,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Asin(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Asin(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -646,7 +640,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Acos(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Acos(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -657,7 +651,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Atan(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Atan(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -668,7 +662,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Sinh(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Sinh(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -679,7 +673,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Cosh(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Cosh(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -690,7 +684,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Tanh(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Tanh(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -701,7 +695,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Asinh(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Asinh(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -712,7 +706,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Acosh(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Acosh(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -723,7 +717,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Atanh(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Atanh(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -735,7 +729,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Atan2(x.Float(i), y.Float(i), d.RelaxedPrecision));
+ dst.move(i, Atan2(x.Float(i), y.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -747,7 +741,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Pow(x.Float(i), y.Float(i), d.RelaxedPrecision));
+ dst.move(i, Pow(x.Float(i), y.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -758,7 +752,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Exp(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Exp(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -769,7 +763,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Log(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Log(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -780,7 +774,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Exp2(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Exp2(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -791,7 +785,7 @@
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, sw::Log2(val.Float(i), d.RelaxedPrecision));
+ dst.move(i, Log2(val.Float(i), d.RelaxedPrecision));
}
}
break;
@@ -943,7 +937,7 @@
auto ptr = state->getPointer(insn.word(5));
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, Interpolate(ptr, d.Location, 0, i, state, SpirvShader::Centroid));
+ dst.move(i, EmitInterpolate(ptr, d.Location, 0, i, state, SpirvShader::Centroid));
}
}
break;
@@ -953,7 +947,7 @@
auto ptr = state->getPointer(insn.word(5));
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtSample));
+ dst.move(i, EmitInterpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtSample));
}
}
break;
@@ -963,7 +957,7 @@
auto ptr = state->getPointer(insn.word(5));
for(auto i = 0u; i < type.componentCount; i++)
{
- dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtOffset));
+ dst.move(i, EmitInterpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtOffset));
}
}
break;
@@ -1007,8 +1001,8 @@
return EmitResult::Continue;
}
-SIMD::Float SpirvShader::Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
- uint32_t component, EmitState *state, InterpolationType type) const
+SIMD::Float SpirvShader::EmitInterpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
+ uint32_t component, EmitState *state, InterpolationType type) const
{
uint32_t interpolant = (location * 4);
uint32_t components_per_row = GetNumInputComponents(location);
@@ -1056,7 +1050,7 @@
// input variable is undefined, so we just clamp to avoid OOB accesses.
SIMD::Int samples = sampleOperand.Int(0) & SIMD::Int(NUM_SAMPLES - 1);
- for(int i = 0; i < SIMD::Width; ++i)
+ for(int i = 0; i < SIMD::Width; i++)
{
Int sample = Extract(samples, i);
x = Insert(x, *Pointer<Float>(state->routine->constants + OFFSET(Constants, SampleLocationsX) + sample * sizeof(float)), i);
@@ -1098,7 +1092,7 @@
SIMD::Float B;
SIMD::Float C;
- for(int i = 0; i < SIMD::Width; ++i)
+ for(int i = 0; i < SIMD::Width; i++)
{
Int offset = ((Extract(ptr.dynamicOffsets, i) + ptr.staticOffsets[i]) >> offsetShift) + component;
Pointer<Byte> planeEquationI = planeEquation + (offset * sizeof(PlaneEquation));
@@ -1107,7 +1101,7 @@
C = Insert(C, *Pointer<Float>(planeEquationI + OFFSET(PlaneEquation, C)), i);
}
- return ::Interpolate(x, y, rhw, A, B, C, state->routine->inputsInterpolation[packedInterpolant]);
+ return Interpolate(x, y, rhw, A, B, C, state->routine->inputsInterpolation[packedInterpolant]);
}
else
{
@@ -1136,7 +1130,7 @@
B = *Pointer<Float>(planeEquation + OFFSET(PlaneEquation, B));
}
- return ::Interpolate(x, y, rhw, A, B, C, interpolation);
+ return Interpolate(x, y, rhw, A, B, C, interpolation);
}
} // namespace sw
\ No newline at end of file
diff --git a/src/Pipeline/SpirvShaderMemory.cpp b/src/Pipeline/SpirvShaderMemory.cpp
index b3caa83..9b27561 100644
--- a/src/Pipeline/SpirvShaderMemory.cpp
+++ b/src/Pipeline/SpirvShaderMemory.cpp
@@ -574,10 +574,10 @@
{
if(interleavedByLane)
{
- structure.staticOffsets[0] += 0 * sizeof(float);
- structure.staticOffsets[1] += 1 * sizeof(float);
- structure.staticOffsets[2] += 2 * sizeof(float);
- structure.staticOffsets[3] += 3 * sizeof(float);
+ for(int i = 0; i < SIMD::Width; i++)
+ {
+ structure.staticOffsets[i] += i * sizeof(float);
+ }
return structure + offset * sw::SIMD::Width;
}
diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp
index 7583b12..5a3dc2a 100644
--- a/src/Pipeline/VertexProgram.cpp
+++ b/src/Pipeline/VertexProgram.cpp
@@ -80,7 +80,8 @@
}
auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
- Int4 storesAndAtomicsMask = CmpGE(UInt4(vertexCount), UInt4(1, 2, 3, 4));
+ ASSERT(SIMD::Width == 4);
+ SIMD::Int storesAndAtomicsMask = CmpGE(SIMD::UInt(vertexCount), SIMD::UInt(1, 2, 3, 4));
spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
spirvShader->emitEpilog(&routine);
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index 6e8d3cf..535dc97 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -124,10 +124,10 @@
auto posZ = pos[it->second.FirstComponent + 2];
auto posW = pos[it->second.FirstComponent + 3];
- Int4 maxX = CmpLT(posW, posX);
- Int4 maxY = CmpLT(posW, posY);
- Int4 minX = CmpNLE(-posW, posX);
- Int4 minY = CmpNLE(-posW, posY);
+ SIMD::Int maxX = CmpLT(posW, posX);
+ SIMD::Int maxY = CmpLT(posW, posY);
+ SIMD::Int minX = CmpNLE(-posW, posX);
+ SIMD::Int minY = CmpNLE(-posW, posY);
clipFlags = Pointer<Int>(constants + OFFSET(Constants, maxX))[SignMask(maxX)];
clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxY))[SignMask(maxY)];
@@ -135,18 +135,18 @@
clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minY))[SignMask(minY)];
if(state.depthClipEnable)
{
- Int4 maxZ = CmpLT(posW, posZ);
- Int4 minZ = CmpNLE(Float4(0.0f), posZ);
+ SIMD::Int maxZ = CmpLT(posW, posZ);
+ SIMD::Int minZ = CmpNLE(0.0f, posZ);
clipFlags |= Pointer<Int>(constants + OFFSET(Constants, maxZ))[SignMask(maxZ)];
clipFlags |= Pointer<Int>(constants + OFFSET(Constants, minZ))[SignMask(minZ)];
}
- Float4 maxPos = As<Float4>(Int4(0x7F7FFFFF));
- Int4 finiteX = CmpLE(Abs(posX), maxPos);
- Int4 finiteY = CmpLE(Abs(posY), maxPos);
- Int4 finiteZ = CmpLE(Abs(posZ), maxPos);
+ SIMD::Float maxPos = As<SIMD::Float>(SIMD::Int(0x7F7FFFFF));
+ SIMD::Int finiteX = CmpLE(Abs(posX), maxPos);
+ SIMD::Int finiteY = CmpLE(Abs(posY), maxPos);
+ SIMD::Int finiteZ = CmpLE(Abs(posZ), maxPos);
- Int4 finiteXYZ = finiteX & finiteY & finiteZ;
+ SIMD::Int finiteXYZ = finiteX & finiteY & finiteZ;
clipFlags |= Pointer<Int>(constants + OFFSET(Constants, fini))[SignMask(finiteXYZ)];
}
}
@@ -597,12 +597,12 @@
pos.w = position[it->second.FirstComponent + 3];
// Projection and viewport transform.
- Float4 w = As<Float4>(As<Int4>(pos.w) | (As<Int4>(CmpEQ(pos.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
- Float4 rhw = Float4(1.0f) / w;
+ SIMD::Float w = As<SIMD::Float>(As<SIMD::Int>(pos.w) | (As<SIMD::Int>(CmpEQ(pos.w, 0.0f)) & As<SIMD::Int>(SIMD::Float(1.0f))));
+ SIMD::Float rhw = 1.0f / w;
Vector4f proj;
- proj.x = As<Float4>(RoundIntClamped(Float4(*Pointer<Float>(data + OFFSET(DrawData, X0xF))) + pos.x * rhw * Float4(*Pointer<Float>(data + OFFSET(DrawData, WxF)))));
- proj.y = As<Float4>(RoundIntClamped(Float4(*Pointer<Float>(data + OFFSET(DrawData, Y0xF))) + pos.y * rhw * Float4(*Pointer<Float>(data + OFFSET(DrawData, HxF)))));
+ proj.x = As<Float4>(RoundIntClamped(SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, X0xF))) + pos.x * rhw * SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, WxF)))));
+ proj.y = As<Float4>(RoundIntClamped(SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, Y0xF))) + pos.y * rhw * SIMD::Float(*Pointer<Float>(data + OFFSET(DrawData, HxF)))));
proj.z = pos.z * rhw;
proj.w = rhw;