Pass the sampling lod/bias as a separate parameter.

This is necessary for cube or 2D array shadow texture sampling functions
which need the fourth texture coordinate component for depth comparison
while also taking a lod or bias parameter.

Change-Id: I1e1399f134e22cecaff97a224df2c13c57ba3a40
Reviewed-on: https://swiftshader-review.googlesource.com/13551
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Shader/PixelPipeline.cpp b/src/Shader/PixelPipeline.cpp index 91535ad..f983b8f 100644 --- a/src/Shader/PixelPipeline.cpp +++ b/src/Shader/PixelPipeline.cpp
@@ -1240,7 +1240,7 @@ if(!project) { - c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u, v, w, q, dsx, dsy); + c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u, v, w, q, q, dsx, dsy); } else { @@ -1250,7 +1250,7 @@ Float4 v_q = v * rq; Float4 w_q = w * rq; - c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u_q, v_q, w_q, q, dsx, dsy); + c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u_q, v_q, w_q, q, q, dsx, dsy); } #if PERF_PROFILE
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp index 87ed7c8..bdcd1b9 100644 --- a/src/Shader/PixelProgram.cpp +++ b/src/Shader/PixelProgram.cpp
@@ -281,17 +281,20 @@ case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; - case Shader::OPCODE_TEX: TEXLD(d, s0, src1, project, bias); break; - case Shader::OPCODE_TEXLDD: TEXLDD(d, s0, src1, s2, s3); break; - case Shader::OPCODE_TEXLDL: TEXLDL(d, s0, src1); break; + case Shader::OPCODE_TEX: TEX(d, s0, src1, project, bias); break; + case Shader::OPCODE_TEXLDD: TEXGRAD(d, s0, src1, s2, s3); break; + case Shader::OPCODE_TEXLDL: TEXLOD(d, s0, src1, s0.w); break; + case Shader::OPCODE_TEXLOD: TEXLOD(d, s0, src1, s2.x); break; case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; - case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2, bias); break; - case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2, bias); break; - case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1); break; - case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2); break; + case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2); break; + case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break; + case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x); break; + case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break; case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; - case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break; + case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break; + case Shader::OPCODE_TEXBIAS: TEXBIAS(d, s0, src1, s2.x); break; + case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x); break; case Shader::OPCODE_DISCARD: DISCARD(cMask, instruction); break; case Shader::OPCODE_DFDX: DFDX(d, s0); break; case Shader::OPCODE_DFDY: DFDY(d, s0); break; @@ -678,13 +681,13 @@ } } - Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Vector4f 
&dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) + Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) { Vector4f tmp; if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) { - tmp = sampleTexture(sampler.index, uvwq, dsx, dsy, offset, function); + tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function); } else { @@ -696,7 +699,7 @@ { If(index == i) { - tmp = sampleTexture(i, uvwq, dsx, dsy, offset, function); + tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function); // FIXME: When the sampler states are the same, we could use one sampler and just index the texture } } @@ -712,14 +715,14 @@ return c; } - Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) + Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) { #if PERF_PROFILE Long texTime = Ticks(); #endif Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture); - Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function); + Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function); #if PERF_PROFILE cycles[PERF_TEX] += Ticks() - texTime; @@ -1113,7 +1116,7 @@ dst.w = dot4(src0, row3); } - void PixelProgram::TEXLD(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) + void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) { if(project) { @@ -1123,52 +1126,57 @@ proj.y = src0.y * rw; proj.z = src0.z * rw; - dst = sampleTexture(src1, proj, src0, src0, src0, 
Implicit); + dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit); } else { - dst = sampleTexture(src1, src0, src0, src0, src0, bias ? Bias : Implicit); + dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit); } } - void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, bool bias) + void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset) { - dst = sampleTexture(src1, src0, src0, src0, src2, {bias ? Bias : Implicit, Offset}); + dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset}); } - void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, bool bias) + void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod) { - dst = sampleTexture(src1, src0, src0, src0, offset, {Lod, Offset}); + dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset}); } - void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1) + void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias) { - dst = sampleTexture(src1, src0, src0, src0, src0, Fetch); + dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias); + } + + void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias) + { + dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset}); } - void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset) + void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) { - dst = sampleTexture(src1, src0, src0, src0, offset, {Fetch, Offset}); + dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch); } - void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3) + void 
PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) { - dst = sampleTexture(src1, src0, src2, src3, src0, Grad); + dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset}); } - void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset) + void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy) { - dst = sampleTexture(src1, src0, src2, src3, offset, {Grad, Offset}); + dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad); } - void PixelProgram::TEXLDD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3) + void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset) { - dst = sampleTexture(src1, src0, src2, src3, src0, Grad); + dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset}); } - void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1) + void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod) { - dst = sampleTexture(src1, src0, src0, src0, src0, Lod); + dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod); } void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
diff --git a/src/Shader/PixelProgram.hpp b/src/Shader/PixelProgram.hpp index a76a8ba..46af0ac 100644 --- a/src/Shader/PixelProgram.hpp +++ b/src/Shader/PixelProgram.hpp
@@ -82,8 +82,8 @@ Int4 enableContinue; Int4 enableLeave; - Vector4f sampleTexture(const Src &sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); - Vector4f sampleTexture(int samplerIndex, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); + Vector4f sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); + Vector4f sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); // Raster operations void clampColor(Vector4f oC[RENDERTARGETS]); @@ -106,17 +106,18 @@ void M3X4(Vector4f &dst, Vector4f &src0, const Src &src1); void M4X3(Vector4f &dst, Vector4f &src0, const Src &src1); void M4X4(Vector4f &dst, Vector4f &src0, const Src &src1); - void TEXLD(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias); - void TEXLDD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3); - void TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1); + void TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias); + void TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod); + void TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias); void TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1); void TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask); - void TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, bool bias); - void TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, bool bias); - void TEXELFETCH(Vector4f &dst, Vector4f &src, const Src&); - void TEXELFETCH(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2); - void TEXGRAD(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2, Vector4f &src3); - void TEXGRAD(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2, Vector4f &src3, 
Vector4f &src4); + void TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset); + void TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias); + void TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod); + void TEXELFETCH(Vector4f &dst, Vector4f &src, const Src &, Float4 &lod); + void TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src, const Src &, Vector4f &offset, Float4 &lod); + void TEXGRAD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &dsx, Vector4f &dsy); + void TEXGRADOFFSET(Vector4f &dst, Vector4f &src, const Src &, Vector4f &dsx, Vector4f &dsy, Vector4f &offset); void DISCARD(Int cMask[4], const Shader::Instruction *instruction); void DFDX(Vector4f &dst, Vector4f &src); void DFDY(Vector4f &dst, Vector4f &src);
diff --git a/src/Shader/PixelShader.cpp b/src/Shader/PixelShader.cpp index 9e36be6..b8b320e 100644 --- a/src/Shader/PixelShader.cpp +++ b/src/Shader/PixelShader.cpp
@@ -314,8 +314,11 @@ case Shader::OPCODE_TEX: case Shader::OPCODE_TEXLDD: case Shader::OPCODE_TEXLDL: + case Shader::OPCODE_TEXLOD: + case Shader::OPCODE_TEXBIAS: case Shader::OPCODE_TEXOFFSET: - case Shader::OPCODE_TEXLDLOFFSET: + case Shader::OPCODE_TEXOFFSETBIAS: + case Shader::OPCODE_TEXLODOFFSET: case Shader::OPCODE_TEXELFETCH: case Shader::OPCODE_TEXELFETCHOFFSET: case Shader::OPCODE_TEXGRAD:
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp index ec71463..ca06b5c 100644 --- a/src/Shader/SamplerCore.cpp +++ b/src/Shader/SamplerCore.cpp
@@ -56,12 +56,12 @@ { } - Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy) + Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy) { - return sampleTexture(texture, u, v, w, q, dsx, dsy, dsx, Implicit, true); + return sampleTexture(texture, u, v, w, q, q, dsx, dsy, (dsx), Implicit, true); } - Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12) + Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12) { Vector4s c; @@ -77,6 +77,7 @@ Float4 uuuu = u; Float4 vvvv = v; Float4 wwww = w; + Float4 qqqq = q; if(state.textureType == TEXTURE_NULL) { @@ -109,22 +110,21 @@ Float anisotropy; Float4 uDelta; Float4 vDelta; - Float lodBias = (function == Fetch) ? 
Float4(As<Int4>(q)).x : q.x; if(state.textureType != TEXTURE_3D) { if(state.textureType != TEXTURE_CUBE) { - computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function); + computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function); } else { - computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function); + computeLodCube(texture, lod, lodX, lodY, lodZ, bias.x, dsx, dsy, function); } } else { - computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function); + computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function); } if(!hasFloatTexture()) @@ -133,7 +133,7 @@ } else { - Vector4f cf = sampleFloatFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function); + Vector4f cf = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function); convertFixed12(c, cf); } @@ -299,7 +299,7 @@ return c; } - Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) + Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) { Vector4f c; @@ -328,6 +328,7 @@ Float4 uuuu = u; Float4 vvvv = v; Float4 wwww = w; + Float4 qqqq = q; Int face[4]; Float4 lodX; @@ -343,25 +344,24 @@ Float anisotropy; Float4 uDelta; Float4 vDelta; - Float lodBias = (function == Fetch) ? 
Float4(As<Int4>(q)).x : q.x; if(state.textureType != TEXTURE_3D) { if(state.textureType != TEXTURE_CUBE) { - computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function); + computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function); } else { - computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function); + computeLodCube(texture, lod, lodX, lodY, lodZ, bias.x, dsx, dsy, function); } } else { - computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function); + computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function); } - c = sampleFloatFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function); + c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function); if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture()) { @@ -389,7 +389,7 @@ } else { - Vector4s cs = sampleTexture(texture, u, v, w, q, dsx, dsy, offset, function, false); + Vector4s cs = sampleTexture(texture, u, v, w, q, bias, dsx, dsy, offset, function, false); if(has16bitTextureFormat()) { @@ -1116,9 +1116,9 @@ return c_; } - Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function) + Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function) { - Vector4f c = sampleFloatAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function); + Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function); if(function == Fetch) { @@ -1127,7 +1127,7 @@ if(state.mipmapFilter > MIPMAP_POINT) { - Vector4f cc = 
sampleFloatAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function); + Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, true, function); Float4 lod4 = Float4(Frac(lod)); @@ -1196,13 +1196,13 @@ return c; } - Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) + Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) { Vector4f c; if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch) { - c = sampleFloat(texture, u, v, w, offset, lod, face, secondLOD, function); + c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function); } else { @@ -1231,7 +1231,7 @@ Do { - c = sampleFloat(texture, u0, v0, w, offset, lod, face, secondLOD, function); + c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function); u0 += du; v0 += dv; @@ -1254,11 +1254,11 @@ return c; } - Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) + Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) { if(state.textureType != TEXTURE_3D) { - return sampleFloat2D(texture, u, v, w, offset, lod, face, secondLOD, function); + return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function); } else { @@ -1266,7 +1266,7 @@ } } - Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, 
Float &lod, Int face[4], bool secondLOD, SamplerFunction function) + Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) { Vector4f c; @@ -1295,16 +1295,16 @@ if(state.textureFilter == FILTER_POINT || (function == Fetch)) { - c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); + c = sampleTexel(x0, y0, z0, q, mipmap, buffer, function); } else { y1 *= pitchP; - Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); - Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function); - Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function); - Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function); + Vector4f c0 = sampleTexel(x0, y0, z0, q, mipmap, buffer, function); + Vector4f c1 = sampleTexel(x1, y0, z0, q, mipmap, buffer, function); + Vector4f c2 = sampleTexel(x0, y1, z0, q, mipmap, buffer, function); + Vector4f c3 = sampleTexel(x1, y1, z0, q, mipmap, buffer, function); if(!gather) // Blend { @@ -1481,10 +1481,20 @@ lod += lodBias; } } - else + else if(function == Lod) { lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); } + else if(function == Fetch) + { + // TODO: Eliminate int-float-int conversion. + lod = Float(As<Int>(lodBias)) + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); + } + else if(function == Base) + { + lod = Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); + } + else assert(false); lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); @@ -1537,10 +1547,20 @@ lod += lodBias; } } - else + else if(function == Lod) { lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); } + else if(function == Fetch) + { + // TODO: Eliminate int-float-int conversion. 
+ lod = Float(As<Int>(lodBias)) + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); + } + else if(function == Base) + { + lod = Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); + } + else assert(false); lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); @@ -1555,9 +1575,7 @@ { if(function != Lod && function != Fetch) { - Float4 dudxy; - Float4 dvdxy; - Float4 dsdxy; + Float4 dudxy, dvdxy, dsdxy; if(function != Grad) { @@ -1567,13 +1585,9 @@ } else { - dudxy = dsx.x; - dvdxy = dsx.y; - dsdxy = dsx.z; - - dudxy = Float4(dudxy.xx, dsy.x.xx); - dvdxy = Float4(dvdxy.xx, dsy.y.xx); - dsdxy = Float4(dsdxy.xx, dsy.z.xx); + dudxy = Float4(dsx.x.xx, dsy.x.xx); + dvdxy = Float4(dsx.y.xx, dsy.y.xx); + dsdxy = Float4(dsx.z.xx, dsy.z.xx); dudxy = Float4(dudxy.xz, dudxy.xz); dvdxy = Float4(dvdxy.xz, dvdxy.xz); @@ -1601,10 +1615,20 @@ lod += lodBias; } } - else + else if(function == Lod) { lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); } + else if(function == Fetch) + { + // TODO: Eliminate int-float-int conversion. + lod = Float(As<Int>(lodBias)) + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); + } + else if(function == Base) + { + lod = Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel))); + } + else assert(false); lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp index 43ed1d1..3ad7549 100644 --- a/src/Shader/SamplerCore.hpp +++ b/src/Shader/SamplerCore.hpp
@@ -22,17 +22,18 @@ { enum SamplerMethod { - Implicit, - Bias, - Lod, - Grad, - Fetch + Implicit, // Compute gradients (pixel shader only). + Bias, // Compute gradients and add provided bias. + Lod, // Use provided LOD. + Grad, // Use provided gradients. + Fetch, // Use provided integer coordinates. + Base // Sample base level. }; enum SamplerOption { None, - Offset + Offset // Offset sample location by provided integer coordinates. }; struct SamplerFunction @@ -49,12 +50,12 @@ public: SamplerCore(Pointer<Byte> &constants, const Sampler::State &state); - Vector4s sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy); - Vector4f sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); + Vector4s sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy); + Vector4f sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); static Vector4f textureSize(Pointer<Byte> &mipmap, Float4 &lod); private: - Vector4s sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12); + Vector4s sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12); void border(Short4 &mask, Float4 &coordinates); void border(Int4 &mask, Float4 &coordinates); @@ -64,10 +65,10 @@ Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function); Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, 
Float &lod, Int face[4], bool secondLOD, SamplerFunction function); Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function); - Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function); - Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function); - Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function); - Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function); + Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function); + Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function); + Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function); + Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function); Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function); Float log2sqrt(Float lod); void computeLod(Pointer<Byte> 
&texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
diff --git a/src/Shader/Shader.cpp b/src/Shader/Shader.cpp index e99f13a..f6c9f6f 100644 --- a/src/Shader/Shader.cpp +++ b/src/Shader/Shader.cpp
@@ -747,232 +747,234 @@ { switch(opcode) { - case OPCODE_NULL: return "null"; - case OPCODE_NOP: return "nop"; - case OPCODE_MOV: return "mov"; - case OPCODE_ADD: return "add"; - case OPCODE_IADD: return "iadd"; - case OPCODE_SUB: return "sub"; - case OPCODE_ISUB: return "isub"; - case OPCODE_MAD: return "mad"; - case OPCODE_IMAD: return "imad"; - case OPCODE_MUL: return "mul"; - case OPCODE_IMUL: return "imul"; - case OPCODE_RCPX: return "rcpx"; - case OPCODE_DIV: return "div"; - case OPCODE_IDIV: return "idiv"; - case OPCODE_UDIV: return "udiv"; - case OPCODE_MOD: return "mod"; - case OPCODE_IMOD: return "imod"; - case OPCODE_UMOD: return "umod"; - case OPCODE_SHL: return "shl"; - case OPCODE_ISHR: return "ishr"; - case OPCODE_USHR: return "ushr"; - case OPCODE_RSQX: return "rsqx"; - case OPCODE_SQRT: return "sqrt"; - case OPCODE_RSQ: return "rsq"; - case OPCODE_LEN2: return "len2"; - case OPCODE_LEN3: return "len3"; - case OPCODE_LEN4: return "len4"; - case OPCODE_DIST1: return "dist1"; - case OPCODE_DIST2: return "dist2"; - case OPCODE_DIST3: return "dist3"; - case OPCODE_DIST4: return "dist4"; - case OPCODE_DP3: return "dp3"; - case OPCODE_DP4: return "dp4"; - case OPCODE_DET2: return "det2"; - case OPCODE_DET3: return "det3"; - case OPCODE_DET4: return "det4"; - case OPCODE_MIN: return "min"; - case OPCODE_IMIN: return "imin"; - case OPCODE_UMIN: return "umin"; - case OPCODE_MAX: return "max"; - case OPCODE_IMAX: return "imax"; - case OPCODE_UMAX: return "umax"; - case OPCODE_SLT: return "slt"; - case OPCODE_SGE: return "sge"; - case OPCODE_EXP2X: return "exp2x"; - case OPCODE_LOG2X: return "log2x"; - case OPCODE_LIT: return "lit"; - case OPCODE_ATT: return "att"; - case OPCODE_LRP: return "lrp"; - case OPCODE_STEP: return "step"; - case OPCODE_SMOOTH: return "smooth"; - case OPCODE_FLOATBITSTOINT: return "floatBitsToInt"; + case OPCODE_NULL: return "null"; + case OPCODE_NOP: return "nop"; + case OPCODE_MOV: return "mov"; + case OPCODE_ADD: return "add"; + 
case OPCODE_IADD: return "iadd"; + case OPCODE_SUB: return "sub"; + case OPCODE_ISUB: return "isub"; + case OPCODE_MAD: return "mad"; + case OPCODE_IMAD: return "imad"; + case OPCODE_MUL: return "mul"; + case OPCODE_IMUL: return "imul"; + case OPCODE_RCPX: return "rcpx"; + case OPCODE_DIV: return "div"; + case OPCODE_IDIV: return "idiv"; + case OPCODE_UDIV: return "udiv"; + case OPCODE_MOD: return "mod"; + case OPCODE_IMOD: return "imod"; + case OPCODE_UMOD: return "umod"; + case OPCODE_SHL: return "shl"; + case OPCODE_ISHR: return "ishr"; + case OPCODE_USHR: return "ushr"; + case OPCODE_RSQX: return "rsqx"; + case OPCODE_SQRT: return "sqrt"; + case OPCODE_RSQ: return "rsq"; + case OPCODE_LEN2: return "len2"; + case OPCODE_LEN3: return "len3"; + case OPCODE_LEN4: return "len4"; + case OPCODE_DIST1: return "dist1"; + case OPCODE_DIST2: return "dist2"; + case OPCODE_DIST3: return "dist3"; + case OPCODE_DIST4: return "dist4"; + case OPCODE_DP3: return "dp3"; + case OPCODE_DP4: return "dp4"; + case OPCODE_DET2: return "det2"; + case OPCODE_DET3: return "det3"; + case OPCODE_DET4: return "det4"; + case OPCODE_MIN: return "min"; + case OPCODE_IMIN: return "imin"; + case OPCODE_UMIN: return "umin"; + case OPCODE_MAX: return "max"; + case OPCODE_IMAX: return "imax"; + case OPCODE_UMAX: return "umax"; + case OPCODE_SLT: return "slt"; + case OPCODE_SGE: return "sge"; + case OPCODE_EXP2X: return "exp2x"; + case OPCODE_LOG2X: return "log2x"; + case OPCODE_LIT: return "lit"; + case OPCODE_ATT: return "att"; + case OPCODE_LRP: return "lrp"; + case OPCODE_STEP: return "step"; + case OPCODE_SMOOTH: return "smooth"; + case OPCODE_FLOATBITSTOINT: return "floatBitsToInt"; case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt"; - case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat"; + case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat"; case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat"; - case OPCODE_PACKSNORM2x16: return "packSnorm2x16"; - case OPCODE_PACKUNORM2x16: return 
"packUnorm2x16"; - case OPCODE_PACKHALF2x16: return "packHalf2x16"; + case OPCODE_PACKSNORM2x16: return "packSnorm2x16"; + case OPCODE_PACKUNORM2x16: return "packUnorm2x16"; + case OPCODE_PACKHALF2x16: return "packHalf2x16"; case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16"; case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16"; - case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16"; - case OPCODE_FRC: return "frc"; - case OPCODE_M4X4: return "m4x4"; - case OPCODE_M4X3: return "m4x3"; - case OPCODE_M3X4: return "m3x4"; - case OPCODE_M3X3: return "m3x3"; - case OPCODE_M3X2: return "m3x2"; - case OPCODE_CALL: return "call"; - case OPCODE_CALLNZ: return "callnz"; - case OPCODE_LOOP: return "loop"; - case OPCODE_RET: return "ret"; - case OPCODE_ENDLOOP: return "endloop"; - case OPCODE_LABEL: return "label"; - case OPCODE_DCL: return "dcl"; - case OPCODE_POWX: return "powx"; - case OPCODE_CRS: return "crs"; - case OPCODE_SGN: return "sgn"; - case OPCODE_ISGN: return "isgn"; - case OPCODE_ABS: return "abs"; - case OPCODE_IABS: return "iabs"; - case OPCODE_NRM2: return "nrm2"; - case OPCODE_NRM3: return "nrm3"; - case OPCODE_NRM4: return "nrm4"; - case OPCODE_SINCOS: return "sincos"; - case OPCODE_REP: return "rep"; - case OPCODE_ENDREP: return "endrep"; - case OPCODE_IF: return "if"; - case OPCODE_IFC: return "ifc"; - case OPCODE_ELSE: return "else"; - case OPCODE_ENDIF: return "endif"; - case OPCODE_BREAK: return "break"; - case OPCODE_BREAKC: return "breakc"; - case OPCODE_MOVA: return "mova"; - case OPCODE_DEFB: return "defb"; - case OPCODE_DEFI: return "defi"; - case OPCODE_TEXCOORD: return "texcoord"; - case OPCODE_TEXKILL: return "texkill"; - case OPCODE_DISCARD: return "discard"; + case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16"; + case OPCODE_FRC: return "frc"; + case OPCODE_M4X4: return "m4x4"; + case OPCODE_M4X3: return "m4x3"; + case OPCODE_M3X4: return "m3x4"; + case OPCODE_M3X3: return "m3x3"; + case OPCODE_M3X2: return "m3x2"; + case OPCODE_CALL: 
return "call"; + case OPCODE_CALLNZ: return "callnz"; + case OPCODE_LOOP: return "loop"; + case OPCODE_RET: return "ret"; + case OPCODE_ENDLOOP: return "endloop"; + case OPCODE_LABEL: return "label"; + case OPCODE_DCL: return "dcl"; + case OPCODE_POWX: return "powx"; + case OPCODE_CRS: return "crs"; + case OPCODE_SGN: return "sgn"; + case OPCODE_ISGN: return "isgn"; + case OPCODE_ABS: return "abs"; + case OPCODE_IABS: return "iabs"; + case OPCODE_NRM2: return "nrm2"; + case OPCODE_NRM3: return "nrm3"; + case OPCODE_NRM4: return "nrm4"; + case OPCODE_SINCOS: return "sincos"; + case OPCODE_REP: return "rep"; + case OPCODE_ENDREP: return "endrep"; + case OPCODE_IF: return "if"; + case OPCODE_IFC: return "ifc"; + case OPCODE_ELSE: return "else"; + case OPCODE_ENDIF: return "endif"; + case OPCODE_BREAK: return "break"; + case OPCODE_BREAKC: return "breakc"; + case OPCODE_MOVA: return "mova"; + case OPCODE_DEFB: return "defb"; + case OPCODE_DEFI: return "defi"; + case OPCODE_TEXCOORD: return "texcoord"; + case OPCODE_TEXKILL: return "texkill"; + case OPCODE_DISCARD: return "discard"; case OPCODE_TEX: - if(version < 0x0104) return "tex"; - else return "texld"; - case OPCODE_TEXBEM: return "texbem"; - case OPCODE_TEXBEML: return "texbeml"; - case OPCODE_TEXREG2AR: return "texreg2ar"; - case OPCODE_TEXREG2GB: return "texreg2gb"; - case OPCODE_TEXM3X2PAD: return "texm3x2pad"; - case OPCODE_TEXM3X2TEX: return "texm3x2tex"; - case OPCODE_TEXM3X3PAD: return "texm3x3pad"; - case OPCODE_TEXM3X3TEX: return "texm3x3tex"; - case OPCODE_RESERVED0: return "reserved0"; - case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; - case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; - case OPCODE_EXPP: return "expp"; - case OPCODE_LOGP: return "logp"; - case OPCODE_CND: return "cnd"; - case OPCODE_DEF: return "def"; - case OPCODE_TEXREG2RGB: return "texreg2rgb"; - case OPCODE_TEXDP3TEX: return "texdp3tex"; - case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; - case OPCODE_TEXDP3: return "texdp3"; - case 
OPCODE_TEXM3X3: return "texm3x3"; - case OPCODE_TEXDEPTH: return "texdepth"; - case OPCODE_CMP0: return "cmp0"; - case OPCODE_ICMP: return "icmp"; - case OPCODE_UCMP: return "ucmp"; - case OPCODE_SELECT: return "select"; - case OPCODE_EXTRACT: return "extract"; - case OPCODE_INSERT: return "insert"; - case OPCODE_BEM: return "bem"; - case OPCODE_DP2ADD: return "dp2add"; - case OPCODE_DFDX: return "dFdx"; - case OPCODE_DFDY: return "dFdy"; - case OPCODE_FWIDTH: return "fwidth"; - case OPCODE_TEXLDD: return "texldd"; - case OPCODE_CMP: return "cmp"; - case OPCODE_TEXLDL: return "texldl"; - case OPCODE_TEXOFFSET: return "texoffset"; - case OPCODE_TEXLDLOFFSET: return "texldloffset"; - case OPCODE_TEXELFETCH: return "texelfetch"; + if(version < 0x0104) return "tex"; + else return "texld"; + case OPCODE_TEXBEM: return "texbem"; + case OPCODE_TEXBEML: return "texbeml"; + case OPCODE_TEXREG2AR: return "texreg2ar"; + case OPCODE_TEXREG2GB: return "texreg2gb"; + case OPCODE_TEXM3X2PAD: return "texm3x2pad"; + case OPCODE_TEXM3X2TEX: return "texm3x2tex"; + case OPCODE_TEXM3X3PAD: return "texm3x3pad"; + case OPCODE_TEXM3X3TEX: return "texm3x3tex"; + case OPCODE_RESERVED0: return "reserved0"; + case OPCODE_TEXM3X3SPEC: return "texm3x3spec"; + case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec"; + case OPCODE_EXPP: return "expp"; + case OPCODE_LOGP: return "logp"; + case OPCODE_CND: return "cnd"; + case OPCODE_DEF: return "def"; + case OPCODE_TEXREG2RGB: return "texreg2rgb"; + case OPCODE_TEXDP3TEX: return "texdp3tex"; + case OPCODE_TEXM3X2DEPTH: return "texm3x2depth"; + case OPCODE_TEXDP3: return "texdp3"; + case OPCODE_TEXM3X3: return "texm3x3"; + case OPCODE_TEXDEPTH: return "texdepth"; + case OPCODE_CMP0: return "cmp0"; + case OPCODE_ICMP: return "icmp"; + case OPCODE_UCMP: return "ucmp"; + case OPCODE_SELECT: return "select"; + case OPCODE_EXTRACT: return "extract"; + case OPCODE_INSERT: return "insert"; + case OPCODE_BEM: return "bem"; + case OPCODE_DP2ADD: return "dp2add"; + 
case OPCODE_DFDX: return "dFdx"; + case OPCODE_DFDY: return "dFdy"; + case OPCODE_FWIDTH: return "fwidth"; + case OPCODE_TEXLDD: return "texldd"; + case OPCODE_CMP: return "cmp"; + case OPCODE_TEXLDL: return "texldl"; + case OPCODE_TEXBIAS: return "texbias"; + case OPCODE_TEXOFFSET: return "texoffset"; + case OPCODE_TEXOFFSETBIAS: return "texoffsetbias"; + case OPCODE_TEXLODOFFSET: return "texlodoffset"; + case OPCODE_TEXELFETCH: return "texelfetch"; case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset"; - case OPCODE_TEXGRAD: return "texgrad"; - case OPCODE_TEXGRADOFFSET: return "texgradoffset"; - case OPCODE_BREAKP: return "breakp"; - case OPCODE_TEXSIZE: return "texsize"; - case OPCODE_PHASE: return "phase"; - case OPCODE_COMMENT: return "comment"; - case OPCODE_END: return "end"; - case OPCODE_PS_1_0: return "ps_1_0"; - case OPCODE_PS_1_1: return "ps_1_1"; - case OPCODE_PS_1_2: return "ps_1_2"; - case OPCODE_PS_1_3: return "ps_1_3"; - case OPCODE_PS_1_4: return "ps_1_4"; - case OPCODE_PS_2_0: return "ps_2_0"; - case OPCODE_PS_2_x: return "ps_2_x"; - case OPCODE_PS_3_0: return "ps_3_0"; - case OPCODE_VS_1_0: return "vs_1_0"; - case OPCODE_VS_1_1: return "vs_1_1"; - case OPCODE_VS_2_0: return "vs_2_0"; - case OPCODE_VS_2_x: return "vs_2_x"; - case OPCODE_VS_2_sw: return "vs_2_sw"; - case OPCODE_VS_3_0: return "vs_3_0"; - case OPCODE_VS_3_sw: return "vs_3_sw"; - case OPCODE_WHILE: return "while"; - case OPCODE_ENDWHILE: return "endwhile"; - case OPCODE_COS: return "cos"; - case OPCODE_SIN: return "sin"; - case OPCODE_TAN: return "tan"; - case OPCODE_ACOS: return "acos"; - case OPCODE_ASIN: return "asin"; - case OPCODE_ATAN: return "atan"; - case OPCODE_ATAN2: return "atan2"; - case OPCODE_COSH: return "cosh"; - case OPCODE_SINH: return "sinh"; - case OPCODE_TANH: return "tanh"; - case OPCODE_ACOSH: return "acosh"; - case OPCODE_ASINH: return "asinh"; - case OPCODE_ATANH: return "atanh"; - case OPCODE_DP1: return "dp1"; - case OPCODE_DP2: return "dp2"; - case 
OPCODE_TRUNC: return "trunc"; - case OPCODE_FLOOR: return "floor"; - case OPCODE_ROUND: return "round"; - case OPCODE_ROUNDEVEN: return "roundEven"; - case OPCODE_CEIL: return "ceil"; - case OPCODE_EXP2: return "exp2"; - case OPCODE_LOG2: return "log2"; - case OPCODE_EXP: return "exp"; - case OPCODE_LOG: return "log"; - case OPCODE_POW: return "pow"; - case OPCODE_F2B: return "f2b"; - case OPCODE_B2F: return "b2f"; - case OPCODE_F2I: return "f2i"; - case OPCODE_I2F: return "i2f"; - case OPCODE_F2U: return "f2u"; - case OPCODE_U2F: return "u2f"; - case OPCODE_B2I: return "b2i"; - case OPCODE_I2B: return "i2b"; - case OPCODE_ALL: return "all"; - case OPCODE_ANY: return "any"; - case OPCODE_NEG: return "neg"; - case OPCODE_INEG: return "ineg"; - case OPCODE_ISNAN: return "isnan"; - case OPCODE_ISINF: return "isinf"; - case OPCODE_NOT: return "not"; - case OPCODE_OR: return "or"; - case OPCODE_XOR: return "xor"; - case OPCODE_AND: return "and"; - case OPCODE_EQ: return "eq"; - case OPCODE_NE: return "neq"; - case OPCODE_FORWARD1: return "forward1"; - case OPCODE_FORWARD2: return "forward2"; - case OPCODE_FORWARD3: return "forward3"; - case OPCODE_FORWARD4: return "forward4"; - case OPCODE_REFLECT1: return "reflect1"; - case OPCODE_REFLECT2: return "reflect2"; - case OPCODE_REFLECT3: return "reflect3"; - case OPCODE_REFLECT4: return "reflect4"; - case OPCODE_REFRACT1: return "refract1"; - case OPCODE_REFRACT2: return "refract2"; - case OPCODE_REFRACT3: return "refract3"; - case OPCODE_REFRACT4: return "refract4"; - case OPCODE_LEAVE: return "leave"; - case OPCODE_CONTINUE: return "continue"; - case OPCODE_TEST: return "test"; - case OPCODE_SWITCH: return "switch"; - case OPCODE_ENDSWITCH: return "endswitch"; + case OPCODE_TEXGRAD: return "texgrad"; + case OPCODE_TEXGRADOFFSET: return "texgradoffset"; + case OPCODE_BREAKP: return "breakp"; + case OPCODE_TEXSIZE: return "texsize"; + case OPCODE_PHASE: return "phase"; + case OPCODE_COMMENT: return "comment"; + case 
OPCODE_END: return "end"; + case OPCODE_PS_1_0: return "ps_1_0"; + case OPCODE_PS_1_1: return "ps_1_1"; + case OPCODE_PS_1_2: return "ps_1_2"; + case OPCODE_PS_1_3: return "ps_1_3"; + case OPCODE_PS_1_4: return "ps_1_4"; + case OPCODE_PS_2_0: return "ps_2_0"; + case OPCODE_PS_2_x: return "ps_2_x"; + case OPCODE_PS_3_0: return "ps_3_0"; + case OPCODE_VS_1_0: return "vs_1_0"; + case OPCODE_VS_1_1: return "vs_1_1"; + case OPCODE_VS_2_0: return "vs_2_0"; + case OPCODE_VS_2_x: return "vs_2_x"; + case OPCODE_VS_2_sw: return "vs_2_sw"; + case OPCODE_VS_3_0: return "vs_3_0"; + case OPCODE_VS_3_sw: return "vs_3_sw"; + case OPCODE_WHILE: return "while"; + case OPCODE_ENDWHILE: return "endwhile"; + case OPCODE_COS: return "cos"; + case OPCODE_SIN: return "sin"; + case OPCODE_TAN: return "tan"; + case OPCODE_ACOS: return "acos"; + case OPCODE_ASIN: return "asin"; + case OPCODE_ATAN: return "atan"; + case OPCODE_ATAN2: return "atan2"; + case OPCODE_COSH: return "cosh"; + case OPCODE_SINH: return "sinh"; + case OPCODE_TANH: return "tanh"; + case OPCODE_ACOSH: return "acosh"; + case OPCODE_ASINH: return "asinh"; + case OPCODE_ATANH: return "atanh"; + case OPCODE_DP1: return "dp1"; + case OPCODE_DP2: return "dp2"; + case OPCODE_TRUNC: return "trunc"; + case OPCODE_FLOOR: return "floor"; + case OPCODE_ROUND: return "round"; + case OPCODE_ROUNDEVEN: return "roundEven"; + case OPCODE_CEIL: return "ceil"; + case OPCODE_EXP2: return "exp2"; + case OPCODE_LOG2: return "log2"; + case OPCODE_EXP: return "exp"; + case OPCODE_LOG: return "log"; + case OPCODE_POW: return "pow"; + case OPCODE_F2B: return "f2b"; + case OPCODE_B2F: return "b2f"; + case OPCODE_F2I: return "f2i"; + case OPCODE_I2F: return "i2f"; + case OPCODE_F2U: return "f2u"; + case OPCODE_U2F: return "u2f"; + case OPCODE_B2I: return "b2i"; + case OPCODE_I2B: return "i2b"; + case OPCODE_ALL: return "all"; + case OPCODE_ANY: return "any"; + case OPCODE_NEG: return "neg"; + case OPCODE_INEG: return "ineg"; + case OPCODE_ISNAN: 
return "isnan"; + case OPCODE_ISINF: return "isinf"; + case OPCODE_NOT: return "not"; + case OPCODE_OR: return "or"; + case OPCODE_XOR: return "xor"; + case OPCODE_AND: return "and"; + case OPCODE_EQ: return "eq"; + case OPCODE_NE: return "neq"; + case OPCODE_FORWARD1: return "forward1"; + case OPCODE_FORWARD2: return "forward2"; + case OPCODE_FORWARD3: return "forward3"; + case OPCODE_FORWARD4: return "forward4"; + case OPCODE_REFLECT1: return "reflect1"; + case OPCODE_REFLECT2: return "reflect2"; + case OPCODE_REFLECT3: return "reflect3"; + case OPCODE_REFLECT4: return "reflect4"; + case OPCODE_REFRACT1: return "refract1"; + case OPCODE_REFRACT2: return "refract2"; + case OPCODE_REFRACT3: return "refract3"; + case OPCODE_REFRACT4: return "refract4"; + case OPCODE_LEAVE: return "leave"; + case OPCODE_CONTINUE: return "continue"; + case OPCODE_TEST: return "test"; + case OPCODE_SWITCH: return "switch"; + case OPCODE_ENDSWITCH: return "endswitch"; default: ASSERT(false); } @@ -1824,8 +1826,10 @@ case OPCODE_TEXM3X2DEPTH: case OPCODE_TEXLDD: case OPCODE_TEXLDL: + case OPCODE_TEXLOD: case OPCODE_TEXOFFSET: - case OPCODE_TEXLDLOFFSET: + case OPCODE_TEXOFFSETBIAS: + case OPCODE_TEXLODOFFSET: case OPCODE_TEXELFETCH: case OPCODE_TEXELFETCHOFFSET: case OPCODE_TEXGRAD:
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp index ee69e8b..e5a74d2 100644 --- a/src/Shader/Shader.hpp +++ b/src/Shader/Shader.hpp
@@ -118,7 +118,6 @@ OPCODE_CMP, // D3DSIO_SETP OPCODE_TEXLDL, OPCODE_BREAKP, - OPCODE_TEXSIZE, OPCODE_PHASE = 0xFFFD, OPCODE_COMMENT = 0xFFFE, @@ -207,11 +206,15 @@ OPCODE_ISNAN, OPCODE_ISINF, OPCODE_TEXOFFSET, - OPCODE_TEXLDLOFFSET, + OPCODE_TEXLODOFFSET, OPCODE_TEXELFETCH, OPCODE_TEXELFETCHOFFSET, OPCODE_TEXGRAD, OPCODE_TEXGRADOFFSET, + OPCODE_TEXBIAS, + OPCODE_TEXLOD, + OPCODE_TEXOFFSETBIAS, + OPCODE_TEXSIZE, OPCODE_FLOATBITSTOINT, OPCODE_FLOATBITSTOUINT, OPCODE_INTBITSTOFLOAT,
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp index ca3149a..0f53005 100644 --- a/src/Shader/VertexProgram.cpp +++ b/src/Shader/VertexProgram.cpp
@@ -326,14 +326,15 @@ case Shader::OPCODE_AND: bitwise_and(d, s0, s1); break; case Shader::OPCODE_EQ: equal(d, s0, s1); break; case Shader::OPCODE_NE: notEqual(d, s0, s1); break; - case Shader::OPCODE_TEXLDL: TEXLDL(d, s0, src1); break; + case Shader::OPCODE_TEXLDL: TEXLOD(d, s0, src1, s0.w); break; + case Shader::OPCODE_TEXLOD: TEXLOD(d, s0, src1, s2.x); break; case Shader::OPCODE_TEX: TEX(d, s0, src1); break; case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2); break; - case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2); break; - case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1); break; - case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2); break; + case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break; + case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x); break; + case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break; case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; - case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break; + case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break; case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; case Shader::OPCODE_END: break; default: @@ -1556,46 +1557,44 @@ // FIXME: Use enableLeave in other control-flow constructs } - void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1) - { - dst = sampleTexture(src1, src0, a0, a0, src0, Lod); - } - void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1) { - src0.w = Float(0.0f); - dst = sampleTexture(src1, src0, a0, a0, src0, Lod); + dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), (src0), Base); } - void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2) + void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset) { - src0.w = Float(0.0f); - dst = sampleTexture(src1, src0, a0, a0, src2, {Lod, Offset}); 
+ dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Base, Offset}); } - void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset) + void VertexProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) { - dst = sampleTexture(src1, src0, a0, a0, offset, {Lod, Offset}); + dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod); } - void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1) + void VertexProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) { - dst = sampleTexture(src1, src0, src0, src0, src0, Fetch); + dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset}); } - void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset) + void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) { - dst = sampleTexture(src1, src0, src0, src0, offset, {Fetch, Offset}); + dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch); } - void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3) + void VertexProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) { - dst = sampleTexture(src1, src0, src2, src3, src0, Grad); + dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset}); } - void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset) + void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy) { - dst = sampleTexture(src1, src0, src2, src3, offset, {Grad, Offset}); + dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, src0, Grad); + } + + void VertexProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset) + { + dst = 
sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset}); } void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) @@ -1604,13 +1603,13 @@ dst = SamplerCore::textureSize(texture, lod); } - Vector4f VertexProgram::sampleTexture(const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) + Vector4f VertexProgram::sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) { Vector4f tmp; if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID) { - tmp = sampleTexture(s.index, uvwq, dsx, dsy, offset, function); + tmp = sampleTexture(s.index, uvwq, lod, dsx, dsy, offset, function); } else { @@ -1622,7 +1621,7 @@ { If(index == i) { - tmp = sampleTexture(i, uvwq, dsx, dsy, offset, function); + tmp = sampleTexture(i, uvwq, lod, dsx, dsy, offset, function); // FIXME: When the sampler states are the same, we could use one sampler and just index the texture } } @@ -1638,9 +1637,9 @@ return c; } - Vector4f VertexProgram::sampleTexture(int sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) + Vector4f VertexProgram::sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) { Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + sampler * sizeof(Texture); - return SamplerCore(constants, state.sampler[sampler]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function); + return SamplerCore(constants, state.sampler[sampler]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, lod, dsx, dsy, offset, function); } }
diff --git a/src/Shader/VertexProgram.hpp b/src/Shader/VertexProgram.hpp index 5e70392..fddea49 100644 --- a/src/Shader/VertexProgram.hpp +++ b/src/Shader/VertexProgram.hpp
@@ -107,18 +107,18 @@ void SWITCH(); void RET(); void LEAVE(); - void TEXLDL(Vector4f &dst, Vector4f &src, const Src&); void TEX(Vector4f &dst, Vector4f &src, const Src&); - void TEXOFFSET(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2); - void TEXLDL(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2); - void TEXELFETCH(Vector4f &dst, Vector4f &src, const Src&); - void TEXELFETCH(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2); - void TEXGRAD(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2, Vector4f &src3); - void TEXGRAD(Vector4f &dst, Vector4f &src, const Src&, Vector4f &src2, Vector4f &src3, Vector4f &src4); + void TEXOFFSET(Vector4f &dst, Vector4f &src, const Src&, Vector4f &offset); + void TEXLOD(Vector4f &dst, Vector4f &src, const Src&, Float4 &lod); + void TEXLODOFFSET(Vector4f &dst, Vector4f &src, const Src&, Vector4f &offset, Float4 &lod); + void TEXELFETCH(Vector4f &dst, Vector4f &src, const Src&, Float4 &lod); + void TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src, const Src&, Vector4f &offset, Float4 &lod); + void TEXGRAD(Vector4f &dst, Vector4f &src, const Src&, Vector4f &dsx, Vector4f &dsy); + void TEXGRADOFFSET(Vector4f &dst, Vector4f &src, const Src&, Vector4f &dsx, Vector4f &dsy, Vector4f &offset); void TEXSIZE(Vector4f &dst, Float4 &lod, const Src&); - Vector4f sampleTexture(const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); - Vector4f sampleTexture(int sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); + Vector4f sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); + Vector4f sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function); int ifDepth; int loopRepDepth;