blob: 561ccbd21b2df259cd80d6e9aeb9399863d6ad3b [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Shader.hpp"
#include "VertexShader.hpp"
#include "PixelShader.hpp"
#include "Common/Math.hpp"
#include "Common/Debug.hpp"
#include <algorithm>
#include <set>
#include <fstream>
#include <functional>
#include <sstream>
#include <stdarg.h>
#include <unordered_map>
#include <unordered_set>
namespace sw
{
volatile int Shader::serialCounter = 1;
Shader::Opcode Shader::OPCODE_DP(int i)
{
switch(i)
{
default: ASSERT(false);
case 1: return OPCODE_DP1;
case 2: return OPCODE_DP2;
case 3: return OPCODE_DP3;
case 4: return OPCODE_DP4;
}
}
Shader::Opcode Shader::OPCODE_LEN(int i)
{
switch(i)
{
default: ASSERT(false);
case 1: return OPCODE_ABS;
case 2: return OPCODE_LEN2;
case 3: return OPCODE_LEN3;
case 4: return OPCODE_LEN4;
}
}
Shader::Opcode Shader::OPCODE_DIST(int i)
{
switch(i)
{
default: ASSERT(false);
case 1: return OPCODE_DIST1;
case 2: return OPCODE_DIST2;
case 3: return OPCODE_DIST3;
case 4: return OPCODE_DIST4;
}
}
Shader::Opcode Shader::OPCODE_NRM(int i)
{
switch(i)
{
default: ASSERT(false);
case 1: return OPCODE_SGN;
case 2: return OPCODE_NRM2;
case 3: return OPCODE_NRM3;
case 4: return OPCODE_NRM4;
}
}
Shader::Opcode Shader::OPCODE_FORWARD(int i)
{
switch(i)
{
default: ASSERT(false);
case 1: return OPCODE_FORWARD1;
case 2: return OPCODE_FORWARD2;
case 3: return OPCODE_FORWARD3;
case 4: return OPCODE_FORWARD4;
}
}
Shader::Opcode Shader::OPCODE_REFLECT(int i)
{
switch(i)
{
default: ASSERT(false);
case 1: return OPCODE_REFLECT1;
case 2: return OPCODE_REFLECT2;
case 3: return OPCODE_REFLECT3;
case 4: return OPCODE_REFLECT4;
}
}
Shader::Opcode Shader::OPCODE_REFRACT(int i)
{
switch(i)
{
default: ASSERT(false);
case 1: return OPCODE_REFRACT1;
case 2: return OPCODE_REFRACT2;
case 3: return OPCODE_REFRACT3;
case 4: return OPCODE_REFRACT4;
}
}
Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
{
control = CONTROL_RESERVED0;
predicate = false;
predicateNot = false;
predicateSwizzle = 0xE4;
coissue = false;
samplerType = SAMPLER_UNKNOWN;
usage = USAGE_POSITION;
usageIndex = 0;
}
Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
{
parseOperationToken(*token++, majorVersion);
samplerType = SAMPLER_UNKNOWN;
usage = USAGE_POSITION;
usageIndex = 0;
if(opcode == OPCODE_IF ||
opcode == OPCODE_IFC ||
opcode == OPCODE_LOOP ||
opcode == OPCODE_REP ||
opcode == OPCODE_BREAKC ||
opcode == OPCODE_BREAKP) // No destination operand
{
if(size > 0) parseSourceToken(0, token++, majorVersion);
if(size > 1) parseSourceToken(1, token++, majorVersion);
if(size > 2) parseSourceToken(2, token++, majorVersion);
if(size > 3) ASSERT(false);
}
else if(opcode == OPCODE_DCL)
{
parseDeclarationToken(*token++);
parseDestinationToken(token++, majorVersion);
}
else
{
if(size > 0)
{
parseDestinationToken(token, majorVersion);
if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
{
token++;
size--;
}
token++;
size--;
}
if(predicate)
{
ASSERT(size != 0);
predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
token++;
size--;
}
for(int i = 0; size > 0; i++)
{
parseSourceToken(i, token, majorVersion);
token++;
size--;
if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
{
token++;
size--;
}
}
}
}
Shader::Instruction::~Instruction()
{
}
std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
{
std::string instructionString;
if(opcode != OPCODE_DCL)
{
instructionString += coissue ? "+ " : "";
if(predicate)
{
instructionString += predicateNot ? "(!p0" : "(p0";
instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
instructionString += ") ";
}
instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
if(dst.type != PARAMETER_VOID)
{
instructionString += " " + dst.string(shaderType, version) +
dst.relativeString() +
dst.maskString();
}
for(int i = 0; i < 4; i++)
{
if(src[i].type != PARAMETER_VOID)
{
instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
instructionString += src[i].preModifierString() +
src[i].string(shaderType, version) +
src[i].relativeString() +
src[i].postModifierString() +
src[i].swizzleString();
}
}
}
else // DCL
{
instructionString += "dcl";
if(dst.type == PARAMETER_SAMPLER)
{
switch(samplerType)
{
case SAMPLER_UNKNOWN: instructionString += " "; break;
case SAMPLER_1D: instructionString += "_1d "; break;
case SAMPLER_2D: instructionString += "_2d "; break;
case SAMPLER_CUBE: instructionString += "_cube "; break;
case SAMPLER_VOLUME: instructionString += "_volume "; break;
default:
ASSERT(false);
}
instructionString += dst.string(shaderType, version);
}
else if(dst.type == PARAMETER_INPUT ||
dst.type == PARAMETER_OUTPUT ||
dst.type == PARAMETER_TEXTURE)
{
if(version >= 0x0300)
{
switch(usage)
{
case USAGE_POSITION: instructionString += "_position"; break;
case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break;
case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
case USAGE_NORMAL: instructionString += "_normal"; break;
case USAGE_PSIZE: instructionString += "_psize"; break;
case USAGE_TEXCOORD: instructionString += "_texcoord"; break;
case USAGE_TANGENT: instructionString += "_tangent"; break;
case USAGE_BINORMAL: instructionString += "_binormal"; break;
case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break;
case USAGE_POSITIONT: instructionString += "_positiont"; break;
case USAGE_COLOR: instructionString += "_color"; break;
case USAGE_FOG: instructionString += "_fog"; break;
case USAGE_DEPTH: instructionString += "_depth"; break;
case USAGE_SAMPLE: instructionString += "_sample"; break;
default:
ASSERT(false);
}
if(usageIndex > 0)
{
std::ostringstream buffer;
buffer << (int)usageIndex;
instructionString += buffer.str();
}
}
else ASSERT(dst.type != PARAMETER_OUTPUT);
instructionString += " ";
instructionString += dst.string(shaderType, version);
instructionString += dst.maskString();
}
else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace
{
instructionString += " ";
instructionString += dst.string(shaderType, version);
}
else ASSERT(false);
}
return instructionString;
}
std::string Shader::DestinationParameter::modifierString() const
{
if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
{
return "";
}
std::string modifierString;
if(saturate)
{
modifierString += "_sat";
}
if(partialPrecision)
{
modifierString += "_pp";
}
if(centroid)
{
modifierString += "_centroid";
}
return modifierString;
}
std::string Shader::DestinationParameter::shiftString() const
{
if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
{
return "";
}
switch(shift)
{
case 0: return "";
case 1: return "_x2";
case 2: return "_x4";
case 3: return "_x8";
case -1: return "_d2";
case -2: return "_d4";
case -3: return "_d8";
default:
return "";
// ASSERT(false); // FIXME
}
}
std::string Shader::DestinationParameter::maskString() const
{
if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
{
return "";
}
switch(mask)
{
case 0x0: return "";
case 0x1: return ".x";
case 0x2: return ".y";
case 0x3: return ".xy";
case 0x4: return ".z";
case 0x5: return ".xz";
case 0x6: return ".yz";
case 0x7: return ".xyz";
case 0x8: return ".w";
case 0x9: return ".xw";
case 0xA: return ".yw";
case 0xB: return ".xyw";
case 0xC: return ".zw";
case 0xD: return ".xzw";
case 0xE: return ".yzw";
case 0xF: return "";
default:
ASSERT(false);
}
return "";
}
std::string Shader::SourceParameter::preModifierString() const
{
if(type == PARAMETER_VOID)
{
return "";
}
switch(modifier)
{
case MODIFIER_NONE: return "";
case MODIFIER_NEGATE: return "-";
case MODIFIER_BIAS: return "";
case MODIFIER_BIAS_NEGATE: return "-";
case MODIFIER_SIGN: return "";
case MODIFIER_SIGN_NEGATE: return "-";
case MODIFIER_COMPLEMENT: return "1-";
case MODIFIER_X2: return "";
case MODIFIER_X2_NEGATE: return "-";
case MODIFIER_DZ: return "";
case MODIFIER_DW: return "";
case MODIFIER_ABS: return "";
case MODIFIER_ABS_NEGATE: return "-";
case MODIFIER_NOT: return "!";
default:
ASSERT(false);
}
return "";
}
std::string Shader::Parameter::relativeString() const
{
if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
{
if(rel.type == PARAMETER_VOID)
{
return "";
}
else if(rel.type == PARAMETER_ADDR)
{
switch(rel.swizzle & 0x03)
{
case 0: return "[a0.x]";
case 1: return "[a0.y]";
case 2: return "[a0.z]";
case 3: return "[a0.w]";
}
}
else if(rel.type == PARAMETER_TEMP)
{
std::ostringstream buffer;
buffer << rel.index;
switch(rel.swizzle & 0x03)
{
case 0: return "[r" + buffer.str() + ".x]";
case 1: return "[r" + buffer.str() + ".y]";
case 2: return "[r" + buffer.str() + ".z]";
case 3: return "[r" + buffer.str() + ".w]";
}
}
else if(rel.type == PARAMETER_LOOP)
{
return "[aL]";
}
else if(rel.type == PARAMETER_CONST)
{
std::ostringstream buffer;
buffer << rel.index;
switch(rel.swizzle & 0x03)
{
case 0: return "[c" + buffer.str() + ".x]";
case 1: return "[c" + buffer.str() + ".y]";
case 2: return "[c" + buffer.str() + ".z]";
case 3: return "[c" + buffer.str() + ".w]";
}
}
else ASSERT(false);
}
return "";
}
std::string Shader::SourceParameter::postModifierString() const
{
if(type == PARAMETER_VOID)
{
return "";
}
switch(modifier)
{
case MODIFIER_NONE: return "";
case MODIFIER_NEGATE: return "";
case MODIFIER_BIAS: return "_bias";
case MODIFIER_BIAS_NEGATE: return "_bias";
case MODIFIER_SIGN: return "_bx2";
case MODIFIER_SIGN_NEGATE: return "_bx2";
case MODIFIER_COMPLEMENT: return "";
case MODIFIER_X2: return "_x2";
case MODIFIER_X2_NEGATE: return "_x2";
case MODIFIER_DZ: return "_dz";
case MODIFIER_DW: return "_dw";
case MODIFIER_ABS: return "_abs";
case MODIFIER_ABS_NEGATE: return "_abs";
case MODIFIER_NOT: return "";
default:
ASSERT(false);
}
return "";
}
std::string Shader::SourceParameter::string(ShaderType shaderType, unsigned short version) const
{
if(type == PARAMETER_CONST && bufferIndex >= 0)
{
std::ostringstream buffer;
buffer << bufferIndex;
std::ostringstream offset;
offset << index;
return "cb" + buffer.str() + "[" + offset.str() + "]";
}
else
{
return Parameter::string(shaderType, version);
}
}
std::string Shader::SourceParameter::swizzleString() const
{
return Instruction::swizzleString(type, swizzle);
}
void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
{
if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token
{
opcode = (Opcode)token;
control = CONTROL_RESERVED0;
predicate = false;
coissue = false;
}
else
{
opcode = (Opcode)(token & 0x0000FFFF);
control = (Control)((token & 0x00FF0000) >> 16);
int size = (token & 0x0F000000) >> 24;
predicate = (token & 0x10000000) != 0x00000000;
coissue = (token & 0x40000000) != 0x00000000;
if(majorVersion < 2)
{
if(size != 0)
{
ASSERT(false); // Reserved
}
}
if(majorVersion < 2)
{
if(predicate)
{
ASSERT(false);
}
}
if((token & 0x20000000) != 0x00000000)
{
ASSERT(false); // Reserved
}
if(majorVersion >= 2)
{
if(coissue)
{
ASSERT(false); // Reserved
}
}
if((token & 0x80000000) != 0x00000000)
{
ASSERT(false);
}
}
}
void Shader::Instruction::parseDeclarationToken(unsigned long token)
{
samplerType = (SamplerType)((token & 0x78000000) >> 27);
usage = (Usage)(token & 0x0000001F);
usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
}
void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
{
dst.index = (unsigned short)(token[0] & 0x000007FF);
dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
// TODO: Check type and index range
bool relative = (token[0] & 0x00002000) != 0x00000000;
dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
dst.rel.swizzle = 0x00;
dst.rel.scale = 1;
if(relative && majorVersion >= 3)
{
dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
}
else if(relative) ASSERT(false); // Reserved
if((token[0] & 0x0000C000) != 0x00000000)
{
ASSERT(false); // Reserved
}
dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
dst.saturate = (token[0] & 0x00100000) != 0;
dst.partialPrecision = (token[0] & 0x00200000) != 0;
dst.centroid = (token[0] & 0x00400000) != 0;
dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
if(majorVersion >= 2)
{
if(dst.shift)
{
ASSERT(false); // Reserved
}
}
if((token[0] & 0x80000000) != 0x80000000)
{
ASSERT(false);
}
}
void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
{
// Defaults
src[i].index = 0;
src[i].type = PARAMETER_VOID;
src[i].modifier = MODIFIER_NONE;
src[i].swizzle = 0xE4;
src[i].rel.type = PARAMETER_VOID;
src[i].rel.swizzle = 0x00;
src[i].rel.scale = 1;
switch(opcode)
{
case OPCODE_DEF:
src[0].type = PARAMETER_FLOAT4LITERAL;
src[0].value[i] = *(float*)token;
break;
case OPCODE_DEFB:
src[0].type = PARAMETER_BOOL1LITERAL;
src[0].boolean[0] = *(int*)token;
break;
case OPCODE_DEFI:
src[0].type = PARAMETER_INT4LITERAL;
src[0].integer[i] = *(int*)token;
break;
default:
src[i].index = (unsigned short)(token[0] & 0x000007FF);
src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
// FIXME: Check type and index range
bool relative = (token[0] & 0x00002000) != 0x00000000;
src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
if((token[0] & 0x0000C000) != 0x00000000)
{
if(opcode != OPCODE_DEF &&
opcode != OPCODE_DEFI &&
opcode != OPCODE_DEFB)
{
ASSERT(false);
}
}
src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
if((token[0] & 0x80000000) != 0x80000000)
{
if(opcode != OPCODE_DEF &&
opcode != OPCODE_DEFI &&
opcode != OPCODE_DEFB)
{
ASSERT(false);
}
}
if(relative && majorVersion >= 2)
{
src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
}
}
}
std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
{
if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
{
return "";
}
int x = (swizzle & 0x03) >> 0;
int y = (swizzle & 0x0C) >> 2;
int z = (swizzle & 0x30) >> 4;
int w = (swizzle & 0xC0) >> 6;
std::string swizzleString = ".";
switch(x)
{
case 0: swizzleString += "x"; break;
case 1: swizzleString += "y"; break;
case 2: swizzleString += "z"; break;
case 3: swizzleString += "w"; break;
}
if(!(x == y && y == z && z == w))
{
switch(y)
{
case 0: swizzleString += "x"; break;
case 1: swizzleString += "y"; break;
case 2: swizzleString += "z"; break;
case 3: swizzleString += "w"; break;
}
if(!(y == z && z == w))
{
switch(z)
{
case 0: swizzleString += "x"; break;
case 1: swizzleString += "y"; break;
case 2: swizzleString += "z"; break;
case 3: swizzleString += "w"; break;
}
if(!(z == w))
{
switch(w)
{
case 0: swizzleString += "x"; break;
case 1: swizzleString += "y"; break;
case 2: swizzleString += "z"; break;
case 3: swizzleString += "w"; break;
}
}
}
}
return swizzleString;
}
std::string Shader::Instruction::operationString(unsigned short version) const
{
switch(opcode)
{
case OPCODE_NULL: return "null";
case OPCODE_NOP: return "nop";
case OPCODE_MOV: return "mov";
case OPCODE_ADD: return "add";
case OPCODE_IADD: return "iadd";
case OPCODE_SUB: return "sub";
case OPCODE_ISUB: return "isub";
case OPCODE_MAD: return "mad";
case OPCODE_IMAD: return "imad";
case OPCODE_MUL: return "mul";
case OPCODE_IMUL: return "imul";
case OPCODE_RCPX: return "rcpx";
case OPCODE_DIV: return "div";
case OPCODE_IDIV: return "idiv";
case OPCODE_UDIV: return "udiv";
case OPCODE_MOD: return "mod";
case OPCODE_IMOD: return "imod";
case OPCODE_UMOD: return "umod";
case OPCODE_SHL: return "shl";
case OPCODE_ISHR: return "ishr";
case OPCODE_USHR: return "ushr";
case OPCODE_RSQX: return "rsqx";
case OPCODE_SQRT: return "sqrt";
case OPCODE_RSQ: return "rsq";
case OPCODE_LEN2: return "len2";
case OPCODE_LEN3: return "len3";
case OPCODE_LEN4: return "len4";
case OPCODE_DIST1: return "dist1";
case OPCODE_DIST2: return "dist2";
case OPCODE_DIST3: return "dist3";
case OPCODE_DIST4: return "dist4";
case OPCODE_DP3: return "dp3";
case OPCODE_DP4: return "dp4";
case OPCODE_DET2: return "det2";
case OPCODE_DET3: return "det3";
case OPCODE_DET4: return "det4";
case OPCODE_MIN: return "min";
case OPCODE_IMIN: return "imin";
case OPCODE_UMIN: return "umin";
case OPCODE_MAX: return "max";
case OPCODE_IMAX: return "imax";
case OPCODE_UMAX: return "umax";
case OPCODE_SLT: return "slt";
case OPCODE_SGE: return "sge";
case OPCODE_EXP2X: return "exp2x";
case OPCODE_LOG2X: return "log2x";
case OPCODE_LIT: return "lit";
case OPCODE_ATT: return "att";
case OPCODE_LRP: return "lrp";
case OPCODE_STEP: return "step";
case OPCODE_SMOOTH: return "smooth";
case OPCODE_FLOATBITSTOINT: return "floatBitsToInt";
case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat";
case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
case OPCODE_PACKSNORM2x16: return "packSnorm2x16";
case OPCODE_PACKUNORM2x16: return "packUnorm2x16";
case OPCODE_PACKHALF2x16: return "packHalf2x16";
case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16";
case OPCODE_FRC: return "frc";
case OPCODE_M4X4: return "m4x4";
case OPCODE_M4X3: return "m4x3";
case OPCODE_M3X4: return "m3x4";
case OPCODE_M3X3: return "m3x3";
case OPCODE_M3X2: return "m3x2";
case OPCODE_CALL: return "call";
case OPCODE_CALLNZ: return "callnz";
case OPCODE_LOOP: return "loop";
case OPCODE_RET: return "ret";
case OPCODE_ENDLOOP: return "endloop";
case OPCODE_LABEL: return "label";
case OPCODE_DCL: return "dcl";
case OPCODE_POWX: return "powx";
case OPCODE_CRS: return "crs";
case OPCODE_SGN: return "sgn";
case OPCODE_ISGN: return "isgn";
case OPCODE_ABS: return "abs";
case OPCODE_IABS: return "iabs";
case OPCODE_NRM2: return "nrm2";
case OPCODE_NRM3: return "nrm3";
case OPCODE_NRM4: return "nrm4";
case OPCODE_SINCOS: return "sincos";
case OPCODE_REP: return "rep";
case OPCODE_ENDREP: return "endrep";
case OPCODE_IF: return "if";
case OPCODE_IFC: return "ifc";
case OPCODE_ELSE: return "else";
case OPCODE_ENDIF: return "endif";
case OPCODE_BREAK: return "break";
case OPCODE_BREAKC: return "breakc";
case OPCODE_MOVA: return "mova";
case OPCODE_DEFB: return "defb";
case OPCODE_DEFI: return "defi";
case OPCODE_TEXCOORD: return "texcoord";
case OPCODE_TEXKILL: return "texkill";
case OPCODE_DISCARD: return "discard";
case OPCODE_TEX:
if(version < 0x0104) return "tex";
else return "texld";
case OPCODE_TEXBEM: return "texbem";
case OPCODE_TEXBEML: return "texbeml";
case OPCODE_TEXREG2AR: return "texreg2ar";
case OPCODE_TEXREG2GB: return "texreg2gb";
case OPCODE_TEXM3X2PAD: return "texm3x2pad";
case OPCODE_TEXM3X2TEX: return "texm3x2tex";
case OPCODE_TEXM3X3PAD: return "texm3x3pad";
case OPCODE_TEXM3X3TEX: return "texm3x3tex";
case OPCODE_RESERVED0: return "reserved0";
case OPCODE_TEXM3X3SPEC: return "texm3x3spec";
case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec";
case OPCODE_EXPP: return "expp";
case OPCODE_LOGP: return "logp";
case OPCODE_CND: return "cnd";
case OPCODE_DEF: return "def";
case OPCODE_TEXREG2RGB: return "texreg2rgb";
case OPCODE_TEXDP3TEX: return "texdp3tex";
case OPCODE_TEXM3X2DEPTH: return "texm3x2depth";
case OPCODE_TEXDP3: return "texdp3";
case OPCODE_TEXM3X3: return "texm3x3";
case OPCODE_TEXDEPTH: return "texdepth";
case OPCODE_CMP0: return "cmp0";
case OPCODE_ICMP: return "icmp";
case OPCODE_UCMP: return "ucmp";
case OPCODE_SELECT: return "select";
case OPCODE_EXTRACT: return "extract";
case OPCODE_INSERT: return "insert";
case OPCODE_BEM: return "bem";
case OPCODE_DP2ADD: return "dp2add";
case OPCODE_DFDX: return "dFdx";
case OPCODE_DFDY: return "dFdy";
case OPCODE_FWIDTH: return "fwidth";
case OPCODE_TEXLDD: return "texldd";
case OPCODE_CMP: return "cmp";
case OPCODE_TEXLDL: return "texldl";
case OPCODE_TEXBIAS: return "texbias";
case OPCODE_TEXOFFSET: return "texoffset";
case OPCODE_TEXOFFSETBIAS: return "texoffsetbias";
case OPCODE_TEXLOD: return "texlod";
case OPCODE_TEXLODOFFSET: return "texlodoffset";
case OPCODE_TEXELFETCH: return "texelfetch";
case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
case OPCODE_TEXGRAD: return "texgrad";
case OPCODE_TEXGRADOFFSET: return "texgradoffset";
case OPCODE_BREAKP: return "breakp";
case OPCODE_TEXSIZE: return "texsize";
case OPCODE_PHASE: return "phase";
case OPCODE_COMMENT: return "comment";
case OPCODE_END: return "end";
case OPCODE_PS_1_0: return "ps_1_0";
case OPCODE_PS_1_1: return "ps_1_1";
case OPCODE_PS_1_2: return "ps_1_2";
case OPCODE_PS_1_3: return "ps_1_3";
case OPCODE_PS_1_4: return "ps_1_4";
case OPCODE_PS_2_0: return "ps_2_0";
case OPCODE_PS_2_x: return "ps_2_x";
case OPCODE_PS_3_0: return "ps_3_0";
case OPCODE_VS_1_0: return "vs_1_0";
case OPCODE_VS_1_1: return "vs_1_1";
case OPCODE_VS_2_0: return "vs_2_0";
case OPCODE_VS_2_x: return "vs_2_x";
case OPCODE_VS_2_sw: return "vs_2_sw";
case OPCODE_VS_3_0: return "vs_3_0";
case OPCODE_VS_3_sw: return "vs_3_sw";
case OPCODE_WHILE: return "while";
case OPCODE_ENDWHILE: return "endwhile";
case OPCODE_COS: return "cos";
case OPCODE_SIN: return "sin";
case OPCODE_TAN: return "tan";
case OPCODE_ACOS: return "acos";
case OPCODE_ASIN: return "asin";
case OPCODE_ATAN: return "atan";
case OPCODE_ATAN2: return "atan2";
case OPCODE_COSH: return "cosh";
case OPCODE_SINH: return "sinh";
case OPCODE_TANH: return "tanh";
case OPCODE_ACOSH: return "acosh";
case OPCODE_ASINH: return "asinh";
case OPCODE_ATANH: return "atanh";
case OPCODE_DP1: return "dp1";
case OPCODE_DP2: return "dp2";
case OPCODE_TRUNC: return "trunc";
case OPCODE_FLOOR: return "floor";
case OPCODE_ROUND: return "round";
case OPCODE_ROUNDEVEN: return "roundEven";
case OPCODE_CEIL: return "ceil";
case OPCODE_EXP2: return "exp2";
case OPCODE_LOG2: return "log2";
case OPCODE_EXP: return "exp";
case OPCODE_LOG: return "log";
case OPCODE_POW: return "pow";
case OPCODE_F2B: return "f2b";
case OPCODE_B2F: return "b2f";
case OPCODE_F2I: return "f2i";
case OPCODE_I2F: return "i2f";
case OPCODE_F2U: return "f2u";
case OPCODE_U2F: return "u2f";
case OPCODE_B2I: return "b2i";
case OPCODE_I2B: return "i2b";
case OPCODE_ALL: return "all";
case OPCODE_ANY: return "any";
case OPCODE_NEG: return "neg";
case OPCODE_INEG: return "ineg";
case OPCODE_ISNAN: return "isnan";
case OPCODE_ISINF: return "isinf";
case OPCODE_NOT: return "not";
case OPCODE_OR: return "or";
case OPCODE_XOR: return "xor";
case OPCODE_AND: return "and";
case OPCODE_EQ: return "eq";
case OPCODE_NE: return "neq";
case OPCODE_FORWARD1: return "forward1";
case OPCODE_FORWARD2: return "forward2";
case OPCODE_FORWARD3: return "forward3";
case OPCODE_FORWARD4: return "forward4";
case OPCODE_REFLECT1: return "reflect1";
case OPCODE_REFLECT2: return "reflect2";
case OPCODE_REFLECT3: return "reflect3";
case OPCODE_REFLECT4: return "reflect4";
case OPCODE_REFRACT1: return "refract1";
case OPCODE_REFRACT2: return "refract2";
case OPCODE_REFRACT3: return "refract3";
case OPCODE_REFRACT4: return "refract4";
case OPCODE_LEAVE: return "leave";
case OPCODE_CONTINUE: return "continue";
case OPCODE_TEST: return "test";
case OPCODE_SWITCH: return "switch";
case OPCODE_ENDSWITCH: return "endswitch";
case OPCODE_SCALAR: return "scalar";
}
return "<unknown>";
}
std::string Shader::Instruction::controlString() const
{
if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
{
if(project) return "p";
if(bias) return "b";
// FIXME: LOD
}
switch(control)
{
case 1: return "_gt";
case 2: return "_eq";
case 3: return "_ge";
case 4: return "_lt";
case 5: return "_ne";
case 6: return "_le";
default:
return "";
// ASSERT(false); // FIXME
}
}
std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
{
std::ostringstream buffer;
if(type == PARAMETER_FLOAT4LITERAL)
{
buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
return buffer.str();
}
else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
{
buffer << index;
return typeString(shaderType, version) + buffer.str();
}
else
{
return typeString(shaderType, version);
}
}
std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
{
switch(type)
{
case PARAMETER_TEMP: return "r";
case PARAMETER_INPUT: return "v";
case PARAMETER_CONST: return "c";
case PARAMETER_TEXTURE:
// case PARAMETER_ADDR:
if(shaderType == SHADER_PIXEL) return "t";
else return "a0";
case PARAMETER_RASTOUT:
if(index == 0) return "oPos";
else if(index == 1) return "oFog";
else if(index == 2) return "oPts";
else ASSERT(false);
case PARAMETER_ATTROUT: return "oD";
case PARAMETER_TEXCRDOUT:
// case PARAMETER_OUTPUT: return "";
if(version < 0x0300) return "oT";
else return "o";
case PARAMETER_CONSTINT: return "i";
case PARAMETER_COLOROUT: return "oC";
case PARAMETER_DEPTHOUT: return "oDepth";
case PARAMETER_SAMPLER: return "s";
// case PARAMETER_CONST2: return "";
// case PARAMETER_CONST3: return "";
// case PARAMETER_CONST4: return "";
case PARAMETER_CONSTBOOL: return "b";
case PARAMETER_LOOP: return "aL";
// case PARAMETER_TEMPFLOAT16: return "";
case PARAMETER_MISCTYPE:
switch(index)
{
case VPosIndex: return "vPos";
case VFaceIndex: return "vFace";
case InstanceIDIndex: return "iID";
case VertexIDIndex: return "vID";
default: ASSERT(false);
}
case PARAMETER_LABEL: return "l";
case PARAMETER_PREDICATE: return "p0";
case PARAMETER_FLOAT4LITERAL: return "";
case PARAMETER_BOOL1LITERAL: return "";
case PARAMETER_INT4LITERAL: return "";
// case PARAMETER_VOID: return "";
default:
ASSERT(false);
}
return "";
}
bool Shader::Instruction::isBranch() const
{
return opcode == OPCODE_IF || opcode == OPCODE_IFC;
}
bool Shader::Instruction::isCall() const
{
return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
}
bool Shader::Instruction::isBreak() const
{
return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
}
bool Shader::Instruction::isLoop() const
{
return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
}
bool Shader::Instruction::isEndLoop() const
{
return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
}
bool Shader::Instruction::isPredicated() const
{
return predicate ||
analysisBranch ||
analysisBreak ||
analysisContinue ||
analysisLeave;
}
Shader::Shader() : serialID(serialCounter++)
{
usedSamplers = 0;
}
Shader::~Shader()
{
for(auto &inst : instruction)
{
delete inst;
inst = 0;
}
}
void Shader::parse(const unsigned long *token)
{
minorVersion = (unsigned char)(token[0] & 0x000000FF);
majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
int length = 0;
if(shaderType == SHADER_VERTEX)
{
length = VertexShader::validate(token);
}
else if(shaderType == SHADER_PIXEL)
{
length = PixelShader::validate(token);
}
else ASSERT(false);
ASSERT(length != 0);
instruction.resize(length);
for(int i = 0; i < length; i++)
{
while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token
{
int length = (*token & 0x7FFF0000) >> 16;
token += length + 1;
}
int tokenCount = size(*token);
instruction[i] = new Instruction(token, tokenCount, majorVersion);
token += 1 + tokenCount;
}
}
int Shader::size(unsigned long opcode) const
{
return size(opcode, shaderModel);
}
int Shader::size(unsigned long opcode, unsigned short shaderModel)
{
if(shaderModel > 0x0300)
{
ASSERT(false);
}
static const signed char size[] =
{
0, // NOP = 0
2, // MOV
3, // ADD
3, // SUB
4, // MAD
3, // MUL
2, // RCP
2, // RSQ
3, // DP3
3, // DP4
3, // MIN
3, // MAX
3, // SLT
3, // SGE
2, // EXP
2, // LOG
2, // LIT
3, // DST
4, // LRP
2, // FRC
3, // M4x4
3, // M4x3
3, // M3x4
3, // M3x3
3, // M3x2
1, // CALL
2, // CALLNZ
2, // LOOP
0, // RET
0, // ENDLOOP
1, // LABEL
2, // DCL
3, // POW
3, // CRS
4, // SGN
2, // ABS
2, // NRM
4, // SINCOS
1, // REP
0, // ENDREP
1, // IF
2, // IFC
0, // ELSE
0, // ENDIF
0, // BREAK
2, // BREAKC
2, // MOVA
2, // DEFB
5, // DEFI
-1, // 49
-1, // 50
-1, // 51
-1, // 52
-1, // 53
-1, // 54
-1, // 55
-1, // 56
-1, // 57
-1, // 58
-1, // 59
-1, // 60
-1, // 61
-1, // 62
-1, // 63
1, // TEXCOORD = 64
1, // TEXKILL
1, // TEX
2, // TEXBEM
2, // TEXBEML
2, // TEXREG2AR
2, // TEXREG2GB
2, // TEXM3x2PAD
2, // TEXM3x2TEX
2, // TEXM3x3PAD
2, // TEXM3x3TEX
-1, // RESERVED0
3, // TEXM3x3SPEC
2, // TEXM3x3VSPEC
2, // EXPP
2, // LOGP
4, // CND
5, // DEF
2, // TEXREG2RGB
2, // TEXDP3TEX
2, // TEXM3x2DEPTH
2, // TEXDP3
2, // TEXM3x3
1, // TEXDEPTH
4, // CMP
3, // BEM
4, // DP2ADD
2, // DSX
2, // DSY
5, // TEXLDD
3, // SETP
3, // TEXLDL
2, // BREAKP
-1, // 97
-1, // 98
-1, // 99
-1, // 100
-1, // 101
-1, // 102
-1, // 103
-1, // 104
-1, // 105
-1, // 106
-1, // 107
-1, // 108
-1, // 109
-1, // 110
-1, // 111
-1, // 112
};
int length = 0;
if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
{
return (opcode & 0x7FFF0000) >> 16;
}
if(opcode != OPCODE_PS_1_0 &&
opcode != OPCODE_PS_1_1 &&
opcode != OPCODE_PS_1_2 &&
opcode != OPCODE_PS_1_3 &&
opcode != OPCODE_PS_1_4 &&
opcode != OPCODE_PS_2_0 &&
opcode != OPCODE_PS_2_x &&
opcode != OPCODE_PS_3_0 &&
opcode != OPCODE_VS_1_0 &&
opcode != OPCODE_VS_1_1 &&
opcode != OPCODE_VS_2_0 &&
opcode != OPCODE_VS_2_x &&
opcode != OPCODE_VS_2_sw &&
opcode != OPCODE_VS_3_0 &&
opcode != OPCODE_VS_3_sw &&
opcode != OPCODE_PHASE &&
opcode != OPCODE_END)
{
if(shaderModel >= 0x0200)
{
length = (opcode & 0x0F000000) >> 24;
}
else
{
length = size[opcode & 0x0000FFFF];
}
}
if(length < 0)
{
ASSERT(false);
}
if(shaderModel == 0x0104)
{
switch(opcode & 0x0000FFFF)
{
case OPCODE_TEX:
length += 1;
break;
case OPCODE_TEXCOORD:
length += 1;
break;
default:
break;
}
}
return length;
}
bool Shader::maskContainsComponent(int mask, int component)
{
return (mask & (1 << component)) != 0;
}
bool Shader::swizzleContainsComponent(int swizzle, int component)
{
if((swizzle & 0x03) >> 0 == component) return true;
if((swizzle & 0x0C) >> 2 == component) return true;
if((swizzle & 0x30) >> 4 == component) return true;
if((swizzle & 0xC0) >> 6 == component) return true;
return false;
}
bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
{
if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
return false;
}
bool Shader::containsDynamicBranching() const
{
return dynamicBranching;
}
bool Shader::containsBreakInstruction() const
{
return containsBreak;
}
bool Shader::containsContinueInstruction() const
{
return containsContinue;
}
bool Shader::containsLeaveInstruction() const
{
return containsLeave;
}
bool Shader::containsDefineInstruction() const
{
return containsDefine;
}
bool Shader::usesSampler(int index) const
{
return (usedSamplers & (1 << index)) != 0;
}
int Shader::getSerialID() const
{
return serialID;
}
size_t Shader::getLength() const
{
return instruction.size();
}
Shader::ShaderType Shader::getShaderType() const
{
return shaderType;
}
unsigned short Shader::getShaderModel() const
{
return shaderModel;
}
void Shader::print(const char *fileName, ...) const
{
char fullName[1024 + 1];
va_list vararg;
va_start(vararg, fileName);
vsnprintf(fullName, 1024, fileName, vararg);
va_end(vararg);
std::ofstream file(fullName, std::ofstream::out);
for(const auto &inst : instruction)
{
file << inst->string(shaderType, shaderModel) << std::endl;
}
}
void Shader::printInstruction(int index, const char *fileName) const
{
std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
file << instruction[index]->string(shaderType, shaderModel) << std::endl;
}
void Shader::append(Instruction *instruction)
{
this->instruction.push_back(instruction);
}
void Shader::declareSampler(int i)
{
if(i >= 0 && i < 16)
{
usedSamplers |= 1 << i;
}
}
const Shader::Instruction *Shader::getInstruction(size_t i) const
{
ASSERT(i < instruction.size());
return instruction[i];
}
void Shader::optimize()
{
optimizeLeave();
optimizeCall();
removeNull();
}
void Shader::optimizeLeave()
{
// A return (leave) right before the end of a function or the shader can be removed
for(unsigned int i = 0; i < instruction.size(); i++)
{
if(instruction[i]->opcode == OPCODE_LEAVE)
{
if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
{
instruction[i]->opcode = OPCODE_NULL;
}
}
}
}
void Shader::optimizeCall()
{
// Eliminate uncalled functions
std::set<int> calledFunctions;
bool rescan = true;
while(rescan)
{
calledFunctions.clear();
rescan = false;
for(const auto &inst : instruction)
{
if(inst->isCall())
{
calledFunctions.insert(inst->dst.label);
}
}
if(!calledFunctions.empty())
{
for(unsigned int i = 0; i < instruction.size(); i++)
{
if(instruction[i]->opcode == OPCODE_LABEL)
{
if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
{
for( ; i < instruction.size(); i++)
{
Opcode oldOpcode = instruction[i]->opcode;
instruction[i]->opcode = OPCODE_NULL;
if(oldOpcode == OPCODE_RET)
{
rescan = true;
break;
}
}
}
}
}
}
}
// Optimize the entry call
if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
{
if(calledFunctions.size() == 1)
{
instruction[0]->opcode = OPCODE_NULL;
instruction[1]->opcode = OPCODE_NULL;
for(size_t i = 2; i < instruction.size(); i++)
{
if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
{
instruction[i]->opcode = OPCODE_NULL;
}
}
}
}
}
void Shader::removeNull()
{
size_t size = 0;
for(size_t i = 0; i < instruction.size(); i++)
{
if(instruction[i]->opcode != OPCODE_NULL)
{
instruction[size] = instruction[i];
size++;
}
else
{
delete instruction[i];
}
}
instruction.resize(size);
}
void Shader::analyzeDirtyConstants()
{
dirtyConstantsF = 0;
dirtyConstantsI = 0;
dirtyConstantsB = 0;
for(const auto &inst : instruction)
{
switch(inst->opcode)
{
case OPCODE_DEF:
if(inst->dst.index + 1 > dirtyConstantsF)
{
dirtyConstantsF = inst->dst.index + 1;
}
break;
case OPCODE_DEFI:
if(inst->dst.index + 1 > dirtyConstantsI)
{
dirtyConstantsI = inst->dst.index + 1;
}
break;
case OPCODE_DEFB:
if(inst->dst.index + 1 > dirtyConstantsB)
{
dirtyConstantsB = inst->dst.index + 1;
}
break;
default:
break;
}
}
}
void Shader::analyzeDynamicBranching()
{
dynamicBranching = false;
containsLeave = false;
containsBreak = false;
containsContinue = false;
containsDefine = false;
// Determine global presence of branching instructions
for(const auto &inst : instruction)
{
switch(inst->opcode)
{
case OPCODE_CALLNZ:
case OPCODE_IF:
case OPCODE_IFC:
case OPCODE_BREAK:
case OPCODE_BREAKC:
case OPCODE_CMP:
case OPCODE_BREAKP:
case OPCODE_LEAVE:
case OPCODE_CONTINUE:
if(inst->src[0].type != PARAMETER_CONSTBOOL)
{
dynamicBranching = true;
}
if(inst->opcode == OPCODE_LEAVE)
{
containsLeave = true;
}
if(inst->isBreak())
{
containsBreak = true;
}
if(inst->opcode == OPCODE_CONTINUE)
{
containsContinue = true;
}
case OPCODE_DEF:
case OPCODE_DEFB:
case OPCODE_DEFI:
containsDefine = true;
default:
break;
}
}
// Conservatively determine which instructions are affected by dynamic branching
int branchDepth = 0;
int breakDepth = 0;
int continueDepth = 0;
bool leaveReturn = false;
unsigned int functionBegin = 0;
for(unsigned int i = 0; i < instruction.size(); i++)
{
// If statements and loops
if(instruction[i]->isBranch() || instruction[i]->isLoop())
{
branchDepth++;
}
else if(instruction[i]->opcode == OPCODE_ENDIF || instruction[i]->isEndLoop())
{
branchDepth--;
}
if(branchDepth > 0)
{
instruction[i]->analysisBranch = true;
if(instruction[i]->isCall())
{
markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
}
}
// Break statemement
if(instruction[i]->isBreak())
{
breakDepth++;
}
if(breakDepth > 0)
{
if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask
{
breakDepth++;
}
else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
{
breakDepth--;
}
instruction[i]->analysisBreak = true;
if(instruction[i]->isCall())
{
markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
}
}
// Continue statement
if(instruction[i]->opcode == OPCODE_CONTINUE)
{
continueDepth++;
}
if(continueDepth > 0)
{
if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH) // Nested loop or switch, don't make the end of it disable the break execution mask
{
continueDepth++;
}
else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
{
continueDepth--;
}
instruction[i]->analysisContinue = true;
if(instruction[i]->isCall())
{
markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
}
}
// Return (leave) statement
if(instruction[i]->opcode == OPCODE_LEAVE)
{
leaveReturn = true;
// Mark loop body instructions prior to the return statement
for(unsigned int l = functionBegin; l < i; l++)
{
if(instruction[l]->isLoop())
{
for(unsigned int r = l + 1; r < i; r++)
{
instruction[r]->analysisLeave = true;
}
break;
}
}
}
else if(instruction[i]->opcode == OPCODE_RET) // End of the function
{
leaveReturn = false;
}
else if(instruction[i]->opcode == OPCODE_LABEL)
{
functionBegin = i;
}
if(leaveReturn)
{
instruction[i]->analysisLeave = true;
if(instruction[i]->isCall())
{
markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
}
}
}
}
void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
{
bool marker = false;
for(auto &inst : instruction)
{
if(!marker)
{
if(inst->opcode == OPCODE_LABEL && inst->dst.label == functionLabel)
{
marker = true;
}
}
else
{
if(inst->opcode == OPCODE_RET)
{
break;
}
else if(inst->isCall())
{
markFunctionAnalysis(inst->dst.label, flag);
}
inst->analysis |= flag;
}
}
}
void Shader::analyzeSamplers()
{
for(const auto &inst : instruction)
{
switch(inst->opcode)
{
case OPCODE_TEX:
case OPCODE_TEXBEM:
case OPCODE_TEXBEML:
case OPCODE_TEXREG2AR:
case OPCODE_TEXREG2GB:
case OPCODE_TEXM3X2TEX:
case OPCODE_TEXM3X3TEX:
case OPCODE_TEXM3X3SPEC:
case OPCODE_TEXM3X3VSPEC:
case OPCODE_TEXREG2RGB:
case OPCODE_TEXDP3TEX:
case OPCODE_TEXM3X2DEPTH:
case OPCODE_TEXLDD:
case OPCODE_TEXLDL:
case OPCODE_TEXLOD:
case OPCODE_TEXOFFSET:
case OPCODE_TEXOFFSETBIAS:
case OPCODE_TEXLODOFFSET:
case OPCODE_TEXELFETCH:
case OPCODE_TEXELFETCHOFFSET:
case OPCODE_TEXGRAD:
case OPCODE_TEXGRADOFFSET:
{
Parameter &dst = inst->dst;
Parameter &src1 = inst->src[1];
if(majorVersion >= 2)
{
if(src1.type == PARAMETER_SAMPLER)
{
usedSamplers |= 1 << src1.index;
}
}
else
{
usedSamplers |= 1 << dst.index;
}
}
break;
default:
break;
}
}
}
// Assigns a unique index to each call instruction, on a per label basis.
// This is used to know what basic block to return to.
void Shader::analyzeCallSites()
{
std::unordered_map<int, int> callSiteIndices;
for(auto &inst : instruction)
{
if(inst->opcode == OPCODE_CALL || inst->opcode == OPCODE_CALLNZ)
{
inst->dst.callSite = callSiteIndices[inst->dst.label]++;
}
}
}
void Shader::analyzeIndirectAddressing()
{
indirectAddressableTemporaries = false;
indirectAddressableInput = false;
indirectAddressableOutput = false;
for(const auto &inst : instruction)
{
if(inst->dst.rel.type != PARAMETER_VOID)
{
switch(inst->dst.type)
{
case PARAMETER_TEMP: indirectAddressableTemporaries = true; break;
case PARAMETER_INPUT: indirectAddressableInput = true; break;
case PARAMETER_OUTPUT: indirectAddressableOutput = true; break;
default: break;
}
}
for(int j = 0; j < 3; j++)
{
if(inst->src[j].rel.type != PARAMETER_VOID)
{
switch(inst->src[j].type)
{
case PARAMETER_TEMP: indirectAddressableTemporaries = true; break;
case PARAMETER_INPUT: indirectAddressableInput = true; break;
case PARAMETER_OUTPUT: indirectAddressableOutput = true; break;
default: break;
}
}
}
}
}
// analyzeLimits analyzes the whole shader program to determine the deepest
// nesting of control flow blocks and function calls. These calculations
// are stored into the limits member, and is used by the programs to
// allocate stack storage variables.
void Shader::analyzeLimits()
{
typedef unsigned int FunctionID;
// Identifier of the function with the main entry point.
constexpr FunctionID MAIN_ID = 0xF0000000;
// Invalid function identifier.
constexpr FunctionID INVALID_ID = ~0U;
// Limits on a single function.
struct FunctionLimits
{
uint32_t loops = 0; // maximum nested loop and reps.
uint32_t ifs = 0; // maximum nested if statements.
uint32_t stack = 0; // maximum call depth.
};
// Information about a single function in the shader.
struct FunctionInfo
{
FunctionLimits limits;
std::unordered_set<FunctionID> calls; // What this function calls.
bool reachable; // Is this function reachable?
};
std::unordered_map<FunctionID, FunctionInfo> functions;
uint32_t maxLabel = 0; // Highest label found
// Add a definition for the main entry point.
// This starts at the beginning of the instructions and does not have
// its own label.
functions[MAIN_ID] = FunctionInfo();
functions[MAIN_ID].reachable = true;
// Begin by doing a pass over the instructions to identify all the
// functions. These start with a label and end with a ret. Note that
// functions can have labels within them.
FunctionID currentFunc = MAIN_ID;
for(auto &inst : instruction)
{
switch (inst->opcode)
{
case OPCODE_LABEL:
if (currentFunc == INVALID_ID)
{
// Start of a function.
FunctionID id = inst->dst.label;
ASSERT(id != MAIN_ID); // If this fires, we're going to have to represent main with something else.
functions[id] = FunctionInfo();
}
break;
case OPCODE_RET:
currentFunc = INVALID_ID;
break;
default:
break;
}
}
// Limits for the currently analyzed function.
FunctionLimits currentLimits;
// Now loop over the instructions gathering the limits of each of the
// functions.
currentFunc = MAIN_ID;
for(size_t i = 0; i < instruction.size(); i++)
{
const auto& inst = instruction[i];
switch (inst->opcode)
{
case OPCODE_LABEL:
{
maxLabel = std::max(maxLabel, inst->dst.label);
if (currentFunc == INVALID_ID)
{
// Start of a function.
FunctionID id = inst->dst.label;
ASSERT(functions.find(id) != functions.end()); // Sanity check
currentFunc = id;
}
break;
}
case OPCODE_CALL:
case OPCODE_CALLNZ:
{
ASSERT(currentFunc != INVALID_ID);
FunctionID id = inst->dst.label;
ASSERT(functions.find(id) != functions.end());
functions[currentFunc].calls.emplace(id);
functions[id].reachable = true;
break;
}
case OPCODE_LOOP:
case OPCODE_REP:
case OPCODE_WHILE:
case OPCODE_SWITCH: // Not a mistake - switches share loopReps.
{
ASSERT(currentFunc != INVALID_ID);
++currentLimits.loops;
auto& func = functions[currentFunc];
func.limits.loops = std::max(func.limits.loops, currentLimits.loops);
break;
}
case OPCODE_ENDLOOP:
case OPCODE_ENDREP:
case OPCODE_ENDWHILE:
case OPCODE_ENDSWITCH:
{
ASSERT(currentLimits.loops > 0);
--currentLimits.loops;
break;
}
case OPCODE_IF:
case OPCODE_IFC:
{
ASSERT(currentFunc != INVALID_ID);
++currentLimits.ifs;
auto& func = functions[currentFunc];
func.limits.ifs = std::max(func.limits.ifs, currentLimits.ifs);
break;
}
case OPCODE_ENDIF:
{
ASSERT(currentLimits.ifs > 0);
currentLimits.ifs--;
break;
}
case OPCODE_RET:
{
// Must be in a function to return.
ASSERT(currentFunc != INVALID_ID);
// All stacks should be popped before returning.
ASSERT(currentLimits.ifs == 0);
ASSERT(currentLimits.loops == 0);
currentFunc = INVALID_ID;
currentLimits = FunctionLimits();
break;
}
default:
break;
}
}
#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
// Assert that every function is reachable (these should have been
// stripped in earlier stages). Unreachable functions may be code
// generated, but their own limits are not considered below, potentially
// causing OOB indexing in later stages.
// If we ever find cases where there are unreachable functions, we can
// replace this assert with NO-OPing or stripping out the dead
// functions.
for (const auto &it : functions) { ASSERT(it.second.reachable); }
#endif
// We have now gathered all the information about each of the functions
// in the shader. Traverse these functions starting from the main
// function to calculate the maximum limits across the entire shader.
std::unordered_set<FunctionID> visited;
std::function<Limits(FunctionID)> traverse;
traverse = [&](FunctionID id) -> Limits
{
const auto& func = functions[id];
ASSERT(visited.count(id) == 0); // Sanity check: Recursive functions are not allowed.
visited.insert(id);
Limits limits;
limits.stack = 1;
for (auto callee : func.calls)
{
auto calleeLimits = traverse(callee);
limits.loops = std::max(limits.loops, calleeLimits.loops);
limits.ifs = std::max(limits.ifs, calleeLimits.ifs);
limits.stack = std::max(limits.stack, calleeLimits.stack + 1);
}
visited.erase(id);
limits.loops += func.limits.loops;
limits.ifs += func.limits.ifs;
return limits;
};
limits = traverse(MAIN_ID);
limits.maxLabel = maxLabel;
}
}