| // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "Renderer.hpp" |
| |
| #include "Clipper.hpp" |
| #include "Surface.hpp" |
| #include "Primitive.hpp" |
| #include "Polygon.hpp" |
| #include "Main/FrameBuffer.hpp" |
| #include "Main/SwiftConfig.hpp" |
| #include "Reactor/Reactor.hpp" |
| #include "Shader/Constants.hpp" |
| #include "Common/MutexLock.hpp" |
| #include "Common/CPUID.hpp" |
| #include "Common/Memory.hpp" |
| #include "Common/Resource.hpp" |
| #include "Common/Half.hpp" |
| #include "Common/Math.hpp" |
| #include "Common/Timer.hpp" |
| #include "Common/Debug.hpp" |
| |
| #undef max |
| |
// Forwarded to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug builds only: Renderer::draw() returns immediately when the requested
// primitive count lies outside [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
| |
| namespace sw |
| { |
// Settings defined in other translation units.
// The first group, plus exactColorRounding, is assigned once from the
// Conventions passed to the first Renderer (see setGlobalRenderingSettings).
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
extern bool booleanFaceRegister;
extern bool fullPixelPositionRegister;
extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
extern bool secondaryColor;             // Specular lighting is applied after texturing
extern bool colorsDefaultToZero;

extern bool forceWindowed;
extern bool complementaryDepthBuffer;   // When set, draw() negates the depth range and uses 1 - near
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern TransparencyAntialiasing transparencyAntialiasing;
extern bool forceClearRegisters;

extern bool precacheVertex;
extern bool precacheSetup;
extern bool precachePixel;
| |
// Baseline number of primitives per batch; draw() divides it by the multisample count.
static const int batchSize = 128;
// Compared against threadsAwake by the scheduler; debug builds run draws on the
// calling thread when it equals 1.
AtomicInt threadCount(1);
// Upper bound for loops over primitiveProgress[] (primitive units).
AtomicInt Renderer::unitCount(1);
// Upper bound for loops over pixelProgress[] (pixel clusters).
AtomicInt Renderer::clusterCount(1);

// threadFunction() enables flush-to-zero and denormals-are-zero when logPrecision < IEEE.
TranscendentalPrecision logPrecision = ACCURATE;
TranscendentalPrecision expPrecision = ACCURATE;
TranscendentalPrecision rcpPrecision = ACCURATE;
TranscendentalPrecision rsqPrecision = ACCURATE;
bool perspectiveCorrection = true;
| |
| static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding) |
| { |
| static bool initialized = false; |
| |
| if(!initialized) |
| { |
| sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; |
| sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth; |
| sw::booleanFaceRegister = conventions.booleanFaceRegister; |
| sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; |
| sw::leadingVertexFirst = conventions.leadingVertexFirst; |
| sw::secondaryColor = conventions.secondaryColor; |
| sw::colorsDefaultToZero = conventions.colorsDefaultToZero; |
| sw::exactColorRounding = exactColorRounding; |
| initialized = true; |
| } |
| } |
| |
// Argument bundle read by Renderer::threadFunction() through its void* parameter.
struct Parameters
{
	Renderer *renderer;   // Renderer whose threadLoop() the thread runs
	int threadIndex;      // Index handed to threadLoop()
};
| |
| Query::Query(Type type) : building(false), data(0), type(type), reference(1) |
| { |
| } |
| |
// Adds one reference to the query; balanced by release().
void Query::addRef()
{
	++reference;   // Atomic
}
| |
| void Query::release() |
| { |
| int ref = reference--; // Atomic |
| |
| ASSERT(ref >= 0); |
| |
| if(ref == 0) |
| { |
| delete this; |
| } |
| } |
| |
| DrawCall::DrawCall() |
| { |
| queries = 0; |
| |
| vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; |
| vsDirtyConstI = 16; |
| vsDirtyConstB = 16; |
| |
| psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; |
| psDirtyConstI = 16; |
| psDirtyConstB = 16; |
| |
| references = -1; |
| |
| data = (DrawData*)allocate(sizeof(DrawData)); |
| data->constants = &constants; |
| } |
| |
// Frees the query list (if one is still attached) and returns the DrawData
// block to the allocator it was obtained from in the constructor.
DrawCall::~DrawCall()
{
	delete queries;

	deallocate(data);
}
| |
| Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() |
| { |
| setGlobalRenderingSettings(conventions, exactColorRounding); |
| |
| setRenderTarget(0, 0); |
| clipper = new Clipper(symmetricNormalizedDepth); |
| blitter = new Blitter; |
| |
| updateViewMatrix = true; |
| updateBaseMatrix = true; |
| updateProjectionMatrix = true; |
| updateClipPlanes = true; |
| |
| #if PERF_HUD |
| resetTimers(); |
| #endif |
| |
| for(int i = 0; i < 16; i++) |
| { |
| vertexTask[i] = 0; |
| |
| worker[i] = 0; |
| resume[i] = 0; |
| suspend[i] = 0; |
| } |
| |
| threadsAwake = 0; |
| resumeApp = new Event(); |
| |
| currentDraw = 0; |
| nextDraw = 0; |
| |
| qHead = 0; |
| qSize = 0; |
| |
| for(int i = 0; i < 16; i++) |
| { |
| triangleBatch[i] = 0; |
| primitiveBatch[i] = 0; |
| } |
| |
| for(int draw = 0; draw < DRAW_COUNT; draw++) |
| { |
| drawCall[draw] = new DrawCall(); |
| drawList[draw] = drawCall[draw]; |
| } |
| |
| for(int unit = 0; unit < 16; unit++) |
| { |
| primitiveProgress[unit].init(); |
| } |
| |
| for(int cluster = 0; cluster < 16; cluster++) |
| { |
| pixelProgress[cluster].init(); |
| } |
| |
| clipFlags = 0; |
| |
| swiftConfig = new SwiftConfig(disableServer); |
| updateConfiguration(true); |
| |
| sync = new Resource(0); |
| } |
| |
// Tears the renderer down: takes the sync resource exclusively, marks it for
// destruction, stops the worker threads, then frees the owned helper objects
// and all draw calls.
Renderer::~Renderer()
{
	// Exclusive access is held while the threads are terminated.
	// NOTE(review): sync is never deleted here; presumably destruct() hands
	// its lifetime over to the Resource itself - confirm in Resource.
	sync->lock(EXCLUSIVE);
	sync->destruct();
	terminateThreads();
	sync->unlock();

	delete clipper;
	clipper = nullptr;

	delete blitter;
	blitter = nullptr;

	delete resumeApp;
	resumeApp = nullptr;

	for(int draw = 0; draw < DRAW_COUNT; draw++)
	{
		delete drawCall[draw];
		drawCall[draw] = nullptr;
	}

	delete swiftConfig;
	swiftConfig = nullptr;
}
| |
// This object has to be memory aligned: storage comes from sw::allocate with
// a 16-byte alignment request rather than from the global operator new.
void* Renderer::operator new(size_t size)
{
	ASSERT(size == sizeof(Renderer));   // This operator can't be called from a derived class
	return sw::allocate(sizeof(Renderer), 16);
}
| |
// Counterpart of Renderer::operator new: releases storage through sw::deallocate.
void Renderer::operator delete(void * mem)
{
	sw::deallocate(mem);
}
| |
| void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) |
| { |
| #ifndef NDEBUG |
| if(count < minPrimitives || count > maxPrimitives) |
| { |
| return; |
| } |
| #endif |
| |
| context->drawType = drawType; |
| |
| updateConfiguration(); |
| updateClipper(); |
| |
| int ss = context->getSuperSampleCount(); |
| int ms = context->getMultiSampleCount(); |
| bool requiresSync = false; |
| |
| for(int q = 0; q < ss; q++) |
| { |
| unsigned int oldMultiSampleMask = context->multiSampleMask; |
| context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); |
| |
| if(!context->multiSampleMask) |
| { |
| continue; |
| } |
| |
| sync->lock(sw::PRIVATE); |
| |
| if(update || oldMultiSampleMask != context->multiSampleMask) |
| { |
| vertexState = VertexProcessor::update(drawType); |
| setupState = SetupProcessor::update(); |
| pixelState = PixelProcessor::update(); |
| |
| vertexRoutine = VertexProcessor::routine(vertexState); |
| setupRoutine = SetupProcessor::routine(setupState); |
| pixelRoutine = PixelProcessor::routine(pixelState); |
| } |
| |
| int batch = batchSize / ms; |
| |
| int (Renderer::*setupPrimitives)(int batch, int count); |
| |
| if(context->isDrawTriangle()) |
| { |
| switch(context->fillMode) |
| { |
| case FILL_SOLID: |
| setupPrimitives = &Renderer::setupSolidTriangles; |
| break; |
| case FILL_WIREFRAME: |
| setupPrimitives = &Renderer::setupWireframeTriangle; |
| batch = 1; |
| break; |
| case FILL_VERTEX: |
| setupPrimitives = &Renderer::setupVertexTriangle; |
| batch = 1; |
| break; |
| default: |
| ASSERT(false); |
| return; |
| } |
| } |
| else if(context->isDrawLine()) |
| { |
| setupPrimitives = &Renderer::setupLines; |
| } |
| else // Point draw |
| { |
| setupPrimitives = &Renderer::setupPoints; |
| } |
| |
| DrawCall *draw = nullptr; |
| |
| do |
| { |
| for(int i = 0; i < DRAW_COUNT; i++) |
| { |
| if(drawCall[i]->references == -1) |
| { |
| draw = drawCall[i]; |
| drawList[nextDraw & DRAW_COUNT_BITS] = draw; |
| |
| break; |
| } |
| } |
| |
| if(!draw) |
| { |
| resumeApp->wait(); |
| } |
| } |
| while(!draw); |
| |
| DrawData *data = draw->data; |
| |
| if(queries.size() != 0) |
| { |
| draw->queries = new std::list<Query*>(); |
| bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; |
| for(auto &query : queries) |
| { |
| if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) |
| { |
| query->addRef(); |
| draw->queries->push_back(query); |
| } |
| } |
| } |
| |
| draw->drawType = drawType; |
| draw->batchSize = batch; |
| |
| vertexRoutine->bind(); |
| setupRoutine->bind(); |
| pixelRoutine->bind(); |
| |
| draw->vertexRoutine = vertexRoutine; |
| draw->setupRoutine = setupRoutine; |
| draw->pixelRoutine = pixelRoutine; |
| draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); |
| draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); |
| draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); |
| draw->setupPrimitives = setupPrimitives; |
| draw->setupState = setupState; |
| |
| for(int i = 0; i < MAX_VERTEX_INPUTS; i++) |
| { |
| draw->vertexStream[i] = context->input[i].resource; |
| data->input[i] = context->input[i].buffer; |
| data->stride[i] = context->input[i].stride; |
| |
| if(draw->vertexStream[i]) |
| { |
| draw->vertexStream[i]->lock(PUBLIC, PRIVATE); |
| } |
| } |
| |
| if(context->indexBuffer) |
| { |
| data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; |
| } |
| |
| draw->indexBuffer = context->indexBuffer; |
| |
| for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) |
| { |
| draw->texture[sampler] = 0; |
| } |
| |
| for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) |
| { |
| if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) |
| { |
| draw->texture[sampler] = context->texture[sampler]; |
| draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets |
| |
| data->mipmap[sampler] = context->sampler[sampler].getTextureData(); |
| |
| requiresSync |= context->sampler[sampler].requiresSync(); |
| } |
| } |
| |
| if(context->pixelShader) |
| { |
| if(draw->psDirtyConstF) |
| { |
| memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); |
| memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); |
| draw->psDirtyConstF = 0; |
| } |
| |
| if(draw->psDirtyConstI) |
| { |
| memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); |
| draw->psDirtyConstI = 0; |
| } |
| |
| if(draw->psDirtyConstB) |
| { |
| memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); |
| draw->psDirtyConstB = 0; |
| } |
| |
| PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); |
| } |
| else |
| { |
| for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) |
| { |
| draw->pUniformBuffers[i] = nullptr; |
| } |
| } |
| |
| if(context->pixelShaderModel() <= 0x0104) |
| { |
| for(int stage = 0; stage < 8; stage++) |
| { |
| if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) |
| { |
| data->textureStage[stage] = context->textureStage[stage].uniforms; |
| } |
| else break; |
| } |
| } |
| |
| if(context->vertexShader) |
| { |
| if(context->vertexShader->getShaderModel() >= 0x0300) |
| { |
| for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) |
| { |
| if(vertexState.sampler[sampler].textureType != TEXTURE_NULL) |
| { |
| draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; |
| draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); |
| |
| data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); |
| |
| requiresSync |= context->sampler[TEXTURE_IMAGE_UNITS + sampler].requiresSync(); |
| } |
| } |
| } |
| |
| if(draw->vsDirtyConstF) |
| { |
| memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); |
| draw->vsDirtyConstF = 0; |
| } |
| |
| if(draw->vsDirtyConstI) |
| { |
| memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); |
| draw->vsDirtyConstI = 0; |
| } |
| |
| if(draw->vsDirtyConstB) |
| { |
| memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); |
| draw->vsDirtyConstB = 0; |
| } |
| |
| if(context->vertexShader->isInstanceIdDeclared()) |
| { |
| data->instanceID = context->instanceID; |
| } |
| |
| VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); |
| VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); |
| } |
| else |
| { |
| data->ff = ff; |
| |
| draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; |
| draw->vsDirtyConstI = 16; |
| draw->vsDirtyConstB = 16; |
| |
| for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) |
| { |
| draw->vUniformBuffers[i] = nullptr; |
| } |
| |
| for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) |
| { |
| draw->transformFeedbackBuffers[i] = nullptr; |
| } |
| } |
| |
| if(pixelState.stencilActive) |
| { |
| data->stencil[0] = stencil; |
| data->stencil[1] = stencilCCW; |
| } |
| |
| if(pixelState.fogActive) |
| { |
| data->fog = fog; |
| } |
| |
| if(setupState.isDrawPoint) |
| { |
| data->point = point; |
| } |
| |
| data->lineWidth = context->lineWidth; |
| |
| data->factor = factor; |
| |
| if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) |
| { |
| float ref = context->alphaReference * (1.0f / 255.0f); |
| float margin = sw::min(ref, 1.0f - ref); |
| |
| if(ms == 4) |
| { |
| data->a2c0 = replicate(ref - margin * 0.6f); |
| data->a2c1 = replicate(ref - margin * 0.2f); |
| data->a2c2 = replicate(ref + margin * 0.2f); |
| data->a2c3 = replicate(ref + margin * 0.6f); |
| } |
| else if(ms == 2) |
| { |
| data->a2c0 = replicate(ref - margin * 0.3f); |
| data->a2c1 = replicate(ref + margin * 0.3f); |
| } |
| else ASSERT(false); |
| } |
| |
| if(pixelState.occlusionEnabled) |
| { |
| for(int cluster = 0; cluster < clusterCount; cluster++) |
| { |
| data->occlusion[cluster] = 0; |
| } |
| } |
| |
| #if PERF_PROFILE |
| for(int cluster = 0; cluster < clusterCount; cluster++) |
| { |
| for(int i = 0; i < PERF_TIMERS; i++) |
| { |
| data->cycles[i][cluster] = 0; |
| } |
| } |
| #endif |
| |
| // Viewport |
| { |
| float W = 0.5f * viewport.width; |
| float H = 0.5f * viewport.height; |
| float X0 = viewport.x0 + W; |
| float Y0 = viewport.y0 + H; |
| float N = viewport.minZ; |
| float F = viewport.maxZ; |
| float Z = F - N; |
| |
| if(context->isDrawTriangle(false)) |
| { |
| N += context->depthBias; |
| } |
| |
| if(complementaryDepthBuffer) |
| { |
| Z = -Z; |
| N = 1 - N; |
| } |
| |
| static const float X[5][16] = // Fragment offsets |
| { |
| {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample |
| {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples |
| {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples |
| {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples |
| {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples |
| }; |
| |
| static const float Y[5][16] = // Fragment offsets |
| { |
| {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample |
| {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples |
| {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples |
| {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples |
| {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples |
| }; |
| |
| int s = sw::log2(ss); |
| |
| data->Wx16 = replicate(W * 16); |
| data->Hx16 = replicate(H * 16); |
| data->X0x16 = replicate(X0 * 16 - 8); |
| data->Y0x16 = replicate(Y0 * 16 - 8); |
| data->XXXX = replicate(X[s][q] / W); |
| data->YYYY = replicate(Y[s][q] / H); |
| data->halfPixelX = replicate(0.5f / W); |
| data->halfPixelY = replicate(0.5f / H); |
| data->viewportHeight = abs(viewport.height); |
| data->slopeDepthBias = context->slopeDepthBias; |
| data->depthRange = Z; |
| data->depthNear = N; |
| draw->clipFlags = clipFlags; |
| |
| if(clipFlags) |
| { |
| if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; |
| if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; |
| if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; |
| if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; |
| if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; |
| if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; |
| } |
| } |
| |
| // Target |
| { |
| for(int index = 0; index < RENDERTARGETS; index++) |
| { |
| draw->renderTarget[index] = context->renderTarget[index]; |
| |
| if(draw->renderTarget[index]) |
| { |
| unsigned int layer = context->renderTargetLayer[index]; |
| requiresSync |= context->renderTarget[index]->requiresSync(); |
| data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); |
| data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true); |
| data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); |
| data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); |
| } |
| } |
| |
| draw->depthBuffer = context->depthBuffer; |
| draw->stencilBuffer = context->stencilBuffer; |
| |
| if(draw->depthBuffer) |
| { |
| unsigned int layer = context->depthBufferLayer; |
| requiresSync |= context->depthBuffer->requiresSync(); |
| data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); |
| data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true); |
| data->depthPitchB = context->depthBuffer->getInternalPitchB(); |
| data->depthSliceB = context->depthBuffer->getInternalSliceB(); |
| } |
| |
| if(draw->stencilBuffer) |
| { |
| unsigned int layer = context->stencilBufferLayer; |
| requiresSync |= context->stencilBuffer->requiresSync(); |
| data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED); |
| data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true); |
| data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); |
| data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); |
| } |
| } |
| |
| // Scissor |
| { |
| data->scissorX0 = scissor.x0; |
| data->scissorX1 = scissor.x1; |
| data->scissorY0 = scissor.y0; |
| data->scissorY1 = scissor.y1; |
| } |
| |
| draw->primitive = 0; |
| draw->count = count; |
| |
| draw->references = (count + batch - 1) / batch; |
| |
| schedulerMutex.lock(); |
| ++nextDraw; // Atomic |
| schedulerMutex.unlock(); |
| |
| #ifndef NDEBUG |
| if(threadCount == 1) // Use main thread for draw execution |
| { |
| threadsAwake = 1; |
| task[0].type = Task::RESUME; |
| |
| taskLoop(0); |
| } |
| else |
| #endif |
| { |
| if(!threadsAwake) |
| { |
| suspend[0]->wait(); |
| |
| threadsAwake = 1; |
| task[0].type = Task::RESUME; |
| |
| resume[0]->signal(); |
| } |
| } |
| } |
| |
| // TODO(sugoi): This is a temporary brute-force workaround to ensure IOSurface synchronization. |
| if(requiresSync) |
| { |
| synchronize(); |
| } |
| } |
| |
// Forwards a clear request to the blitter with all arguments unchanged.
void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
{
	blitter->clear(value, format, dest, clearRect, rgbaMask);
}
| |
// Forwards a blit to the blitter; the three boolean flags are packed, in
// order, into the braced options argument.
void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
{
	blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
}
| |
// Forwards a 3D blit between two surfaces to the blitter.
void Renderer::blit3D(Surface *source, Surface *dest)
{
	blitter->blit3D(source, dest);
}
| |
| void Renderer::threadFunction(void *parameters) |
| { |
| Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; |
| int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; |
| |
| if(logPrecision < IEEE) |
| { |
| CPUID::setFlushToZero(true); |
| CPUID::setDenormalsAreZero(true); |
| } |
| |
| renderer->threadLoop(threadIndex); |
| } |
| |
// Main loop of a worker thread: runs tasks until taskLoop() returns, then
// signals suspend[threadIndex] and blocks on resume[threadIndex]. Repeats
// until exitThreads is set.
void Renderer::threadLoop(int threadIndex)
{
	while(!exitThreads)
	{
		taskLoop(threadIndex);

		suspend[threadIndex]->signal();
		resume[threadIndex]->wait();
	}
}
| |
// Alternates between fetching a task and executing it until this thread's
// task slot is marked Task::SUSPEND (which scheduleTask() does when the queue
// is empty).
void Renderer::taskLoop(int threadIndex)
{
	while(task[threadIndex].type != Task::SUSPEND)
	{
		scheduleTask(threadIndex);
		executeTask(threadIndex);
	}
}
| |
| void Renderer::findAvailableTasks() |
| { |
| // Find pixel tasks |
| for(int cluster = 0; cluster < clusterCount; cluster++) |
| { |
| if(!pixelProgress[cluster].executing) |
| { |
| for(int unit = 0; unit < unitCount; unit++) |
| { |
| if(primitiveProgress[unit].references > 0) // Contains processed primitives |
| { |
| if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall) |
| { |
| if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered |
| { |
| Task &task = taskQueue[qHead]; |
| task.type = Task::PIXELS; |
| task.primitiveUnit = unit; |
| task.pixelCluster = cluster; |
| |
| pixelProgress[cluster].executing = true; |
| |
| // Commit to the task queue |
| qHead = (qHead + 1) & TASK_COUNT_BITS; |
| qSize++; |
| |
| break; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // Find primitive tasks |
| if(currentDraw == nextDraw) |
| { |
| return; // No more primitives to process |
| } |
| |
| for(int unit = 0; unit < unitCount; unit++) |
| { |
| DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS]; |
| |
| int primitive = draw->primitive; |
| int count = draw->count; |
| |
| if(primitive >= count) |
| { |
| ++currentDraw; // Atomic |
| |
| if(currentDraw == nextDraw) |
| { |
| return; // No more primitives to process |
| } |
| |
| draw = drawList[currentDraw & DRAW_COUNT_BITS]; |
| } |
| |
| if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit |
| { |
| primitive = draw->primitive; |
| count = draw->count; |
| int batch = draw->batchSize; |
| |
| primitiveProgress[unit].drawCall = currentDraw; |
| primitiveProgress[unit].firstPrimitive = primitive; |
| primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive; |
| |
| draw->primitive += batch; |
| |
| Task &task = taskQueue[qHead]; |
| task.type = Task::PRIMITIVES; |
| task.primitiveUnit = unit; |
| |
| primitiveProgress[unit].references = -1; |
| |
| // Commit to the task queue |
| qHead = (qHead + 1) & TASK_COUNT_BITS; |
| qSize++; |
| } |
| } |
| } |
| |
| void Renderer::scheduleTask(int threadIndex) |
| { |
| schedulerMutex.lock(); |
| |
| int curThreadsAwake = threadsAwake; |
| |
| if((int)qSize < threadCount - curThreadsAwake + 1) |
| { |
| findAvailableTasks(); |
| } |
| |
| if(qSize != 0) |
| { |
| task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS]; |
| qSize--; |
| |
| if(curThreadsAwake != threadCount) |
| { |
| int wakeup = qSize - curThreadsAwake + 1; |
| |
| for(int i = 0; i < threadCount && wakeup > 0; i++) |
| { |
| if(task[i].type == Task::SUSPEND) |
| { |
| suspend[i]->wait(); |
| task[i].type = Task::RESUME; |
| resume[i]->signal(); |
| |
| ++threadsAwake; // Atomic |
| wakeup--; |
| } |
| } |
| } |
| } |
| else |
| { |
| task[threadIndex].type = Task::SUSPEND; |
| |
| --threadsAwake; // Atomic |
| } |
| |
| schedulerMutex.unlock(); |
| } |
| |
| void Renderer::executeTask(int threadIndex) |
| { |
| #if PERF_HUD |
| int64_t startTick = Timer::ticks(); |
| #endif |
| |
| switch(task[threadIndex].type) |
| { |
| case Task::PRIMITIVES: |
| { |
| int unit = task[threadIndex].primitiveUnit; |
| |
| int input = primitiveProgress[unit].firstPrimitive; |
| int count = primitiveProgress[unit].primitiveCount; |
| DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
| int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; |
| |
| processPrimitiveVertices(unit, input, count, draw->count, threadIndex); |
| |
| #if PERF_HUD |
| int64_t time = Timer::ticks(); |
| vertexTime[threadIndex] += time - startTick; |
| startTick = time; |
| #endif |
| |
| int visible = 0; |
| |
| if(!draw->setupState.rasterizerDiscard) |
| { |
| visible = (this->*setupPrimitives)(unit, count); |
| } |
| |
| primitiveProgress[unit].visible = visible; |
| primitiveProgress[unit].references = clusterCount; |
| |
| #if PERF_HUD |
| setupTime[threadIndex] += Timer::ticks() - startTick; |
| #endif |
| } |
| break; |
| case Task::PIXELS: |
| { |
| int unit = task[threadIndex].primitiveUnit; |
| int visible = primitiveProgress[unit].visible; |
| |
| if(visible > 0) |
| { |
| int cluster = task[threadIndex].pixelCluster; |
| Primitive *primitive = primitiveBatch[unit]; |
| DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS]; |
| DrawData *data = draw->data; |
| PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; |
| |
| pixelRoutine(primitive, visible, cluster, data); |
| } |
| |
| finishRendering(task[threadIndex]); |
| |
| #if PERF_HUD |
| pixelTime[threadIndex] += Timer::ticks() - startTick; |
| #endif |
| } |
| break; |
| case Task::RESUME: |
| break; |
| case Task::SUSPEND: |
| break; |
| default: |
| ASSERT(false); |
| } |
| } |
| |
// Acquires and immediately releases a PUBLIC lock on the sync resource.
// draw() holds a PRIVATE lock on it for every draw call in flight.
// NOTE(review): presumably the PUBLIC lock blocks until those are released,
// making this a wait-for-idle - confirm against Resource::lock.
void Renderer::synchronize()
{
	sync->lock(sw::PUBLIC);
	sync->unlock();
}
| |
// Bookkeeping after a pixel task: advances the cluster's progress, drops the
// cluster's reference on the primitive unit and, when the whole draw call has
// completed, accumulates query results, unlocks every resource draw() locked,
// and marks the draw call as free again.
//
// NOTE(review): the 'ref == 0' tests below presume the counters' post-decrement
// yields the already-decremented value - confirm against AtomicInt.
void Renderer::finishRendering(Task &pixelTask)
{
	int unit = pixelTask.primitiveUnit;
	int cluster = pixelTask.pixelCluster;

	DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
	DrawData &data = *draw.data;
	int primitive = primitiveProgress[unit].firstPrimitive;
	int count = primitiveProgress[unit].primitiveCount;
	int processedPrimitives = primitive + count;

	pixelProgress[cluster].processedPrimitives = processedPrimitives;

	// This cluster is done with the draw call; move it on to the next one
	if(pixelProgress[cluster].processedPrimitives >= draw.count)
	{
		++pixelProgress[cluster].drawCall;   // Atomic
		pixelProgress[cluster].processedPrimitives = 0;
	}

	int ref = primitiveProgress[unit].references--;   // Atomic

	// Last cluster to finish this batch
	if(ref == 0)
	{
		ref = draw.references--;   // Atomic

		// Last batch of the draw call
		if(ref == 0)
		{
			#if PERF_PROFILE
				for(int cluster = 0; cluster < clusterCount; cluster++)
				{
					for(int i = 0; i < PERF_TIMERS; i++)
					{
						profiler.cycles[i] += data.cycles[i][cluster];
					}
				}
			#endif

			// Accumulate results into the attached queries and drop the
			// references taken in draw()
			if(draw.queries)
			{
				for(auto &query : *(draw.queries))
				{
					switch(query->type)
					{
					case Query::FRAGMENTS_PASSED:
						for(int cluster = 0; cluster < clusterCount; cluster++)
						{
							query->data += data.occlusion[cluster];
						}
						break;
					case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
						query->data += processedPrimitives;
						break;
					default:
						break;
					}

					query->release();
				}

				delete draw.queries;
				draw.queries = 0;
			}

			// Release everything draw() locked for this draw call
			for(int i = 0; i < RENDERTARGETS; i++)
			{
				if(draw.renderTarget[i])
				{
					draw.renderTarget[i]->unlockInternal();
				}
			}

			if(draw.depthBuffer)
			{
				draw.depthBuffer->unlockInternal();
			}

			if(draw.stencilBuffer)
			{
				draw.stencilBuffer->unlockStencil();
			}

			for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
			{
				if(draw.texture[i])
				{
					draw.texture[i]->unlock();
				}
			}

			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
			{
				if(draw.vertexStream[i])
				{
					draw.vertexStream[i]->unlock();
				}
			}

			if(draw.indexBuffer)
			{
				draw.indexBuffer->unlock();
			}

			for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
			{
				if(draw.pUniformBuffers[i])
				{
					draw.pUniformBuffers[i]->unlock();
				}
				if(draw.vUniformBuffers[i])
				{
					draw.vUniformBuffers[i]->unlock();
				}
			}

			for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
			{
				if(draw.transformFeedbackBuffers[i])
				{
					draw.transformFeedbackBuffers[i]->unlock();
				}
			}

			draw.vertexRoutine->unbind();
			draw.setupRoutine->unbind();
			draw.pixelRoutine->unbind();

			// Matches the sync->lock(sw::PRIVATE) taken in draw()
			sync->unlock();

			// Mark the draw call as free and wake draw() if it is waiting for one
			draw.references = -1;
			resumeApp->signal();
		}
	}

	pixelProgress[cluster].executing = false;
}
| |
| // Index gathering helpers for Renderer::processPrimitiveVertices(). Each helper fills |
| // 'count' entries of 'batch' with the three vertex indices of consecutive primitives, |
| // reading indices of type T (unsigned char, unsigned short or unsigned int) from 'indices'. |
| |
| // Points: all three slots of an entry hold the same index. |
| template<typename T> |
| static void gatherIndexedPoints(unsigned int (*batch)[3], const void *indices, unsigned int start, unsigned int count) |
| { |
|     const T *index = static_cast<const T*>(indices) + start; |
| |
|     for(unsigned int i = 0; i < count; i++, index += 1) |
|     { |
|         batch[i][0] = *index; |
|         batch[i][1] = *index; |
|         batch[i][2] = *index; |
|     } |
| } |
| |
| // Lines: the second end point is duplicated into the third slot. 'first' is the offset of |
| // the first index to read and 'stride' the number of indices consumed per line |
| // (2 for line lists, 1 for line strips). |
| template<typename T> |
| static void gatherIndexedLines(unsigned int (*batch)[3], const void *indices, unsigned int first, unsigned int stride, unsigned int count) |
| { |
|     const T *index = static_cast<const T*>(indices) + first; |
| |
|     for(unsigned int i = 0; i < count; i++, index += stride) |
|     { |
|         batch[i][0] = index[0]; |
|         batch[i][1] = index[1]; |
|         batch[i][2] = index[1]; |
|     } |
| } |
| |
| // Line loop: like a line strip, but index positions wrap around modulo 'loop'. |
| template<typename T> |
| static void gatherIndexedLineLoop(unsigned int (*batch)[3], const void *indices, unsigned int start, unsigned int count, unsigned int loop) |
| { |
|     const T *index = static_cast<const T*>(indices); |
| |
|     for(unsigned int i = 0; i < count; i++) |
|     { |
|         batch[i][0] = index[(start + i + 0) % loop]; |
|         batch[i][1] = index[(start + i + 1) % loop]; |
|         batch[i][2] = index[(start + i + 1) % loop]; |
|     } |
| } |
| |
| // Triangle list: three consecutive indices per triangle. |
| template<typename T> |
| static void gatherIndexedTriangleList(unsigned int (*batch)[3], const void *indices, unsigned int start, unsigned int count) |
| { |
|     const T *index = static_cast<const T*>(indices) + 3 * start; |
| |
|     for(unsigned int i = 0; i < count; i++, index += 3) |
|     { |
|         batch[i][0] = index[0]; |
|         batch[i][1] = index[1]; |
|         batch[i][2] = index[2]; |
|     } |
| } |
| |
| // Triangle strip: the second and third indices are swapped on odd-numbered triangles. |
| template<typename T> |
| static void gatherIndexedTriangleStrip(unsigned int (*batch)[3], const void *indices, unsigned int start, unsigned int count) |
| { |
|     const T *index = static_cast<const T*>(indices) + start; |
| |
|     for(unsigned int i = 0; i < count; i++, index += 1) |
|     { |
|         batch[i][0] = index[0]; |
|         batch[i][1] = index[((start + i) & 1) + 1]; |
|         batch[i][2] = index[(~(start + i) & 1) + 1]; |
|     } |
| } |
| |
| // Triangle fan: every triangle ends with the first index of the buffer. |
| template<typename T> |
| static void gatherIndexedTriangleFan(unsigned int (*batch)[3], const void *indices, unsigned int start, unsigned int count) |
| { |
|     const T *index = static_cast<const T*>(indices); |
| |
|     for(unsigned int i = 0; i < count; i++) |
|     { |
|         batch[i][0] = index[start + i + 1]; |
|         batch[i][1] = index[start + i + 2]; |
|         batch[i][2] = index[0]; |
|     } |
| } |
| |
| // Builds the vertex index triples for 'triangleCount' primitives of the unit's current |
| // draw call, starting at primitive 'start', and runs the draw call's vertex routine on them. |
| // |
| // unit          - selects triangleBatch[] / primitiveProgress[] entries |
| // start         - index of the first primitive to process |
| // triangleCount - number of primitives to process; must fit the fixed-size index batch |
| // loop          - modulus applied to vertex positions of line loops |
| // thread        - selects the VertexTask (and its vertex cache) to use |
| // |
| // Points and lines are stored as degenerate triples (repeated indices). |
| void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) |
| { |
|     Triangle *triangle = triangleBatch[unit]; |
|     int primitiveDrawCall = primitiveProgress[unit].drawCall; |
|     DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS]; |
|     DrawData *data = draw->data; |
|     VertexTask *task = vertexTask[thread]; |
| |
|     const void *indices = data->indices; |
|     VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; |
| |
|     // The vertex cache is only valid within a single draw call |
|     if(task->vertexCache.drawCall != primitiveDrawCall) |
|     { |
|         task->vertexCache.clear(); |
|         task->vertexCache.drawCall = primitiveDrawCall; |
|     } |
| |
|     const unsigned int maxBatch = 128;   // FIXME: Adjust to dynamic batch size |
|     unsigned int batch[maxBatch][3]; |
| |
|     // Writing more entries than the array holds would corrupt the stack |
|     ASSERT(triangleCount <= maxBatch); |
| |
|     switch(draw->drawType) |
|     { |
|     case DRAW_POINTLIST: |
|         { |
|             unsigned int index = start; |
| |
|             for(unsigned int i = 0; i < triangleCount; i++, index += 1) |
|             { |
|                 batch[i][0] = index; |
|                 batch[i][1] = index; |
|                 batch[i][2] = index; |
|             } |
|         } |
|         break; |
|     case DRAW_LINELIST: |
|         { |
|             unsigned int index = 2 * start; |
| |
|             for(unsigned int i = 0; i < triangleCount; i++, index += 2) |
|             { |
|                 batch[i][0] = index + 0; |
|                 batch[i][1] = index + 1; |
|                 batch[i][2] = index + 1; |
|             } |
|         } |
|         break; |
|     case DRAW_LINESTRIP: |
|         { |
|             unsigned int index = start; |
| |
|             for(unsigned int i = 0; i < triangleCount; i++, index += 1) |
|             { |
|                 batch[i][0] = index + 0; |
|                 batch[i][1] = index + 1; |
|                 batch[i][2] = index + 1; |
|             } |
|         } |
|         break; |
|     case DRAW_LINELOOP: |
|         { |
|             unsigned int index = start; |
| |
|             for(unsigned int i = 0; i < triangleCount; i++, index += 1) |
|             { |
|                 batch[i][0] = (index + 0) % loop; |
|                 batch[i][1] = (index + 1) % loop; |
|                 batch[i][2] = (index + 1) % loop; |
|             } |
|         } |
|         break; |
|     case DRAW_TRIANGLELIST: |
|         { |
|             unsigned int index = 3 * start; |
| |
|             for(unsigned int i = 0; i < triangleCount; i++, index += 3) |
|             { |
|                 batch[i][0] = index + 0; |
|                 batch[i][1] = index + 1; |
|                 batch[i][2] = index + 2; |
|             } |
|         } |
|         break; |
|     case DRAW_TRIANGLESTRIP: |
|         { |
|             unsigned int index = start; |
| |
|             for(unsigned int i = 0; i < triangleCount; i++, index += 1) |
|             { |
|                 if(leadingVertexFirst) |
|                 { |
|                     batch[i][0] = index + 0; |
|                     batch[i][1] = index + (index & 1) + 1; |
|                     batch[i][2] = index + (~index & 1) + 1; |
|                 } |
|                 else |
|                 { |
|                     batch[i][0] = index + (index & 1); |
|                     batch[i][1] = index + (~index & 1); |
|                     batch[i][2] = index + 2; |
|                 } |
|             } |
|         } |
|         break; |
|     case DRAW_TRIANGLEFAN: |
|         { |
|             unsigned int index = start; |
| |
|             for(unsigned int i = 0; i < triangleCount; i++, index += 1) |
|             { |
|                 if(leadingVertexFirst) |
|                 { |
|                     batch[i][0] = index + 1; |
|                     batch[i][1] = index + 2; |
|                     batch[i][2] = 0; |
|                 } |
|                 else |
|                 { |
|                     batch[i][0] = 0; |
|                     batch[i][1] = index + 1; |
|                     batch[i][2] = index + 2; |
|                 } |
|             } |
|         } |
|         break; |
|     case DRAW_INDEXEDPOINTLIST8: |
|         gatherIndexedPoints<unsigned char>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDPOINTLIST16: |
|         gatherIndexedPoints<unsigned short>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDPOINTLIST32: |
|         gatherIndexedPoints<unsigned int>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDLINELIST8: |
|         gatherIndexedLines<unsigned char>(batch, indices, 2 * start, 2, triangleCount); |
|         break; |
|     case DRAW_INDEXEDLINELIST16: |
|         gatherIndexedLines<unsigned short>(batch, indices, 2 * start, 2, triangleCount); |
|         break; |
|     case DRAW_INDEXEDLINELIST32: |
|         gatherIndexedLines<unsigned int>(batch, indices, 2 * start, 2, triangleCount); |
|         break; |
|     case DRAW_INDEXEDLINESTRIP8: |
|         gatherIndexedLines<unsigned char>(batch, indices, start, 1, triangleCount); |
|         break; |
|     case DRAW_INDEXEDLINESTRIP16: |
|         gatherIndexedLines<unsigned short>(batch, indices, start, 1, triangleCount); |
|         break; |
|     case DRAW_INDEXEDLINESTRIP32: |
|         gatherIndexedLines<unsigned int>(batch, indices, start, 1, triangleCount); |
|         break; |
|     case DRAW_INDEXEDLINELOOP8: |
|         gatherIndexedLineLoop<unsigned char>(batch, indices, start, triangleCount, loop); |
|         break; |
|     case DRAW_INDEXEDLINELOOP16: |
|         gatherIndexedLineLoop<unsigned short>(batch, indices, start, triangleCount, loop); |
|         break; |
|     case DRAW_INDEXEDLINELOOP32: |
|         gatherIndexedLineLoop<unsigned int>(batch, indices, start, triangleCount, loop); |
|         break; |
|     case DRAW_INDEXEDTRIANGLELIST8: |
|         gatherIndexedTriangleList<unsigned char>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLELIST16: |
|         gatherIndexedTriangleList<unsigned short>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLELIST32: |
|         gatherIndexedTriangleList<unsigned int>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLESTRIP8: |
|         gatherIndexedTriangleStrip<unsigned char>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLESTRIP16: |
|         gatherIndexedTriangleStrip<unsigned short>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLESTRIP32: |
|         gatherIndexedTriangleStrip<unsigned int>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLEFAN8: |
|         gatherIndexedTriangleFan<unsigned char>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLEFAN16: |
|         gatherIndexedTriangleFan<unsigned short>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_INDEXEDTRIANGLEFAN32: |
|         gatherIndexedTriangleFan<unsigned int>(batch, indices, start, triangleCount); |
|         break; |
|     case DRAW_QUADLIST: |
|         { |
|             // Each quad is split into two triangles sharing the diagonal (0, 2) |
|             unsigned int index = 4 * start / 2; |
| |
|             for(unsigned int i = 0; i < triangleCount; i += 2, index += 4) |
|             { |
|                 batch[i+0][0] = index + 0; |
|                 batch[i+0][1] = index + 1; |
|                 batch[i+0][2] = index + 2; |
| |
|                 batch[i+1][0] = index + 0; |
|                 batch[i+1][1] = index + 2; |
|                 batch[i+1][2] = index + 3; |
|             } |
|         } |
|         break; |
|     default: |
|         ASSERT(false); |
|         return; |
|     } |
| |
|     task->primitiveStart = start; |
|     task->vertexCount = triangleCount * 3; |
|     vertexRoutine(&triangle->v0, &batch[0][0], task, data); |
| } |
| |
| // Clips and sets up 'count' solid triangles from the unit's triangle batch, writing the |
| // results to the unit's primitive batch. Returns how many triangles the setup routine accepted. |
| int Renderer::setupSolidTriangles(int unit, int count) |
| { |
|     DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
|     SetupProcessor::State &state = draw.setupState; |
|     const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; |
|     const DrawData *data = draw.data; |
| |
|     int ms = state.multiSample; |
|     int pos = state.positionRegister; |
| |
|     Triangle *input = triangleBatch[unit]; |
|     Primitive *output = primitiveBatch[unit]; |
|     int accepted = 0; |
| |
|     for(int i = 0; i < count; i++) |
|     { |
|         Triangle &current = input[i]; |
|         Vertex &a = current.v0; |
|         Vertex &b = current.v1; |
|         Vertex &c = current.v2; |
| |
|         // Skip triangles whose combined (AND) clip flags differ from CLIP_FINITE |
|         if((a.clipFlags & b.clipFlags & c.clipFlags) != Clipper::CLIP_FINITE) |
|         { |
|             continue; |
|         } |
| |
|         Polygon polygon(&a.v[pos], &b.v[pos], &c.v[pos]); |
| |
|         int clipFlagsOr = a.clipFlags | b.clipFlags | c.clipFlags | draw.clipFlags; |
| |
|         // Only invoke the clipper when at least one other flag is set |
|         if(clipFlagsOr != Clipper::CLIP_FINITE && !clipper->clip(polygon, clipFlagsOr, draw)) |
|         { |
|             continue; |
|         } |
| |
|         if(setupRoutine(output, &current, &polygon, data)) |
|         { |
|             output += ms;   // Each accepted triangle occupies 'ms' primitive slots |
|             accepted++; |
|         } |
|     } |
| |
|     return accepted; |
| } |
| |
| // Sets up the first triangle of the unit's batch as up to three line primitives, one per |
| // edge. Entries 1 and 2 of the triangle batch are used as scratch space for the second and |
| // third edge. Returns the number of edges that produced a primitive. 'count' is not used. |
| int Renderer::setupWireframeTriangle(int unit, int count) |
| { |
|     Triangle *edges = triangleBatch[unit]; |
|     Primitive *output = primitiveBatch[unit]; |
| |
|     DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
|     SetupProcessor::State &state = draw.setupState; |
| |
|     const Vertex &v0 = edges[0].v0; |
|     const Vertex &v1 = edges[0].v1; |
|     const Vertex &v2 = edges[0].v2; |
| |
|     // The sign of d is what the cull test below inspects |
|     float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; |
| |
|     const bool culledClockwise = (state.cullMode == CULL_CLOCKWISE) && (d >= 0); |
|     const bool culledCounterClockwise = (state.cullMode == CULL_COUNTERCLOCKWISE) && (d <= 0); |
| |
|     if(culledClockwise || culledCounterClockwise) |
|     { |
|         return 0; |
|     } |
| |
|     // Copy attributes: edge 1 runs v1 -> v2, edge 2 runs v2 -> v0 |
|     edges[1].v0 = v1; |
|     edges[1].v1 = v2; |
|     edges[2].v0 = v2; |
|     edges[2].v1 = v0; |
| |
|     if(state.color[0][0].flat)   // FIXME |
|     { |
|         // Flat shading: both colors of the extra edges come from the first vertex |
|         for(int c = 0; c < 2; c++) |
|         { |
|             for(int e = 1; e <= 2; e++) |
|             { |
|                 edges[e].v0.C[c] = edges[0].v0.C[c]; |
|                 edges[e].v1.C[c] = edges[0].v0.C[c]; |
|             } |
|         } |
|     } |
| |
|     const float area = 0.5f * d; |
|     int visible = 0; |
| |
|     for(int e = 0; e < 3; e++) |
|     { |
|         // Visible edges are packed contiguously in the primitive batch |
|         if(setupLine(output[visible], edges[e], draw)) |
|         { |
|             output[visible].area = area; |
|             visible++; |
|         } |
|     } |
| |
|     return visible; |
| } |
| |
| // Sets up the first triangle of the unit's batch as up to three point primitives, one per |
| // vertex. Entries 1 and 2 of the triangle batch are used as scratch space for the second and |
| // third vertex. Returns the number of points that produced a primitive. 'count' is not used. |
| int Renderer::setupVertexTriangle(int unit, int count) |
| { |
|     Triangle *points = triangleBatch[unit]; |
|     Primitive *output = primitiveBatch[unit]; |
| |
|     DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
|     SetupProcessor::State &state = draw.setupState; |
| |
|     const Vertex &v0 = points[0].v0; |
|     const Vertex &v1 = points[0].v1; |
|     const Vertex &v2 = points[0].v2; |
| |
|     // The sign of d is what the cull test below inspects |
|     float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; |
| |
|     const bool culledClockwise = (state.cullMode == CULL_CLOCKWISE) && (d >= 0); |
|     const bool culledCounterClockwise = (state.cullMode == CULL_COUNTERCLOCKWISE) && (d <= 0); |
| |
|     if(culledClockwise || culledCounterClockwise) |
|     { |
|         return 0; |
|     } |
| |
|     // Copy attributes: setupPoint() only looks at v0 of each entry |
|     points[1].v0 = v1; |
|     points[2].v0 = v2; |
| |
|     const float area = 0.5f * d; |
|     int visible = 0; |
| |
|     for(int p = 0; p < 3; p++) |
|     { |
|         // Visible points are packed contiguously in the primitive batch |
|         if(setupPoint(output[visible], points[p], draw)) |
|         { |
|             output[visible].area = area; |
|             visible++; |
|         } |
|     } |
| |
|     return visible; |
| } |
| |
| // Runs setupLine() on the first 'count' entries of the unit's triangle batch. |
| // Returns the number of lines that produced a primitive. |
| int Renderer::setupLines(int unit, int count) |
| { |
|     DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
|     SetupProcessor::State &state = draw.setupState; |
|     const int ms = state.multiSample; |
| |
|     Triangle *lines = triangleBatch[unit]; |
|     Primitive *output = primitiveBatch[unit]; |
|     int accepted = 0; |
| |
|     for(int i = 0; i < count; i++) |
|     { |
|         if(!setupLine(*output, lines[i], draw)) |
|         { |
|             continue; |
|         } |
| |
|         output += ms;   // Each accepted line occupies 'ms' primitive slots |
|         accepted++; |
|     } |
| |
|     return accepted; |
| } |
| |
| // Runs setupPoint() on the first 'count' entries of the unit's triangle batch. |
| // Returns the number of points that produced a primitive. |
| int Renderer::setupPoints(int unit, int count) |
| { |
|     DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
|     SetupProcessor::State &state = draw.setupState; |
|     const int ms = state.multiSample; |
| |
|     Triangle *points = triangleBatch[unit]; |
|     Primitive *output = primitiveBatch[unit]; |
|     int accepted = 0; |
| |
|     for(int i = 0; i < count; i++) |
|     { |
|         if(!setupPoint(*output, points[i], draw)) |
|         { |
|             continue; |
|         } |
| |
|         output += ms;   // Each accepted point occupies 'ms' primitive slots |
|         accepted++; |
|     } |
| |
|     return accepted; |
| } |
| |
| // Turns the line from triangle.v0 to triangle.v1 into a polygon and hands it to the draw |
| // call's setup routine. With multisampling the line becomes a rectangle of width |
| // data.lineWidth; otherwise it becomes a hexagon built from a diamond around each end point. |
| // Returns false when the line is rejected or the setup routine reports nothing to draw. |
| bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) |
| { |
|     const SetupProcessor::RoutinePointer &setup = draw.setupPointer; |
|     const SetupProcessor::State &state = draw.setupState; |
|     const DrawData &data = *draw.data; |
| |
|     const float lineWidth = data.lineWidth; |
|     const int pos = state.positionRegister; |
| |
|     const float4 &P0 = triangle.v0.v[pos]; |
|     const float4 &P1 = triangle.v1.v[pos]; |
| |
|     if(P0.w <= 0 && P1.w <= 0) |
|     { |
|         return false; |
|     } |
| |
|     const float W = data.Wx16[0] * (1.0f / 16.0f); |
|     const float H = data.Hx16[0] * (1.0f / 16.0f); |
| |
|     float dx = W * (P1.x / P1.w - P0.x / P0.w); |
|     float dy = H * (P1.y / P1.w - P0.y / P0.w); |
| |
|     // Zero-length lines produce nothing |
|     if(dx == 0 && dy == 0) |
|     { |
|         return false; |
|     } |
| |
|     if(state.multiSample > 1)   // Rectangle |
|     { |
|         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); |
| |
|         dx *= scale; |
|         dy *= scale; |
| |
|         const float dx0h = dx * P0.w / H; |
|         const float dy0w = dy * P0.w / W; |
| |
|         const float dx1h = dx * P1.w / H; |
|         const float dy1w = dy * P1.w / W; |
| |
|         // Corners: two on each side of the line, offset perpendicular to it |
|         float4 P[4]; |
| |
|         P[0] = P0; |
|         P[1] = P1; |
|         P[2] = P1; |
|         P[3] = P0; |
| |
|         P[0].x -= dy0w;   P[0].y += dx0h; |
|         P[1].x -= dy1w;   P[1].y += dx1h; |
|         P[2].x += dy1w;   P[2].y -= dx1h; |
|         P[3].x += dy0w;   P[3].y -= dx0h; |
| |
|         int clipAnd = ~0; |
|         int clipOr = 0; |
| |
|         for(int i = 0; i < 4; i++) |
|         { |
|             const int flags = clipper->computeClipFlags(P[i]); |
| |
|             clipAnd &= flags; |
|             clipOr |= flags; |
|         } |
| |
|         if(clipAnd != Clipper::CLIP_FINITE) |
|         { |
|             return false; |
|         } |
| |
|         Polygon polygon(P, 4); |
| |
|         int clipFlagsOr = clipOr | draw.clipFlags; |
| |
|         if(clipFlagsOr != Clipper::CLIP_FINITE && !clipper->clip(polygon, clipFlagsOr, draw)) |
|         { |
|             return false; |
|         } |
| |
|         return setup(&primitive, &triangle, &polygon, &data); |
|     } |
| |
|     // Diamond test convention |
|     const float dx0 = lineWidth * 0.5f * P0.w / W; |
|     const float dy0 = lineWidth * 0.5f * P0.w / H; |
| |
|     const float dx1 = lineWidth * 0.5f * P1.w / W; |
|     const float dy1 = lineWidth * 0.5f * P1.w / H; |
| |
|     // P[0..3] form the diamond around P0 and P[4..7] the one around P1, |
|     // each in the order -x, +y, +x, -y |
|     float4 P[8]; |
| |
|     for(int i = 0; i < 4; i++) |
|     { |
|         P[i] = P0; |
|         P[i + 4] = P1; |
|     } |
| |
|     P[0].x -= dx0; |
|     P[1].y += dy0; |
|     P[2].x += dx0; |
|     P[3].y -= dy0; |
| |
|     P[4].x -= dx1; |
|     P[5].y += dy1; |
|     P[6].x += dx1; |
|     P[7].y -= dy1; |
| |
|     int clipAnd = ~0; |
|     int clipOr = 0; |
| |
|     for(int i = 0; i < 8; i++) |
|     { |
|         const int flags = clipper->computeClipFlags(P[i]); |
| |
|         clipAnd &= flags; |
|         clipOr |= flags; |
|     } |
| |
|     if(clipAnd != Clipper::CLIP_FINITE) |
|     { |
|         return false; |
|     } |
| |
|     // Diamond corners making up the hexagon, per direction of the line |
|     static const int hexagon[4][6] = |
|     { |
|         {0, 1, 5, 6, 7, 3},   // Right |
|         {0, 4, 5, 6, 2, 3},   // Down |
|         {0, 1, 2, 6, 7, 4},   // Up |
|         {1, 2, 3, 7, 4, 5},   // Left |
|     }; |
| |
|     int direction; |
| |
|     if(dx > -dy) |
|     { |
|         direction = (dx > dy) ? 0 : 1; |
|     } |
|     else |
|     { |
|         direction = (dx > dy) ? 2 : 3; |
|     } |
| |
|     float4 L[6]; |
| |
|     for(int i = 0; i < 6; i++) |
|     { |
|         L[i] = P[hexagon[direction][i]]; |
|     } |
| |
|     Polygon polygon(L, 6); |
| |
|     int clipFlagsOr = clipOr | draw.clipFlags; |
| |
|     if(clipFlagsOr != Clipper::CLIP_FINITE && !clipper->clip(polygon, clipFlagsOr, draw)) |
|     { |
|         return false; |
|     } |
| |
|     return setup(&primitive, &triangle, &polygon, &data); |
| } |
| |
| // Turns the point at triangle.v0 into a screen-aligned quad and hands it to the draw call's |
| // setup routine. triangle.v1 and triangle.v2 are overwritten with copies of v0 displaced by |
| // half the point size in X and Y respectively. |
| // Returns false when the quad is rejected or the setup routine reports nothing to draw. |
| bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) |
| { |
|     const SetupProcessor::RoutinePointer &setup = draw.setupPointer; |
|     const SetupProcessor::State &state = draw.setupState; |
|     const DrawData &data = *draw.data; |
| |
|     Vertex &center = triangle.v0; |
| |
|     // Per-vertex point size when a point size register is in use, draw-wide size otherwise |
|     const int pts = state.pointSizeRegister; |
|     float pSize = (state.pointSizeRegister != Unused) ? center.v[pts].y : data.point.pointSize[0]; |
| |
|     pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); |
| |
|     const int pos = state.positionRegister; |
| |
|     float4 P[4]; |
| |
|     for(int i = 0; i < 4; i++) |
|     { |
|         P[i] = center.v[pos]; |
|     } |
| |
|     const float X = pSize * P[0].w * data.halfPixelX[0]; |
|     const float Y = pSize * P[0].w * data.halfPixelY[0]; |
| |
|     // Corner order: (-X, +Y), (+X, +Y), (+X, -Y), (-X, -Y) |
|     P[0].x -= X;   P[0].y += Y; |
|     P[1].x += X;   P[1].y += Y; |
|     P[2].x += X;   P[2].y -= Y; |
|     P[3].x -= X;   P[3].y -= Y; |
| |
|     int clipAnd = ~0; |
|     int clipOr = 0; |
| |
|     for(int i = 0; i < 4; i++) |
|     { |
|         const int flags = clipper->computeClipFlags(P[i]); |
| |
|         clipAnd &= flags; |
|         clipOr |= flags; |
|     } |
| |
|     triangle.v1 = triangle.v0; |
|     triangle.v2 = triangle.v0; |
| |
|     triangle.v1.X += iround(16 * 0.5f * pSize); |
|     triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner |
| |
|     Polygon polygon(P, 4); |
| |
|     if(clipAnd != Clipper::CLIP_FINITE) |
|     { |
|         return false; |
|     } |
| |
|     int clipFlagsOr = clipOr | draw.clipFlags; |
| |
|     if(clipFlagsOr != Clipper::CLIP_FINITE && !clipper->clip(polygon, clipFlagsOr, draw)) |
|     { |
|         return false; |
|     } |
| |
|     return setup(&primitive, &triangle, &polygon, &data); |
| } |
| |
| // Allocates the per-unit triangle and primitive batches, then creates 'threadCount' worker |
| // threads together with their vertex task and resume/suspend events. Every worker starts |
| // out with a SUSPEND task. |
| void Renderer::initializeThreads() |
| { |
|     unitCount = ceilPow2(threadCount); |
|     clusterCount = ceilPow2(threadCount); |
| |
|     for(int i = 0; i < unitCount; i++) |
|     { |
|         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); |
|         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); |
|     } |
| |
|     for(int i = 0; i < threadCount; i++) |
|     { |
|         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); |
|         vertexTask[i]->vertexCache.drawCall = -1;   // Matches no draw call, so the cache is cleared on first use |
| |
|         task[i].type = Task::SUSPEND; |
| |
|         resume[i] = new Event(); |
|         suspend[i] = new Event(); |
| |
|         Parameters parameters; |
|         parameters.threadIndex = i; |
|         parameters.renderer = this; |
| |
|         exitThreads = false; |
|         worker[i] = new Thread(threadFunction, &parameters); |
| |
|         // 'parameters' lives on this stack frame, so wait for the worker before looping. |
|         // NOTE(review): presumably threadFunction signals suspend[i] only after it has |
|         // read the parameters - confirm against threadFunction. |
|         suspend[i]->wait(); |
|         suspend[i]->signal(); |
|     } |
| } |
| |
| // Waits until no worker thread is awake, then stops and destroys every worker together |
| // with its events and vertex task, and finally releases the triangle and primitive batches. |
| void Renderer::terminateThreads() |
| { |
|     // Poll until all workers are idle |
|     for(; threadsAwake != 0; ) |
|     { |
|         Thread::sleep(1); |
|     } |
| |
|     for(int i = 0; i < threadCount; i++) |
|     { |
|         if(worker[i] != 0) |
|         { |
|             // Wake the worker with the exit flag set and wait for it to finish |
|             exitThreads = true; |
|             resume[i]->signal(); |
|             worker[i]->join(); |
| |
|             delete worker[i]; |
|             worker[i] = 0; |
| |
|             delete resume[i]; |
|             resume[i] = 0; |
| |
|             delete suspend[i]; |
|             suspend[i] = 0; |
|         } |
| |
|         deallocate(vertexTask[i]); |
|         vertexTask[i] = 0; |
|     } |
| |
|     // All 16 batch slots are released, regardless of how many were allocated |
|     for(int unit = 0; unit < 16; unit++) |
|     { |
|         deallocate(triangleBatch[unit]); |
|         triangleBatch[unit] = 0; |
| |
|         deallocate(primitiveBatch[unit]); |
|         primitiveBatch[unit] = 0; |
|     } |
| } |
| |
| // Scans the vertex shader for DEF, DEFI and DEFB instructions and stores the constants |
| // they define as vertex shader float, integer and boolean constants. Does nothing for a |
| // null shader. |
| void Renderer::loadConstants(const VertexShader *vertexShader) |
| { |
|     if(!vertexShader) return; |
| |
|     const size_t length = vertexShader->getLength(); |
| |
|     for(size_t n = 0; n < length; n++) |
|     { |
|         const Shader::Instruction *instruction = vertexShader->getInstruction(n); |
| |
|         switch(instruction->opcode) |
|         { |
|         case Shader::OPCODE_DEF: |
|             { |
|                 int index = instruction->dst.index; |
|                 float value[4]; |
| |
|                 for(int c = 0; c < 4; c++) |
|                 { |
|                     value[c] = instruction->src[0].value[c]; |
|                 } |
| |
|                 setVertexShaderConstantF(index, value); |
|             } |
|             break; |
|         case Shader::OPCODE_DEFI: |
|             { |
|                 int index = instruction->dst.index; |
|                 int integer[4]; |
| |
|                 for(int c = 0; c < 4; c++) |
|                 { |
|                     integer[c] = instruction->src[0].integer[c]; |
|                 } |
| |
|                 setVertexShaderConstantI(index, integer); |
|             } |
|             break; |
|         case Shader::OPCODE_DEFB: |
|             { |
|                 int index = instruction->dst.index; |
|                 int boolean = instruction->src[0].boolean[0]; |
| |
|                 setVertexShaderConstantB(index, &boolean); |
|             } |
|             break; |
|         default: |
|             break; |
|         } |
|     } |
| } |
| |
| // Scans the pixel shader for DEF, DEFI and DEFB instructions and stores the constants |
| // they define as pixel shader float, integer and boolean constants. Does nothing for a |
| // null shader. |
| void Renderer::loadConstants(const PixelShader *pixelShader) |
| { |
|     if(!pixelShader) return; |
| |
|     const size_t length = pixelShader->getLength(); |
| |
|     for(size_t n = 0; n < length; n++) |
|     { |
|         const Shader::Instruction *instruction = pixelShader->getInstruction(n); |
| |
|         switch(instruction->opcode) |
|         { |
|         case Shader::OPCODE_DEF: |
|             { |
|                 int index = instruction->dst.index; |
|                 float value[4]; |
| |
|                 for(int c = 0; c < 4; c++) |
|                 { |
|                     value[c] = instruction->src[0].value[c]; |
|                 } |
| |
|                 setPixelShaderConstantF(index, value); |
|             } |
|             break; |
|         case Shader::OPCODE_DEFI: |
|             { |
|                 int index = instruction->dst.index; |
|                 int integer[4]; |
| |
|                 for(int c = 0; c < 4; c++) |
|                 { |
|                     integer[c] = instruction->src[0].integer[c]; |
|                 } |
| |
|                 setPixelShaderConstantI(index, integer); |
|             } |
|             break; |
|         case Shader::OPCODE_DEFB: |
|             { |
|                 int index = instruction->dst.index; |
|                 int boolean = instruction->src[0].boolean[0]; |
| |
|                 setPixelShaderConstantB(index, &boolean); |
|             } |
|             break; |
|         default: |
|             break; |
|         } |
|     } |
| } |
| |
| // Stores the index buffer resource in the context. |
| void Renderer::setIndexBuffer(Resource *indexBuffer) |
| { |
| context->indexBuffer = indexBuffer; |
| } |
| |
| // Stores the multisample mask in the context's sampleMask. |
| void Renderer::setMultiSampleMask(unsigned int mask) |
| { |
| context->sampleMask = mask; |
| } |
| |
| // Sets the global sw::transparencyAntialiasing mode. The sw:: qualification is required |
| // because the parameter has the same name as the global. |
| void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) |
| { |
| sw::transparencyAntialiasing = transparencyAntialiasing; |
| } |
| |
| // Returns true when the texture resource bound to 'sampler' is also the resource of one |
| // of the bound render targets or of the bound depth buffer. |
| bool Renderer::isReadWriteTexture(int sampler) |
| { |
|     for(int target = 0; target < RENDERTARGETS; target++) |
|     { |
|         if(!context->renderTarget[target]) |
|         { |
|             continue;   // Nothing bound to this slot |
|         } |
| |
|         if(context->texture[sampler] == context->renderTarget[target]->getResource()) |
|         { |
|             return true; |
|         } |
|     } |
| |
|     if(!context->depthBuffer) |
|     { |
|         return false; |
|     } |
| |
|     return context->texture[sampler] == context->depthBuffer->getResource(); |
| } |
| |
| // Recomputes the enabled clip planes from the user planes when updateClipPlanes is set, |
| // then clears that flag. Only planes whose bit is set in clipFlags are touched. |
| void Renderer::updateClipper() |
| { |
|     if(!updateClipPlanes) |
|     { |
|         return; |
|     } |
| |
|     const int planeFlag[6] = |
|     { |
|         Clipper::CLIP_PLANE0, |
|         Clipper::CLIP_PLANE1, |
|         Clipper::CLIP_PLANE2, |
|         Clipper::CLIP_PLANE3, |
|         Clipper::CLIP_PLANE4, |
|         Clipper::CLIP_PLANE5, |
|     }; |
| |
|     if(VertexProcessor::isFixedFunction())   // User plane in world space |
|     { |
|         const Matrix &scissorWorld = getViewTransform(); |
| |
|         for(int i = 0; i < 6; i++) |
|         { |
|             if(clipFlags & planeFlag[i]) clipPlane[i] = scissorWorld * userPlane[i]; |
|         } |
|     } |
|     else   // User plane in clip space |
|     { |
|         for(int i = 0; i < 6; i++) |
|         { |
|             if(clipFlags & planeFlag[i]) clipPlane[i] = userPlane[i]; |
|         } |
|     } |
| |
|     updateClipPlanes = false; |
| } |
| |
| // Binds 'resource' as the texture of the given sampler in the context. |
| // 'sampler' must be below TOTAL_IMAGE_UNITS (checked with ASSERT only). |
| void Renderer::setTextureResource(unsigned int sampler, Resource *resource) |
| { |
| ASSERT(sampler < TOTAL_IMAGE_UNITS); |
| |
| context->texture[sampler] = resource; |
| } |
| |
| // Forwards one texture level (face, mipmap level, surface and texture type) to the |
| // context's sampler object. The ASSERT requires sampler < TOTAL_IMAGE_UNITS, face < 6 |
| // and level < MIPMAP_LEVELS. |
| void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) |
| { |
| ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); |
| |
| context->sampler[sampler].setTextureLevel(face, level, surface, type); |
| } |
| |
| // Forwards the texture filter of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setTextureFilter(sampler, textureFilter); |
|         return; |
|     } |
| |
|     PixelProcessor::setTextureFilter(sampler, textureFilter); |
| } |
| |
| // Forwards the mipmap filter of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setMipmapFilter(sampler, mipmapFilter); |
|         return; |
|     } |
| |
|     PixelProcessor::setMipmapFilter(sampler, mipmapFilter); |
| } |
| |
| // Forwards the gather enable flag of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setGatherEnable(sampler, enable); |
|         return; |
|     } |
| |
|     PixelProcessor::setGatherEnable(sampler, enable); |
| } |
| |
| // Forwards the U addressing mode of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setAddressingModeU(sampler, addressMode); |
|         return; |
|     } |
| |
|     PixelProcessor::setAddressingModeU(sampler, addressMode); |
| } |
| |
| // Forwards the V addressing mode of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setAddressingModeV(sampler, addressMode); |
|         return; |
|     } |
| |
|     PixelProcessor::setAddressingModeV(sampler, addressMode); |
| } |
| |
| // Forwards the W addressing mode of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setAddressingModeW(sampler, addressMode); |
|         return; |
|     } |
| |
|     PixelProcessor::setAddressingModeW(sampler, addressMode); |
| } |
| |
| // Forwards the sRGB read flag of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setReadSRGB(sampler, sRGB); |
|         return; |
|     } |
| |
|     PixelProcessor::setReadSRGB(sampler, sRGB); |
| } |
| |
| // Forwards the mipmap LOD bias of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setMipmapLOD(sampler, bias); |
|         return; |
|     } |
| |
|     PixelProcessor::setMipmapLOD(sampler, bias); |
| } |
| |
| // Forwards the border color of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setBorderColor(sampler, borderColor); |
|         return; |
|     } |
| |
|     PixelProcessor::setBorderColor(sampler, borderColor); |
| } |
| |
| // Forwards the maximum anisotropy of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); |
|         return; |
|     } |
| |
|     PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); |
| } |
| |
| // Forwards the high precision filtering flag of 'sampler' to the pixel processor for |
| // SAMPLER_PIXEL, and to the vertex processor for any other sampler type. |
| void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); |
|         return; |
|     } |
| |
|     PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); |
| } |
| |
| // Forwards the red channel swizzle of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setSwizzleR(sampler, swizzleR); |
|         return; |
|     } |
| |
|     PixelProcessor::setSwizzleR(sampler, swizzleR); |
| } |
| |
| // Forwards the green channel swizzle of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setSwizzleG(sampler, swizzleG); |
|         return; |
|     } |
| |
|     PixelProcessor::setSwizzleG(sampler, swizzleG); |
| } |
| |
| // Forwards the blue channel swizzle of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setSwizzleB(sampler, swizzleB); |
|         return; |
|     } |
| |
|     PixelProcessor::setSwizzleB(sampler, swizzleB); |
| } |
| |
| // Forwards the alpha channel swizzle of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setSwizzleA(sampler, swizzleA); |
|         return; |
|     } |
| |
|     PixelProcessor::setSwizzleA(sampler, swizzleA); |
| } |
| |
| // Forwards the compare function of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setCompareFunc(sampler, compFunc); |
|         return; |
|     } |
| |
|     PixelProcessor::setCompareFunc(sampler, compFunc); |
| } |
| |
| // Forwards the base level of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setBaseLevel(sampler, baseLevel); |
|         return; |
|     } |
| |
|     PixelProcessor::setBaseLevel(sampler, baseLevel); |
| } |
| |
| // Forwards the maximum level of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setMaxLevel(sampler, maxLevel); |
|         return; |
|     } |
| |
|     PixelProcessor::setMaxLevel(sampler, maxLevel); |
| } |
| |
| // Forwards the minimum LOD of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setMinLod(SamplerType type, int sampler, float minLod) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setMinLod(sampler, minLod); |
|         return; |
|     } |
| |
|     PixelProcessor::setMinLod(sampler, minLod); |
| } |
| |
| // Forwards the maximum LOD of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setMaxLod(sampler, maxLod); |
|         return; |
|     } |
| |
|     PixelProcessor::setMaxLod(sampler, maxLod); |
| } |
| |
| // Forwards the sync-required flag of 'sampler' to the pixel processor for SAMPLER_PIXEL, |
| // and to the vertex processor for any other sampler type. |
| void Renderer::setSyncRequired(SamplerType type, int sampler, bool syncRequired) |
| { |
|     if(type != SAMPLER_PIXEL) |
|     { |
|         VertexProcessor::setSyncRequired(sampler, syncRequired); |
|         return; |
|     } |
| |
|     PixelProcessor::setSyncRequired(sampler, syncRequired); |
| } |
| |
| // Forwards the point sprite enable flag to the context. |
| void Renderer::setPointSpriteEnable(bool pointSpriteEnable) |
| { |
| context->setPointSpriteEnable(pointSpriteEnable); |
| } |
| |
| // Forwards the point scale enable flag to the context. |
| void Renderer::setPointScaleEnable(bool pointScaleEnable) |
| { |
| context->setPointScaleEnable(pointScaleEnable); |
| } |
| |
| // Stores the line width in the context. |
| void Renderer::setLineWidth(float width) |
| { |
| context->lineWidth = width; |
| } |
| |
| // Stores the constant depth bias in the context. |
| void Renderer::setDepthBias(float bias) |
| { |
| context->depthBias = bias; |
| } |
| |
| // Stores the slope depth bias in the context. |
| void Renderer::setSlopeDepthBias(float slopeBias) |
| { |
| context->slopeDepthBias = slopeBias; |
| } |
| |
| // Stores the rasterizer discard flag in the context. |
| void Renderer::setRasterizerDiscard(bool rasterizerDiscard) |
| { |
| context->rasterizerDiscard = rasterizerDiscard; |
| } |
| |
| // Makes 'shader' the context's pixel shader and loads the constants it defines |
| // (loadConstants() ignores a null shader). |
| void Renderer::setPixelShader(const PixelShader *shader) |
| { |
| context->pixelShader = shader; |
| |
| loadConstants(shader); |
| } |
| |
| void Renderer::setVertexShader(const VertexShader *shader) |
| { |
| context->vertexShader = shader; |
| |
| loadConstants(shader); |
| } |
| |
| void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) |
| { |
| for(unsigned int i = 0; i < DRAW_COUNT; i++) |
| { |
| if(drawCall[i]->psDirtyConstF < index + count) |
| { |
| drawCall[i]->psDirtyConstF = index + count; |
| } |
| } |
| |
| for(unsigned int i = 0; i < count; i++) |
| { |
| PixelProcessor::setFloatConstant(index + i, value); |
| value += 4; |
| } |
| } |
| |
| void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) |
| { |
| for(unsigned int i = 0; i < DRAW_COUNT; i++) |
| { |
| if(drawCall[i]->psDirtyConstI < index + count) |
| { |
| drawCall[i]->psDirtyConstI = index + count; |
| } |
| } |
| |
| for(unsigned int i = 0; i < count; i++) |
| { |
| PixelProcessor::setIntegerConstant(index + i, value); |
| value += 4; |
| } |
| } |
| |
| void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) |
| { |
| for(unsigned int i = 0; i < DRAW_COUNT; i++) |
| { |
| if(drawCall[i]->psDirtyConstB < index + count) |
| { |
| drawCall[i]->psDirtyConstB = index + count; |
| } |
| } |
| |
| for(unsigned int i = 0; i < count; i++) |
| { |
| PixelProcessor::setBooleanConstant(index + i, *boolean); |
| boolean++; |
| } |
| } |
| |
| void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) |
| { |
| for(unsigned int i = 0; i < DRAW_COUNT; i++) |
| { |
| if(drawCall[i]->vsDirtyConstF < index + count) |
| { |
| drawCall[i]->vsDirtyConstF = index + count; |
| } |
| } |
| |
| for(unsigned int i = 0; i < count; i++) |
| { |
| VertexProcessor::setFloatConstant(index + i, value); |
| value += 4; |
| } |
| } |
| |
| void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) |
| { |
| for(unsigned int i = 0; i < DRAW_COUNT; i++) |
| { |
| if(drawCall[i]->vsDirtyConstI < index + count) |
| { |
| drawCall[i]->vsDirtyConstI = index + count; |
| } |
| } |
| |
| for(unsigned int i = 0; i < count; i++) |
| { |
| VertexProcessor::setIntegerConstant(index + i, value); |
| value += 4; |
| } |
| } |
| |
| void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) |
| { |
| for(unsigned int i = 0; i < DRAW_COUNT; i++) |
| { |
| if(drawCall[i]->vsDirtyConstB < index + count) |
| { |
| drawCall[i]->vsDirtyConstB = index + count; |
| } |
| } |
| |
| for(unsigned int i = 0; i < count; i++) |
| { |
| VertexProcessor::setBooleanConstant(index + i, *boolean); |
| boolean++; |
| } |
| } |
| |
| void Renderer::setModelMatrix(const Matrix &M, int i) |
| { |
| VertexProcessor::setModelMatrix(M, i); |
| } |
| |
| void Renderer::setViewMatrix(const Matrix &V) |
| { |
| VertexProcessor::setViewMatrix(V); |
| updateClipPlanes = true; |
| } |
| |
| void Renderer::setBaseMatrix(const Matrix &B) |
| { |
| VertexProcessor::setBaseMatrix(B); |
| updateClipPlanes = true; |
| } |
| |
| void Renderer::setProjectionMatrix(const Matrix &P) |
| { |
| VertexProcessor::setProjectionMatrix(P); |
| updateClipPlanes = true; |
| } |
| |
| void Renderer::addQuery(Query *query) |
| { |
| queries.push_back(query); |
| } |
| |
| void Renderer::removeQuery(Query *query) |
| { |
| queries.remove(query); |
| } |
| |
| #if PERF_HUD |
| int Renderer::getThreadCount() |
| { |
| return threadCount; |
| } |
| |
| int64_t Renderer::getVertexTime(int thread) |
| { |
| return vertexTime[thread]; |
| } |
| |
| int64_t Renderer::getSetupTime(int thread) |
| { |
| return setupTime[thread]; |
| } |
| |
| int64_t Renderer::getPixelTime(int thread) |
| { |
| return pixelTime[thread]; |
| } |
| |
| void Renderer::resetTimers() |
| { |
| for(int thread = 0; thread < threadCount; thread++) |
| { |
| vertexTime[thread] = 0; |
| setupTime[thread] = 0; |
| pixelTime[thread] = 0; |
| } |
| } |
| #endif |
| |
| void Renderer::setViewport(const Viewport &viewport) |
| { |
| this->viewport = viewport; |
| } |
| |
| void Renderer::setScissor(const Rect &scissor) |
| { |
| this->scissor = scissor; |
| } |
| |
| void Renderer::setClipFlags(int flags) |
| { |
| clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum |
| } |
| |
| void Renderer::setClipPlane(unsigned int index, const float plane[4]) |
| { |
| if(index < MAX_CLIP_PLANES) |
| { |
| userPlane[index] = plane; |
| } |
| else ASSERT(false); |
| |
| updateClipPlanes = true; |
| } |
| |
| void Renderer::updateConfiguration(bool initialUpdate) |
| { |
| bool newConfiguration = swiftConfig->hasNewConfiguration(); |
| |
| if(newConfiguration || initialUpdate) |
| { |
| terminateThreads(); |
| |
| SwiftConfig::Configuration configuration = {}; |
| swiftConfig->getConfiguration(configuration); |
| |
| precacheVertex = !newConfiguration && configuration.precache; |
| precacheSetup = !newConfiguration && configuration.precache; |
| precachePixel = !newConfiguration && configuration.precache; |
| |
| VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); |
| PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); |
| SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); |
| |
| switch(configuration.textureSampleQuality) |
| { |
| case 0: Sampler::setFilterQuality(FILTER_POINT); break; |
| case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; |
| case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; |
| default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; |
| } |
| |
| switch(configuration.mipmapQuality) |
| { |
| case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; |
| case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; |
| default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; |
| } |
| |
| setPerspectiveCorrection(configuration.perspectiveCorrection); |
| |
| switch(configuration.transcendentalPrecision) |
| { |
| case 0: |
| logPrecision = APPROXIMATE; |
| expPrecision = APPROXIMATE; |
| rcpPrecision = APPROXIMATE; |
| rsqPrecision = APPROXIMATE; |
| break; |
| case 1: |
| logPrecision = PARTIAL; |
| expPrecision = PARTIAL; |
| rcpPrecision = PARTIAL; |
| rsqPrecision = PARTIAL; |
| break; |
| case 2: |
| logPrecision = ACCURATE; |
| expPrecision = ACCURATE; |
| rcpPrecision = ACCURATE; |
| rsqPrecision = ACCURATE; |
| break; |
| case 3: |
| logPrecision = WHQL; |
| expPrecision = WHQL; |
| rcpPrecision = WHQL; |
| rsqPrecision = WHQL; |
| break; |
| case 4: |
| logPrecision = IEEE; |
| expPrecision = IEEE; |
| rcpPrecision = IEEE; |
| rsqPrecision = IEEE; |
| break; |
| default: |
| logPrecision = ACCURATE; |
| expPrecision = ACCURATE; |
| rcpPrecision = ACCURATE; |
| rsqPrecision = ACCURATE; |
| break; |
| } |
| |
| switch(configuration.transparencyAntialiasing) |
| { |
| case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; |
| case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; |
| default: transparencyAntialiasing = TRANSPARENCY_NONE; break; |
| } |
| |
| switch(configuration.threadCount) |
| { |
| case -1: threadCount = CPUID::coreCount(); break; |
| case 0: threadCount = CPUID::processAffinity(); break; |
| default: threadCount = configuration.threadCount; break; |
| } |
| |
| CPUID::setEnableSSE4_1(configuration.enableSSE4_1); |
| CPUID::setEnableSSSE3(configuration.enableSSSE3); |
| CPUID::setEnableSSE3(configuration.enableSSE3); |
| CPUID::setEnableSSE2(configuration.enableSSE2); |
| CPUID::setEnableSSE(configuration.enableSSE); |
| |
| for(int pass = 0; pass < 10; pass++) |
| { |
| optimization[pass] = configuration.optimization[pass]; |
| } |
| |
| forceWindowed = configuration.forceWindowed; |
| complementaryDepthBuffer = configuration.complementaryDepthBuffer; |
| postBlendSRGB = configuration.postBlendSRGB; |
| exactColorRounding = configuration.exactColorRounding; |
| forceClearRegisters = configuration.forceClearRegisters; |
| |
| #ifndef NDEBUG |
| minPrimitives = configuration.minPrimitives; |
| maxPrimitives = configuration.maxPrimitives; |
| #endif |
| } |
| |
| if(!initialUpdate && !worker[0]) |
| { |
| initializeThreads(); |
| } |
| } |
| } |