Merge changes I10d66439,I6bd62e7b,Iec85e0df,If16c5d11,If803398e, ...
* changes:
Update SPIRV-Tools to 9559cdbd
Squashed 'third_party/SPIRV-Tools/' changes from d14db341b..9559cdbdf
Update SPIRV-Headers to e4322e3b
Squashed 'third_party/SPIRV-Headers/' changes from 79b6681aa..e4322e3be
Temporarily disable warnings-as-errors
Kokoro (Windows): Switch to python3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c9c0902..bf8a56b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -396,9 +396,13 @@
set_cpp_flag("-fPIC")
endif()
- if(LINUX)
+ if(WIN32)
+ set_cpp_flag("-DVK_USE_PLATFORM_WIN32_KHR")
+ elseif(LINUX)
set_cpp_flag("-DUSE_X11=1")
set_cpp_flag("-DVK_USE_PLATFORM_XLIB_KHR")
+ elseif(APPLE)
+ set_cpp_flag("-DVK_USE_PLATFORM_MACOS_MVK")
endif()
# Use -g3 to have even more debug info
@@ -1533,7 +1537,6 @@
${SOURCE_DIR}/Reactor/Reactor.cpp
${SOURCE_DIR}/Reactor/Reactor.hpp
${SOURCE_DIR}/Reactor/SubzeroReactor.cpp
- ${SOURCE_DIR}/Reactor/Routine.cpp
${SOURCE_DIR}/Reactor/Optimizer.cpp
${SOURCE_DIR}/Reactor/Nucleus.hpp
${SOURCE_DIR}/Reactor/Routine.hpp
@@ -1668,7 +1671,6 @@
${SOURCE_DIR}/Reactor/LLVMReactorDebugInfo.cpp
${SOURCE_DIR}/Reactor/LLVMReactorDebugInfo.hpp
${SOURCE_DIR}/Reactor/Nucleus.hpp
- ${SOURCE_DIR}/Reactor/Routine.cpp
${SOURCE_DIR}/Reactor/Routine.hpp
${SOURCE_DIR}/Reactor/CPUID.cpp
${SOURCE_DIR}/Reactor/CPUID.hpp
@@ -1814,6 +1816,11 @@
list(APPEND OPENGL_COMPILER_LIST
${OPENGL_COMPILER_DIR}/ossource_posix.cpp
)
+
+ list(APPEND VULKAN_LIST
+ ${SOURCE_DIR}/WSI/MacOSSurfaceMVK.mm
+ ${SOURCE_DIR}/WSI/MacOSSurfaceMVK.h
+ )
elseif(ANDROID)
list(APPEND SWIFTSHADER_LIST
${SOURCE_DIR}/Main/FrameBufferAndroid.cpp
diff --git a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj
index 956f1fa..458f73c 100644
--- a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj
+++ b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj
@@ -127,7 +127,6 @@
<ClCompile Include="$(SolutionDir)src\Reactor\LLVMReactorDebugInfo.cpp" />
<ClInclude Include="$(SolutionDir)src\Reactor\LLVMReactorDebugInfo.hpp" />
<ClInclude Include="$(SolutionDir)src\Reactor\Nucleus.hpp" />
- <ClCompile Include="$(SolutionDir)src\Reactor\Routine.cpp" />
<ClInclude Include="$(SolutionDir)src\Reactor\Routine.hpp" />
<ClCompile Include="$(SolutionDir)src\Reactor\CPUID.cpp" />
<ClInclude Include="$(SolutionDir)src\Reactor\CPUID.hpp" />
diff --git a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters
index df9efd5..e31e252 100644
--- a/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters
+++ b/build/Visual Studio 15 2017 Win64/ReactorLLVM.vcxproj.filters
@@ -10,9 +10,6 @@
<ClCompile Include="$(SolutionDir)src\Reactor\LLVMReactorDebugInfo.cpp">
<Filter>src\Reactor</Filter>
</ClCompile>
- <ClCompile Include="$(SolutionDir)src\Reactor\Routine.cpp">
- <Filter>src\Reactor</Filter>
- </ClCompile>
<ClCompile Include="$(SolutionDir)src\Reactor\CPUID.cpp">
<Filter>src\Reactor</Filter>
</ClCompile>
diff --git a/docs/Reactor.md b/docs/Reactor.md
index 7085cc8..df016d7 100644
--- a/docs/Reactor.md
+++ b/docs/Reactor.md
@@ -61,7 +61,7 @@
The Routine is obtained and materialized by "calling" the ```Function<>``` object to give it a name:
```C++
-Routine *routine = function("one");
+auto routine = function("one");
```
Finally, we can obtain the function pointer to the entry point of the routine, and call it:
@@ -84,9 +84,9 @@
{
Int x = function.Arg<0>();
Int y = function.Arg<1>();
-
+
Int sum = x + y;
-
+
Return(sum);
}
```
@@ -119,9 +119,9 @@
Function<Int(Float)> function;
{
Float x = function.Arg<0>();
-
+
Int cast = Int(x);
-
+
Return(cast);
}
```
@@ -132,9 +132,9 @@
Function<Int(Float)> function;
{
Float x = function.Arg<0>();
-
+
Int reinterpret = As<Int>(x);
-
+
Return(reinterpret);
}
```
@@ -185,7 +185,7 @@
Function<Float(Float)> function;
{
Pointer<Float> x = function.Arg<0>();
-
+
If(x > 0.0f)
{
Return(1.0f);
diff --git a/src/Android.bp b/src/Android.bp
index 554a533..8f709bc 100644
--- a/src/Android.bp
+++ b/src/Android.bp
@@ -176,7 +176,6 @@
srcs: [
"Reactor/Reactor.cpp",
"Reactor/LLVMReactor.cpp",
- "Reactor/Routine.cpp",
"Reactor/Debug.cpp",
],
@@ -196,7 +195,6 @@
srcs: [
"Reactor/Reactor.cpp",
"Reactor/LLVMReactor.cpp",
- "Reactor/Routine.cpp",
"Reactor/Debug.cpp",
],
@@ -223,7 +221,6 @@
srcs: [
"Reactor/Reactor.cpp",
"Reactor/SubzeroReactor.cpp",
- "Reactor/Routine.cpp",
"Reactor/Optimizer.cpp",
"Reactor/Debug.cpp",
],
diff --git a/src/Android.mk b/src/Android.mk
index 4bcd422..4777003 100644
--- a/src/Android.mk
+++ b/src/Android.mk
@@ -57,7 +57,6 @@
COMMON_SRC_FILES += \
Reactor/Reactor.cpp \
- Reactor/Routine.cpp \
Reactor/Debug.cpp \
Reactor/DebugAndroid.cpp \
Reactor/ExecutableMemory.cpp
diff --git a/src/Common/Timer.cpp b/src/Common/Timer.cpp
index db0ba4a..0fa339f 100644
--- a/src/Common/Timer.cpp
+++ b/src/Common/Timer.cpp
@@ -65,9 +65,7 @@
return __rdtsc();
#endif
#elif defined(__i386__) || defined(__x86_64__)
- int64_t tsc;
- __asm volatile("rdtsc": "=A" (tsc));
- return tsc;
+ return __builtin_ia32_rdtsc();
#else
return 0;
#endif
diff --git a/src/D3D9/Direct3DDevice9.cpp b/src/D3D9/Direct3DDevice9.cpp
index 71f5f53..2f69420 100644
--- a/src/D3D9/Direct3DDevice9.cpp
+++ b/src/D3D9/Direct3DDevice9.cpp
@@ -1830,7 +1830,7 @@
void *destBuffer = dest->lockExternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
static void (__cdecl *blitFunction)(void *dst, void *src);
- static sw::Routine *blitRoutine;
+ static std::shared_ptr<sw::Routine> blitRoutine;
static sw::BlitState blitState = {};
sw::BlitState update;
@@ -1846,8 +1846,6 @@
if(memcmp(&blitState, &update, sizeof(sw::BlitState)) != 0)
{
blitState = update;
- delete blitRoutine;
-
blitRoutine = sw::FrameBuffer::copyRoutine(blitState);
blitFunction = (void(__cdecl*)(void*, void*))blitRoutine->getEntry();
}
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 2ca5f3f..f6d714f 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -52,7 +52,7 @@
}
State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), { 0xF });
- Routine *blitRoutine = getBlitRoutine(state);
+ auto blitRoutine = getBlitRoutine(state);
if(!blitRoutine)
{
return;
@@ -1304,7 +1304,7 @@
return s;
}
- Routine *Blitter::generate(const State &state)
+ std::shared_ptr<Routine> Blitter::generate(const State &state)
{
Function<Void(Pointer<Byte>)> function;
{
@@ -1535,13 +1535,13 @@
}
}
- return function(vk::ReactorOptimizationLevel, "BlitRoutine");
+ return function("BlitRoutine");
}
- Routine *Blitter::getBlitRoutine(const State &state)
+ std::shared_ptr<Routine> Blitter::getBlitRoutine(const State &state)
{
std::unique_lock<std::mutex> lock(blitMutex);
- Routine *blitRoutine = blitCache.query(state);
+ auto blitRoutine = blitCache.query(state);
if(!blitRoutine)
{
@@ -1559,10 +1559,10 @@
return blitRoutine;
}
- Routine *Blitter::getCornerUpdateRoutine(const State &state)
+ std::shared_ptr<Routine> Blitter::getCornerUpdateRoutine(const State &state)
{
std::unique_lock<std::mutex> lock(cornerUpdateMutex);
- Routine *cornerUpdateRoutine = cornerUpdateCache.query(state);
+ auto cornerUpdateRoutine = cornerUpdateCache.query(state);
if(!cornerUpdateRoutine)
{
@@ -1587,7 +1587,7 @@
State state(format, format.getNonQuadLayoutFormat(), VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
{false, false});
- Routine *blitRoutine = getBlitRoutine(state);
+ auto blitRoutine = getBlitRoutine(state);
if(!blitRoutine)
{
return;
@@ -1653,7 +1653,7 @@
State state(format.getNonQuadLayoutFormat(), format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
{false, false});
- Routine *blitRoutine = getBlitRoutine(state);
+ auto blitRoutine = getBlitRoutine(state);
if(!blitRoutine)
{
return;
@@ -1760,7 +1760,7 @@
(static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
(doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
- Routine *blitRoutine = getBlitRoutine(state);
+ auto blitRoutine = getBlitRoutine(state);
if(!blitRoutine)
{
return;
@@ -1857,7 +1857,7 @@
write(c0, layer + ComputeOffset(x0, y0, pitchB, bytes, quadLayout), state);
}
- Routine *Blitter::generateCornerUpdate(const State& state)
+ std::shared_ptr<Routine> Blitter::generateCornerUpdate(const State& state)
{
// Reading and writing from/to the same image
ASSERT(state.sourceFormat == state.destFormat);
@@ -1890,7 +1890,7 @@
}
}
- return function(vk::ReactorOptimizationLevel, "BlitRoutine");
+ return function("CornerUpdateRoutine");
}
void Blitter::updateBorders(vk::Image* image, const VkImageSubresourceLayers& subresourceLayers)
@@ -1958,7 +1958,7 @@
UNIMPLEMENTED("Multi-sampled cube: %d samples", static_cast<int>(samples));
}
- Routine *cornerUpdateRoutine = getCornerUpdateRoutine(state);
+ auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
if(!cornerUpdateRoutine)
{
return;
diff --git a/src/Device/Blitter.hpp b/src/Device/Blitter.hpp
index b95f14c..c8cddf3 100644
--- a/src/Device/Blitter.hpp
+++ b/src/Device/Blitter.hpp
@@ -134,10 +134,10 @@
static Int ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout);
static Float4 LinearToSRGB(Float4 &color);
static Float4 sRGBtoLinear(Float4 &color);
- Routine *getBlitRoutine(const State &state);
- Routine *generate(const State &state);
- Routine *getCornerUpdateRoutine(const State &state);
- Routine *generateCornerUpdate(const State& state);
+ std::shared_ptr<Routine> getBlitRoutine(const State &state);
+ std::shared_ptr<Routine> generate(const State &state);
+ std::shared_ptr<Routine> getCornerUpdateRoutine(const State &state);
+ std::shared_ptr<Routine> generateCornerUpdate(const State& state);
void computeCubeCorner(Pointer<Byte>& layer, Int& x0, Int& x1, Int& y0, Int& y1, Int& pitchB, const State& state);
void copyCubeEdge(vk::Image* image,
diff --git a/src/Device/Config.hpp b/src/Device/Config.hpp
index ecadc59..3203c14 100644
--- a/src/Device/Config.hpp
+++ b/src/Device/Config.hpp
@@ -49,14 +49,6 @@
{
OUTLINE_RESOLUTION = 8192, // Maximum vertical resolution of the render target
MIPMAP_LEVELS = 14,
- FRAGMENT_UNIFORM_VECTORS = 264,
- VERTEX_UNIFORM_VECTORS = 259,
- MAX_VERTEX_INPUTS = 32,
- MAX_VERTEX_OUTPUTS = 34,
- MAX_FRAGMENT_INPUTS = 32,
- MAX_FRAGMENT_UNIFORM_BLOCKS = 12,
- MAX_VERTEX_UNIFORM_BLOCKS = 12,
- MAX_UNIFORM_BUFFER_BINDINGS = MAX_FRAGMENT_UNIFORM_BLOCKS + MAX_VERTEX_UNIFORM_BLOCKS, // Limited to 127 by SourceParameter.bufferIndex in Shader.hpp
MAX_UNIFORM_BLOCK_SIZE = 16384,
MAX_CLIP_PLANES = 6,
MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS = 64,
@@ -65,8 +57,7 @@
MAX_PROGRAM_TEXEL_OFFSET = 7,
MAX_TEXTURE_LOD = MIPMAP_LEVELS - 2, // Trilinear accesses lod+1
RENDERTARGETS = 8,
- NUM_TEMPORARY_REGISTERS = 4096,
- MAX_INTERFACE_COMPONENTS = 32 * 4, // Must be multiple of 4 for 16-byte alignment.
+ MAX_INTERFACE_COMPONENTS = 16 * 4, // Must be multiple of 4 for 16-byte alignment.
};
}
diff --git a/src/Device/Context.cpp b/src/Device/Context.cpp
index 08b7139..ef3e123 100644
--- a/src/Device/Context.cpp
+++ b/src/Device/Context.cpp
@@ -86,7 +86,7 @@
void Context::init()
{
// Set vertex streams to null stream
- for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
+ for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
{
input[i].defaults();
}
diff --git a/src/Device/Context.hpp b/src/Device/Context.hpp
index 0957aeb..9a2864a 100644
--- a/src/Device/Context.hpp
+++ b/src/Device/Context.hpp
@@ -81,7 +81,7 @@
vk::DescriptorSet::Bindings descriptorSets = {};
vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets = {};
- Stream input[MAX_VERTEX_INPUTS];
+ Stream input[MAX_INTERFACE_COMPONENTS / 4];
void *indexBuffer;
vk::ImageView *renderTarget[RENDERTARGETS];
diff --git a/src/Device/LRUCache.hpp b/src/Device/LRUCache.hpp
index 180b5b9..9f6c120 100644
--- a/src/Device/LRUCache.hpp
+++ b/src/Device/LRUCache.hpp
@@ -19,6 +19,7 @@
#include <cstring>
#include <type_traits>
+#include <unordered_map>
namespace sw
{
@@ -28,15 +29,15 @@
public:
LRUCache(int n);
- ~LRUCache();
+ virtual ~LRUCache();
- Data *query(const Key &key) const;
- Data *add(const Key &key, Data *data);
+ Data query(const Key &key) const;
+ virtual Data add(const Key &key, const Data &data);
int getSize() {return size;}
Key &getKey(int i) {return key[i];}
- private:
+ protected:
int size;
int mask;
int top;
@@ -44,7 +45,30 @@
Key *key;
Key **ref;
- Data **data;
+ Data *data;
+ };
+
+ template<class Key, class Data>
+ class LRUConstCache : public LRUCache<Key, Data>
+ {
+ using LRUBase = LRUCache<Key, Data>;
+ public:
+ LRUConstCache(int n) : LRUBase(n) {}
+ ~LRUConstCache() { clearConstCache(); }
+
+ Data add(const Key &key, const Data& data) override
+ {
+ constCacheNeedsUpdate = true;
+ return LRUBase::add(key, data);
+ }
+
+ void updateConstCache();
+ Data queryConstCache(const Key &key) const;
+
+ private:
+ void clearConstCache();
+ bool constCacheNeedsUpdate = false;
+ std::unordered_map<Key, Data> constCache;
};
// Helper class for clearing the memory of objects at construction.
@@ -100,12 +124,10 @@
key = new Key[size];
ref = new Key*[size];
- data = new Data*[size];
+ data = new Data[size];
for(int i = 0; i < size; i++)
{
- data[i] = nullptr;
-
ref[i] = &key[i];
}
}
@@ -119,21 +141,12 @@
delete[] ref;
ref = nullptr;
- for(int i = 0; i < size; i++)
- {
- if(data[i])
- {
- data[i]->unbind();
- data[i] = nullptr;
- }
- }
-
delete[] data;
data = nullptr;
}
template<class Key, class Data>
- Data *LRUCache<Key, Data>::query(const Key &key) const
+ Data LRUCache<Key, Data>::query(const Key &key) const
{
for(int i = top; i > top - fill; i--)
{
@@ -141,14 +154,14 @@
if(key == *ref[j])
{
- Data *hit = data[j];
+ Data hit = data[j];
if(i != top)
{
// Move one up
int k = (j + 1) & mask;
- Data *swapD = data[k];
+ Data swapD = data[k];
data[k] = data[j];
data[j] = swapD;
@@ -165,24 +178,48 @@
}
template<class Key, class Data>
- Data *LRUCache<Key, Data>::add(const Key &key, Data *data)
+ Data LRUCache<Key, Data>::add(const Key &key, const Data &data)
{
top = (top + 1) & mask;
fill = fill + 1 < size ? fill + 1 : size;
*ref[top] = key;
-
- data->bind();
-
- if(this->data[top])
- {
- this->data[top]->unbind();
- }
-
this->data[top] = data;
return data;
}
+
+ template<class Key, class Data>
+ void LRUConstCache<Key, Data>::clearConstCache()
+ {
+ constCache.clear();
+ }
+
+ template<class Key, class Data>
+ void LRUConstCache<Key, Data>::updateConstCache()
+ {
+ if(constCacheNeedsUpdate)
+ {
+ clearConstCache();
+
+ for(int i = 0; i < LRUBase::size; i++)
+ {
+ if(LRUBase::data[i])
+ {
+ constCache[*LRUBase::ref[i]] = LRUBase::data[i];
+ }
+ }
+
+ constCacheNeedsUpdate = false;
+ }
+ }
+
+ template<class Key, class Data>
+ Data LRUConstCache<Key, Data>::queryConstCache(const Key &key) const
+ {
+ auto it = constCache.find(key);
+ return (it != constCache.end()) ? it->second : nullptr;
+ }
}
#endif // sw_LRUCache_hpp
diff --git a/src/Device/PixelProcessor.cpp b/src/Device/PixelProcessor.cpp
index 51f4517..d6593b2 100644
--- a/src/Device/PixelProcessor.cpp
+++ b/src/Device/PixelProcessor.cpp
@@ -227,18 +227,18 @@
return state;
}
- Routine *PixelProcessor::routine(const State &state,
+ std::shared_ptr<Routine> PixelProcessor::routine(const State &state,
vk::PipelineLayout const *pipelineLayout,
SpirvShader const *pixelShader,
const vk::DescriptorSet::Bindings &descriptorSets)
{
- Routine *routine = routineCache->query(state);
+ auto routine = routineCache->query(state);
if(!routine)
{
QuadRasterizer *generator = new PixelProgram(state, pipelineLayout, pixelShader, descriptorSets);
generator->generate();
- routine = (*generator)(vk::ReactorOptimizationLevel, "PixelRoutine_%0.8X", state.shaderID);
+ routine = (*generator)("PixelRoutine_%0.8X", state.shaderID);
delete generator;
routineCache->add(state, routine);
diff --git a/src/Device/PixelProcessor.hpp b/src/Device/PixelProcessor.hpp
index e48f677..17cda85 100644
--- a/src/Device/PixelProcessor.hpp
+++ b/src/Device/PixelProcessor.hpp
@@ -129,8 +129,8 @@
protected:
const State update(const Context* context) const;
- Routine *routine(const State &state, vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *pixelShader, const vk::DescriptorSet::Bindings &descriptorSets);
+ std::shared_ptr<Routine> routine(const State &state, vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *pixelShader, const vk::DescriptorSet::Bindings &descriptorSets);
void setRoutineCacheSize(int routineCacheSize);
// Other semi-constants
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index ddba431..f37e261 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -26,6 +26,7 @@
#include "System/Timer.hpp"
#include "Vulkan/VkConfig.h"
#include "Vulkan/VkDebug.hpp"
+#include "Vulkan/VkDevice.hpp"
#include "Vulkan/VkFence.hpp"
#include "Vulkan/VkImageView.hpp"
#include "Vulkan/VkQueryPool.hpp"
@@ -162,7 +163,7 @@
deallocate(data);
}
- Renderer::Renderer()
+ Renderer::Renderer(vk::Device* device) : device(device)
{
for(int i = 0; i < 16; i++)
{
@@ -339,10 +340,6 @@
draw->indexType = indexType;
draw->batchSize = batch;
- vertexRoutine->bind();
- setupRoutine->bind();
- pixelRoutine->bind();
-
draw->vertexRoutine = vertexRoutine;
draw->setupRoutine = setupRoutine;
draw->pixelRoutine = pixelRoutine;
@@ -363,7 +360,7 @@
ASSERT(!draw->events);
draw->events = events;
- for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
+ for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
{
data->input[i] = context->input[i].buffer;
data->stride[i] = context->input[i].vertexStride;
@@ -733,6 +730,7 @@
void Renderer::synchronize()
{
sync.wait();
+ device->updateSamplingRoutineConstCache();
}
void Renderer::finishRendering(Task &pixelTask)
@@ -785,9 +783,9 @@
draw.queries = nullptr;
}
- draw.vertexRoutine->unbind();
- draw.setupRoutine->unbind();
- draw.pixelRoutine->unbind();
+ draw.vertexRoutine.reset();
+ draw.setupRoutine.reset();
+ draw.pixelRoutine.reset();
if(draw.events)
{
@@ -1196,12 +1194,6 @@
P[3].y -= Y;
C[3] = Clipper::ComputeClipFlags(P[3]);
- triangle.v1 = triangle.v0;
- triangle.v2 = triangle.v0;
-
- triangle.v1.projected.x += iround(16 * 0.5f * pSize);
- triangle.v2.projected.y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
-
Polygon polygon(P, 4);
if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
@@ -1216,6 +1208,11 @@
}
}
+ triangle.v1 = triangle.v0;
+ triangle.v2 = triangle.v0;
+
+ triangle.v1.projected.x += iround(16 * 0.5f * pSize);
+ triangle.v2.projected.y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
return setupRoutine(&primitive, &triangle, &polygon, &data);
}
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index 85367f1..2ae2ca5 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -32,6 +32,7 @@
namespace vk
{
class DescriptorSet;
+ class Device;
class Query;
}
@@ -52,8 +53,8 @@
vk::DescriptorSet::Bindings descriptorSets = {};
vk::DescriptorSet::DynamicOffsets descriptorDynamicOffsets = {};
- const void *input[MAX_VERTEX_INPUTS];
- unsigned int stride[MAX_VERTEX_INPUTS];
+ const void *input[MAX_INTERFACE_COMPONENTS / 4];
+ unsigned int stride[MAX_INTERFACE_COMPONENTS / 4];
const void *indices;
int instanceID;
@@ -156,7 +157,7 @@
};
public:
- Renderer();
+ Renderer(vk::Device* device);
virtual ~Renderer();
@@ -251,9 +252,11 @@
SetupProcessor::State setupState;
PixelProcessor::State pixelState;
- Routine *vertexRoutine;
- Routine *setupRoutine;
- Routine *pixelRoutine;
+ std::shared_ptr<Routine> vertexRoutine;
+ std::shared_ptr<Routine> setupRoutine;
+ std::shared_ptr<Routine> pixelRoutine;
+
+ vk::Device* device;
};
struct DrawCall
@@ -266,9 +269,9 @@
std::atomic<int> indexType;
std::atomic<int> batchSize;
- Routine *vertexRoutine;
- Routine *setupRoutine;
- Routine *pixelRoutine;
+ std::shared_ptr<Routine> vertexRoutine;
+ std::shared_ptr<Routine> setupRoutine;
+ std::shared_ptr<Routine> pixelRoutine;
VertexProcessor::RoutinePointer vertexPointer;
SetupProcessor::RoutinePointer setupPointer;
diff --git a/src/Device/RoutineCache.hpp b/src/Device/RoutineCache.hpp
index 8420468..61f635a 100644
--- a/src/Device/RoutineCache.hpp
+++ b/src/Device/RoutineCache.hpp
@@ -24,7 +24,7 @@
using namespace rr;
template<class State>
- using RoutineCache = LRUCache<State, Routine>;
+ using RoutineCache = LRUCache<State, std::shared_ptr<Routine>>;
}
#endif // sw_RoutineCache_hpp
diff --git a/src/Device/SetupProcessor.cpp b/src/Device/SetupProcessor.cpp
index 8294514..abf4c36 100644
--- a/src/Device/SetupProcessor.cpp
+++ b/src/Device/SetupProcessor.cpp
@@ -94,9 +94,9 @@
return state;
}
- Routine *SetupProcessor::routine(const State &state)
+ std::shared_ptr<Routine> SetupProcessor::routine(const State &state)
{
- Routine *routine = routineCache->query(state);
+ auto routine = routineCache->query(state);
if(!routine)
{
diff --git a/src/Device/SetupProcessor.hpp b/src/Device/SetupProcessor.hpp
index c82b8b5..a84f818 100644
--- a/src/Device/SetupProcessor.hpp
+++ b/src/Device/SetupProcessor.hpp
@@ -67,7 +67,7 @@
protected:
State update(const sw::Context* context) const;
- Routine *routine(const State &state);
+ std::shared_ptr<Routine> routine(const State &state);
void setRoutineCacheSize(int cacheSize);
diff --git a/src/Device/VertexProcessor.cpp b/src/Device/VertexProcessor.cpp
index 76daf76..c6e5c13 100644
--- a/src/Device/VertexProcessor.cpp
+++ b/src/Device/VertexProcessor.cpp
@@ -79,7 +79,7 @@
state.shaderID = context->vertexShader->getSerialID();
- for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
+ for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
{
state.input[i].type = context->input[i].type;
state.input[i].count = context->input[i].count;
@@ -94,18 +94,18 @@
return state;
}
- Routine *VertexProcessor::routine(const State &state,
- vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *vertexShader,
- const vk::DescriptorSet::Bindings &descriptorSets)
+ std::shared_ptr<Routine> VertexProcessor::routine(const State &state,
+ vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *vertexShader,
+ const vk::DescriptorSet::Bindings &descriptorSets)
{
- Routine *routine = routineCache->query(state);
+ auto routine = routineCache->query(state);
if(!routine) // Create one
{
VertexRoutine *generator = new VertexProgram(state, pipelineLayout, vertexShader, descriptorSets);
generator->generate();
- routine = (*generator)(vk::ReactorOptimizationLevel, "VertexRoutine_%0.8X", state.shaderID);
+ routine = (*generator)("VertexRoutine_%0.8X", state.shaderID);
delete generator;
routineCache->add(state, routine);
diff --git a/src/Device/VertexProcessor.hpp b/src/Device/VertexProcessor.hpp
index ce91582..24a9b3f 100644
--- a/src/Device/VertexProcessor.hpp
+++ b/src/Device/VertexProcessor.hpp
@@ -70,7 +70,7 @@
unsigned int attribType : BITS(SpirvShader::ATTRIBTYPE_LAST);
};
- Input input[MAX_VERTEX_INPUTS];
+ Input input[MAX_INTERFACE_COMPONENTS / 4];
};
struct State : States
@@ -88,8 +88,8 @@
protected:
const State update(const sw::Context* context);
- Routine *routine(const State &state, vk::PipelineLayout const *pipelineLayout,
- SpirvShader const *vertexShader, const vk::DescriptorSet::Bindings &descriptorSets);
+ std::shared_ptr<Routine> routine(const State &state, vk::PipelineLayout const *pipelineLayout,
+ SpirvShader const *vertexShader, const vk::DescriptorSet::Bindings &descriptorSets);
void setRoutineCacheSize(int cacheSize);
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index 7d2e6a0..5e4f6c3 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -66,8 +66,6 @@
blitThread->join();
delete blitThread;
}
-
- delete blitRoutine;
}
void FrameBuffer::setCursorImage(sw::Surface *cursorImage)
@@ -154,8 +152,6 @@
if(memcmp(&blitState, &updateState, sizeof(BlitState)) != 0)
{
blitState = updateState;
- delete blitRoutine;
-
blitRoutine = copyRoutine(blitState);
blitFunction = (void(*)(void*, void*, Cursor*))blitRoutine->getEntry();
}
@@ -163,7 +159,7 @@
blitFunction(framebuffer, renderbuffer, &cursor);
}
- Routine *FrameBuffer::copyRoutine(const BlitState &state)
+ std::shared_ptr<Routine> FrameBuffer::copyRoutine(const BlitState &state)
{
const int width = state.width;
const int height = state.height;
diff --git a/src/Main/FrameBuffer.hpp b/src/Main/FrameBuffer.hpp
index f07feb3..94a1890 100644
--- a/src/Main/FrameBuffer.hpp
+++ b/src/Main/FrameBuffer.hpp
@@ -54,7 +54,7 @@
static void setCursorOrigin(int x0, int y0);
static void setCursorPosition(int x, int y);
- static Routine *copyRoutine(const BlitState &state);
+ static std::shared_ptr<Routine> copyRoutine(const BlitState &state);
protected:
void copy(sw::Surface *source);
@@ -90,7 +90,7 @@
static Cursor cursor;
void (*blitFunction)(void *dst, void *src, Cursor *cursor);
- Routine *blitRoutine;
+ std::shared_ptr<Routine> blitRoutine;
BlitState blitState; // State of the current blitRoutine.
BlitState updateState; // State of the routine to be generated.
diff --git a/src/Main/SwiftConfig.cpp b/src/Main/SwiftConfig.cpp
index aa17aa8..5876054 100644
--- a/src/Main/SwiftConfig.cpp
+++ b/src/Main/SwiftConfig.cpp
@@ -401,19 +401,20 @@
html += "<h2><em>Compiler optimizations</em></h2>\n";
html += "<table>\n";
- for(int pass = 0; pass < 10; pass++)
+ for(size_t pass = 0; pass < config.optimization.size(); pass++)
{
html += "<tr><td>Optimization pass " + itoa(pass + 1) + ":</td><td><select name='optimization" + itoa(pass + 1) + "' title='An optimization pass for the shader compiler.'>\n";
- html += "<option value='0'" + (config.optimization[pass] == 0 ? selected : empty) + ">Disabled" + (pass > 0 ? " (default)" : "") + "</option>\n";
- html += "<option value='1'" + (config.optimization[pass] == 1 ? selected : empty) + ">Instruction Combining" + (pass == 0 ? " (default)" : "") + "</option>\n";
- html += "<option value='2'" + (config.optimization[pass] == 2 ? selected : empty) + ">Control Flow Simplification</option>\n";
- html += "<option value='3'" + (config.optimization[pass] == 3 ? selected : empty) + ">Loop Invariant Code Motion</option>\n";
- html += "<option value='4'" + (config.optimization[pass] == 4 ? selected : empty) + ">Aggressive Dead Code Elimination</option>\n";
- html += "<option value='5'" + (config.optimization[pass] == 5 ? selected : empty) + ">Global Value Numbering</option>\n";
- html += "<option value='6'" + (config.optimization[pass] == 6 ? selected : empty) + ">Commutative Expressions Reassociation</option>\n";
- html += "<option value='7'" + (config.optimization[pass] == 7 ? selected : empty) + ">Dead Store Elimination</option>\n";
- html += "<option value='8'" + (config.optimization[pass] == 8 ? selected : empty) + ">Sparse Conditional Copy Propagation</option>\n";
- html += "<option value='9'" + (config.optimization[pass] == 9 ? selected : empty) + ">Scalar Replacement of Aggregates</option>\n";
+ html += "<option value='0'" + (config.optimization[pass] == rr::Optimization::Pass::Disabled ? selected : empty) + ">Disabled" + (pass > 0 ? " (default)" : "") + "</option>\n";
+ html += "<option value='1'" + (config.optimization[pass] == rr::Optimization::Pass::InstructionCombining ? selected : empty) + ">Instruction Combining" + (pass == 0 ? " (default)" : "") + "</option>\n";
+ html += "<option value='2'" + (config.optimization[pass] == rr::Optimization::Pass::CFGSimplification ? selected : empty) + ">Control Flow Simplification</option>\n";
+ html += "<option value='3'" + (config.optimization[pass] == rr::Optimization::Pass::LICM ? selected : empty) + ">Loop Invariant Code Motion</option>\n";
+ html += "<option value='4'" + (config.optimization[pass] == rr::Optimization::Pass::AggressiveDCE ? selected : empty) + ">Aggressive Dead Code Elimination</option>\n";
+ html += "<option value='5'" + (config.optimization[pass] == rr::Optimization::Pass::GVN ? selected : empty) + ">Global Value Numbering</option>\n";
+ html += "<option value='6'" + (config.optimization[pass] == rr::Optimization::Pass::Reassociate ? selected : empty) + ">Commutative Expressions Reassociation</option>\n";
+ html += "<option value='7'" + (config.optimization[pass] == rr::Optimization::Pass::DeadStoreElimination ? selected : empty) + ">Dead Store Elimination</option>\n";
+ html += "<option value='8'" + (config.optimization[pass] == rr::Optimization::Pass::SCCP ? selected : empty) + ">Sparse Conditional Copy Propagation</option>\n";
+ html += "<option value='9'" + (config.optimization[pass] == rr::Optimization::Pass::ScalarReplAggregates ? selected : empty) + ">Scalar Replacement of Aggregates</option>\n";
+ html += "<option value='10'" + (config.optimization[pass] == rr::Optimization::Pass::EarlyCSEPass ? selected : empty) + ">Eliminate trivially redundant instructions</option>\n";
html += "</select></td></tr>\n";
}
@@ -652,7 +653,7 @@
}
else if(sscanf(post, "optimization%d=%d", &index, &integer))
{
- config.optimization[index - 1] = (rr::Optimization)integer;
+ config.optimization[index - 1] = (rr::Optimization::Pass)integer;
}
else if(strstr(post, "disableServer=on"))
{
@@ -737,9 +738,10 @@
config.enableSSSE3 = ini.getBoolean("Processor", "EnableSSSE3", true);
config.enableSSE4_1 = ini.getBoolean("Processor", "EnableSSE4_1", true);
- for(int pass = 0; pass < 10; pass++)
+ for(size_t pass = 0; pass < config.optimization.size(); pass++)
{
- config.optimization[pass] = (rr::Optimization)ini.getInteger("Optimization", "OptimizationPass" + itoa(pass + 1), pass == 0 ? rr::InstructionCombining : rr::Disabled);
+ auto def = pass == 0 ? rr::Optimization::Pass::InstructionCombining : rr::Optimization::Pass::Disabled;
+ config.optimization[pass] = (rr::Optimization::Pass)ini.getInteger("Optimization", "OptimizationPass" + itoa(pass + 1), (int)def);
}
config.disableServer = ini.getBoolean("Testing", "DisableServer", false);
@@ -795,9 +797,9 @@
ini.addValue("Processor", "EnableSSSE3", itoa(config.enableSSSE3));
ini.addValue("Processor", "EnableSSE4_1", itoa(config.enableSSE4_1));
- for(int pass = 0; pass < 10; pass++)
+ for(size_t pass = 0; pass < config.optimization.size(); pass++)
{
- ini.addValue("Optimization", "OptimizationPass" + itoa(pass + 1), itoa(config.optimization[pass]));
+ ini.addValue("Optimization", "OptimizationPass" + itoa(pass + 1), itoa((int)config.optimization[pass]));
}
ini.addValue("Testing", "DisableServer", itoa(config.disableServer));
diff --git a/src/Main/SwiftConfig.hpp b/src/Main/SwiftConfig.hpp
index ad3dcb5..a40648c 100644
--- a/src/Main/SwiftConfig.hpp
+++ b/src/Main/SwiftConfig.hpp
@@ -21,6 +21,7 @@
#include "Common/MutexLock.hpp"
#include "Common/Socket.hpp"
+#include <array>
#include <string>
namespace sw
@@ -48,7 +49,7 @@
bool enableSSE3;
bool enableSSSE3;
bool enableSSE4_1;
- rr::Optimization optimization[10];
+ std::array<rr::Optimization::Pass, 10> optimization;
bool disableServer;
bool keepSystemCursor;
bool forceWindowed;
diff --git a/src/OpenGL/compiler/Compiler.vcxproj b/src/OpenGL/compiler/Compiler.vcxproj
index fbe01c5..5964bce 100644
--- a/src/OpenGL/compiler/Compiler.vcxproj
+++ b/src/OpenGL/compiler/Compiler.vcxproj
@@ -125,7 +125,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BrowseInformation>true</BrowseInformation>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;4005;</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;4005;</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<ErrorReporting>Queue</ErrorReporting>
@@ -145,7 +145,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BrowseInformation>true</BrowseInformation>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;4005;4267;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<ErrorReporting>Queue</ErrorReporting>
@@ -164,7 +164,7 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;4005;</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;4005;</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<ErrorReporting>Queue</ErrorReporting>
@@ -183,7 +183,7 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;4005;4267;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<ErrorReporting>Queue</ErrorReporting>
@@ -203,7 +203,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<OmitFramePointers>false</OmitFramePointers>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;4005;</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;4005;</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<ErrorReporting>Queue</ErrorReporting>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -222,7 +222,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<OmitFramePointers>false</OmitFramePointers>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;4005;4267;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<ErrorReporting>Queue</ErrorReporting>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
diff --git a/src/OpenGL/compiler/preprocessor/preprocessor.vcxproj b/src/OpenGL/compiler/preprocessor/preprocessor.vcxproj
index 07f364d..b2308ef 100644
--- a/src/OpenGL/compiler/preprocessor/preprocessor.vcxproj
+++ b/src/OpenGL/compiler/preprocessor/preprocessor.vcxproj
@@ -89,7 +89,7 @@
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>4005;</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;4005;</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -104,7 +104,7 @@
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>4005;4267;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -121,7 +121,7 @@
<IntrinsicFunctions>true</IntrinsicFunctions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>4005;</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;4005;</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -140,7 +140,7 @@
<IntrinsicFunctions>true</IntrinsicFunctions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>4005;4267;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;4005;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
diff --git a/src/OpenGL/libEGL/BUILD.gn b/src/OpenGL/libEGL/BUILD.gn
index 993ca6a..a17a3a7 100644
--- a/src/OpenGL/libEGL/BUILD.gn
+++ b/src/OpenGL/libEGL/BUILD.gn
@@ -73,7 +73,11 @@
"CoreFoundation.framework",
"IOSurface.framework",
]
- ldflags = [ "-Wl,-install_name,@rpath/libswiftshader_libEGL.dylib" ]
+ ldflags = [
+ "-Wl,-install_name,@rpath/libswiftshader_libEGL.dylib",
+ "-Wl,-exported_symbols_list," +
+ rebase_path("libEGL.exports", root_build_dir),
+ ]
} else if (is_linux) {
if (use_x11) {
sources += [ "../../Main/libX11.cpp" ]
diff --git a/src/OpenGL/libEGL/libEGL.cpp b/src/OpenGL/libEGL/libEGL.cpp
index 51f5309..70df27f 100644
--- a/src/OpenGL/libEGL/libEGL.cpp
+++ b/src/OpenGL/libEGL/libEGL.cpp
@@ -932,13 +932,6 @@
return EGL_FALSE;
}
- if((draw != EGL_NO_SURFACE && drawSurface->hasClientBuffer()) ||
- (read != EGL_NO_SURFACE && readSurface->hasClientBuffer()))
- {
- // Make current is not supported on IOSurface pbuffers.
- return error(EGL_BAD_SURFACE, EGL_FALSE);
- }
-
if((draw != EGL_NO_SURFACE) ^ (read != EGL_NO_SURFACE))
{
return error(EGL_BAD_MATCH, EGL_FALSE);
diff --git a/src/OpenGL/libEGL/libEGL.vcxproj b/src/OpenGL/libEGL/libEGL.vcxproj
index 64c35fd..7c6d827 100644
--- a/src/OpenGL/libEGL/libEGL.vcxproj
+++ b/src/OpenGL/libEGL/libEGL.vcxproj
@@ -131,7 +131,7 @@
<BrowseInformation>true</BrowseInformation>
<TreatWarningAsError>true</TreatWarningAsError>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
- <DisableSpecificWarnings>5030</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
@@ -164,7 +164,7 @@
<BrowseInformation>true</BrowseInformation>
<TreatWarningAsError>true</TreatWarningAsError>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
- <DisableSpecificWarnings>5030</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
@@ -198,7 +198,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<TreatWarningAsError>true</TreatWarningAsError>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
- <DisableSpecificWarnings>5030</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
@@ -232,7 +232,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<TreatWarningAsError>true</TreatWarningAsError>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
- <DisableSpecificWarnings>5030</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
@@ -268,7 +268,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<OmitFramePointers>false</OmitFramePointers>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
@@ -302,7 +302,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<OmitFramePointers>false</OmitFramePointers>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
diff --git a/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj b/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj
index b5da9ce..28d6728 100644
--- a/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj
+++ b/src/OpenGL/libGLES_CM/libGLES_CM.vcxproj
@@ -139,6 +139,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>dxguid.lib;WS2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
@@ -169,6 +170,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>dxguid.lib;WS2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
@@ -206,6 +208,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>dxguid.lib;WS2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
@@ -244,6 +247,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>dxguid.lib;WS2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
@@ -282,6 +286,7 @@
<IntrinsicFunctions>false</IntrinsicFunctions>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>dxguid.lib;WS2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
@@ -318,6 +323,7 @@
<IntrinsicFunctions>false</IntrinsicFunctions>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>dxguid.lib;WS2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
diff --git a/src/OpenGL/libGLESv2/BUILD.gn b/src/OpenGL/libGLESv2/BUILD.gn
index d7037ca..6fa7dff 100644
--- a/src/OpenGL/libGLESv2/BUILD.gn
+++ b/src/OpenGL/libGLESv2/BUILD.gn
@@ -125,7 +125,11 @@
]
if (is_mac) {
- ldflags = [ "-Wl,-install_name,@rpath/libswiftshader_libGLESv2.dylib" ]
+ ldflags = [
+ "-Wl,-install_name,@rpath/libswiftshader_libGLESv2.dylib",
+ "-Wl,-exported_symbols_list," +
+ rebase_path("libGLESv2.exports", root_build_dir),
+ ]
} else if (is_linux) {
inputs = [
"libGLESv2.lds",
diff --git a/src/OpenGL/libGLESv2/libGLESv2.vcxproj b/src/OpenGL/libGLESv2/libGLESv2.vcxproj
index 8793995..a6d72c5 100644
--- a/src/OpenGL/libGLESv2/libGLESv2.vcxproj
+++ b/src/OpenGL/libGLESv2/libGLESv2.vcxproj
@@ -136,7 +136,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BrowseInformation>true</BrowseInformation>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -168,7 +168,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BrowseInformation>true</BrowseInformation>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -207,7 +207,7 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<IntrinsicFunctions>false</IntrinsicFunctions>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -247,7 +247,7 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<IntrinsicFunctions>false</IntrinsicFunctions>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
@@ -288,7 +288,7 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<IntrinsicFunctions>false</IntrinsicFunctions>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
@@ -326,7 +326,7 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<IntrinsicFunctions>false</IntrinsicFunctions>
<TreatWarningAsError>true</TreatWarningAsError>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index a769a2c..85c3783 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -181,6 +181,7 @@
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
case VK_FORMAT_B8G8R8A8_SRGB:
case VK_FORMAT_R8G8B8A8_SRGB:
@@ -234,6 +235,7 @@
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
case VK_FORMAT_B8G8R8A8_SRGB:
case VK_FORMAT_R8G8B8A8_SRGB:
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp
index 1973a71..80ac5df 100644
--- a/src/Pipeline/SetupRoutine.cpp
+++ b/src/Pipeline/SetupRoutine.cpp
@@ -453,7 +453,7 @@
Return(1);
}
- routine = function(vk::ReactorOptimizationLevel, "SetupRoutine");
+ routine = function("SetupRoutine");
}
void SetupRoutine::setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flat, bool perspective, int component)
@@ -601,7 +601,7 @@
#endif
}
- Routine *SetupRoutine::getRoutine()
+ std::shared_ptr<Routine> SetupRoutine::getRoutine()
{
return routine;
}
diff --git a/src/Pipeline/SetupRoutine.hpp b/src/Pipeline/SetupRoutine.hpp
index 0ea0c71..469b4de 100644
--- a/src/Pipeline/SetupRoutine.hpp
+++ b/src/Pipeline/SetupRoutine.hpp
@@ -30,7 +30,7 @@
virtual ~SetupRoutine();
void generate();
- Routine *getRoutine();
+ std::shared_ptr<Routine> getRoutine();
private:
void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool perspective, int component);
@@ -40,7 +40,7 @@
const SetupProcessor::State &state;
- Routine *routine;
+ std::shared_ptr<Routine> routine;
};
}
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index ce621ee..dd04fc9 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -287,11 +287,11 @@
{
template<typename T>
- T Load(Pointer ptr, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
+ T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
{
using EL = typename Element<T>::type;
- if (ptr.isStaticAllInBounds(sizeof(float)))
+ if (ptr.isStaticallyInBounds(sizeof(float), robustness))
{
// All elements are statically known to be in-bounds.
// We can avoid costly conditional on masks.
@@ -307,9 +307,19 @@
return T(*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment));
}
}
- else if(robust) // Disable OOB reads.
+ else
{
- mask &= ptr.isInBounds(sizeof(float));
+ switch(robustness)
+ {
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess:
+ case OutOfBoundsBehavior::UndefinedValue:
+ mask &= ptr.isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads.
+ break;
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+ break;
+ }
}
auto offsets = ptr.offsets();
@@ -329,11 +339,26 @@
}
return out;
}
+
+ bool zeroMaskedLanes = true;
+ switch(robustness)
+ {
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
+ zeroMaskedLanes = true;
+ break;
+ case OutOfBoundsBehavior::UndefinedValue:
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ zeroMaskedLanes = false;
+ break;
+ }
+
if (ptr.hasStaticSequentialOffsets(sizeof(float)))
{
- return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, robust);
+ return rr::MaskedLoad(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), mask, alignment, zeroMaskedLanes);
}
- return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, robust);
+
+ return rr::Gather(rr::Pointer<EL>(ptr.base), offsets, mask, alignment, zeroMaskedLanes);
}
else
{
@@ -370,15 +395,22 @@
}
template<typename T>
- void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+ void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
{
using EL = typename Element<T>::type;
constexpr size_t alignment = sizeof(float);
auto offsets = ptr.offsets();
- if(robust) // Disable OOB writes.
+ switch(robustness)
{
- mask &= ptr.isInBounds(sizeof(float));
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
+ case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access.
+ mask &= ptr.isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes.
+ break;
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
+ break;
}
if (!atomic && order == std::memory_order_relaxed)
@@ -400,7 +432,7 @@
}
else if (ptr.hasStaticSequentialOffsets(sizeof(float)))
{
- if (ptr.isStaticAllInBounds(sizeof(float)))
+ if (ptr.isStaticallyInBounds(sizeof(float), robustness))
{
// Pointer has no elements OOB, and the store is not atomic.
// Perform a RMW.
@@ -487,7 +519,7 @@
{
case spv::OpEntryPoint:
{
- auto executionModel = spv::ExecutionModel(insn.word(1));
+ executionModel = spv::ExecutionModel(insn.word(1));
auto id = Function::ID(insn.word(2));
auto name = insn.string(3);
auto stage = executionModelToStage(executionModel);
@@ -1967,6 +1999,36 @@
object.definition = insn;
}
+ OutOfBoundsBehavior SpirvShader::EmitState::getOutOfBoundsBehavior(spv::StorageClass storageClass) const
+ {
+ switch(storageClass)
+ {
+ case spv::StorageClassUniform:
+ case spv::StorageClassStorageBuffer:
+ // Buffer resource access. robustBufferAccess feature applies.
+ return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+ : OutOfBoundsBehavior::UndefinedBehavior;
+
+ case spv::StorageClassImage:
+ return OutOfBoundsBehavior::UndefinedValue; // "The value returned by a read of an invalid texel is undefined"
+
+ case spv::StorageClassInput:
+ if(executionModel == spv::ExecutionModelVertex)
+ {
+ // Vertex attributes follow robustBufferAccess rules.
+ return robustBufferAccess ? OutOfBoundsBehavior::RobustBufferAccess
+ : OutOfBoundsBehavior::UndefinedBehavior;
+ }
+ // Fall through to default case.
+ default:
+ // TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
+ // TODO(b/131224163): Optimize cases statically known to be within bounds.
+ return OutOfBoundsBehavior::UndefinedValue;
+ }
+
+ return OutOfBoundsBehavior::Nullify;
+ }
+
// emit-time
void SpirvShader::emitProlog(SpirvRoutine *routine) const
@@ -2004,7 +2066,7 @@
void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask, const vk::DescriptorSet::Bindings &descriptorSets) const
{
- EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess);
+ EmitState state(routine, entryPoint, activeLaneMask, descriptorSets, robustBufferAccess, executionModel);
// Emit everything up to the first label
// TODO: Separate out dispatch of block from non-block instructions?
@@ -2743,7 +2805,8 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, initialValue.Float(i), state->robust, state->activeLaneMask());
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior; // Local variables are always within bounds.
+ SIMD::Store(p, initialValue.Float(i), robustness, state->activeLaneMask());
});
break;
}
@@ -2786,16 +2849,15 @@
}
auto ptr = GetPointerToData(pointerId, 0, state);
-
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
-
auto &dst = state->createIntermediate(resultId, resultTy.sizeInComponents);
+ auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
{
auto p = ptr + offset;
- if (interleavedByLane) { p = interleaveByLane(p); }
- dst.move(i, SIMD::Load<SIMD::Float>(p, state->robust, state->activeLaneMask(), atomic, memoryOrder));
+ if (interleavedByLane) { p = interleaveByLane(p); } // TODO: Interleave once, then add offset?
+ dst.move(i, SIMD::Load<SIMD::Float>(p, robustness, state->activeLaneMask(), atomic, memoryOrder));
});
return EmitResult::Continue;
@@ -2823,6 +2885,7 @@
auto ptr = GetPointerToData(pointerId, 0, state);
bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
+ auto robustness = state->getOutOfBoundsBehavior(pointerTy.storageClass);
if (object.kind == Object::Kind::Constant)
{
@@ -2832,7 +2895,7 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, SIMD::Float(src[i]), state->robust, state->activeLaneMask(), atomic, memoryOrder);
+ SIMD::Store(p, SIMD::Float(src[i]), robustness, state->activeLaneMask(), atomic, memoryOrder);
});
}
else
@@ -2843,7 +2906,7 @@
{
auto p = ptr + offset;
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, src.Float(i), state->robust, state->activeLaneMask(), atomic, memoryOrder);
+ SIMD::Store(p, src.Float(i), robustness, state->activeLaneMask(), atomic, memoryOrder);
});
}
@@ -3891,6 +3954,11 @@
auto ptrTy = getType(getObject(ptrId).type);
auto ptr = GetPointerToData(ptrId, 0, state);
bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+ // TODO: GLSL modf() takes an output parameter and thus the pointer is assumed
+ // to be in bounds even for inactive lanes.
+ // - Clarify the SPIR-V spec.
+ // - Eliminate lane masking and assume interleaving.
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
for (auto i = 0u; i < type.sizeInComponents; i++)
{
@@ -3899,7 +3967,7 @@
dst.move(i, frac);
auto p = ptr + (i * sizeof(float));
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, whole, state->robust, state->activeLaneMask());
+ SIMD::Store(p, whole, robustness, state->activeLaneMask());
}
break;
}
@@ -4024,6 +4092,11 @@
auto ptrTy = getType(getObject(ptrId).type);
auto ptr = GetPointerToData(ptrId, 0, state);
bool interleavedByLane = IsStorageInterleavedByLane(ptrTy.storageClass);
+ // TODO: GLSL frexp() takes an output parameter and thus the pointer is assumed
+ // to be in bounds even for inactive lanes.
+ // - Clarify the SPIR-V spec.
+ // - Eliminate lane masking and assume interleaving.
+ auto robustness = OutOfBoundsBehavior::UndefinedBehavior;
for (auto i = 0u; i < type.sizeInComponents; i++)
{
@@ -4035,7 +4108,7 @@
auto p = ptr + (i * sizeof(float));
if (interleavedByLane) { p = interleaveByLane(p); }
- SIMD::Store(p, exponent, state->robust, state->activeLaneMask());
+ SIMD::Store(p, exponent, robustness, state->activeLaneMask());
}
break;
}
@@ -5245,13 +5318,18 @@
auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect);
+ // "The value returned by a read of an invalid texel is undefined,
+ // unless that read operation is from a buffer resource and the robustBufferAccess feature is enabled."
+ // TODO: Don't always assume a buffer resource.
+ auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
+
SIMD::Int packed[4];
// Round up texel size: for formats smaller than 32 bits per texel, we will emit a bunch
// of (overlapping) 32b loads here, and each lane will pick out what it needs from the low bits.
// TODO: specialize for small formats?
for (auto i = 0; i < (texelSize + 3)/4; i++)
{
- packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->robust, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
+ packed[i] = SIMD::Load<SIMD::Int>(texelPtr, robustness, state->activeLaneMask(), false, std::memory_order_relaxed, std::min(texelSize, 4));
texelPtr += sizeof(float);
}
@@ -5587,9 +5665,12 @@
auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
auto texelPtr = GetTexelAddress(state, basePtr, coordinate, imageType, binding, texelSize, 0, false);
+ // SPIR-V 1.4: "If the coordinates are outside the image, the memory location that is accessed is undefined."
+ auto robustness = OutOfBoundsBehavior::UndefinedValue;
+
for (auto i = 0u; i < numPackedElements; i++)
{
- SIMD::Store(texelPtr, packed[i], state->robust, state->activeLaneMask());
+ SIMD::Store(texelPtr, packed[i], robustness, state->activeLaneMask());
texelPtr += sizeof(float);
}
@@ -5778,8 +5859,11 @@
if (dstInterleavedByLane) { dst = interleaveByLane(dst); }
if (srcInterleavedByLane) { src = interleaveByLane(src); }
- auto value = SIMD::Load<SIMD::Float>(src, state->robust, state->activeLaneMask());
- SIMD::Store(dst, value, state->robust, state->activeLaneMask());
+ // TODO(b/131224163): Optimize based on src/dst storage classes.
+ auto robustness = OutOfBoundsBehavior::RobustBufferAccess;
+
+ auto value = SIMD::Load<SIMD::Float>(src, robustness, state->activeLaneMask());
+ SIMD::Store(dst, value, robustness, state->activeLaneMask());
});
return EmitResult::Continue;
}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index a0c07be..7c850ba 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -55,6 +55,14 @@
// Forward declarations.
class SpirvRoutine;
+ enum class OutOfBoundsBehavior
+ {
+ Nullify, // Loads become zero, stores are elided.
+ RobustBufferAccess, // As defined by the Vulkan spec (in short: access anywhere within bounds, or zeroing).
+ UndefinedValue, // Only for load operations. Not secure. No program termination.
+ UndefinedBehavior, // Program may terminate.
+ };
+
// SIMD contains types that represent multiple scalars packed into a single
// vector data type. Types in the SIMD namespace provide a semantic hint
// that the data should be treated as a per-execution-lane scalar instead of
@@ -137,11 +145,11 @@
return dynamicOffsets + SIMD::Int(staticOffsets[0], staticOffsets[1], staticOffsets[2], staticOffsets[3]);
}
- inline SIMD::Int isInBounds(unsigned int accessSize) const
+ inline SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
{
ASSERT(accessSize > 0);
- if (isStaticAllInBounds(accessSize))
+ if (isStaticallyInBounds(accessSize, robustness))
{
return SIMD::Int(0xffffffff);
}
@@ -160,12 +168,31 @@
return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
}
- inline bool isStaticAllInBounds(unsigned int accessSize) const
+ inline bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const
{
- if (hasDynamicOffsets || hasDynamicLimit)
+ if (hasDynamicOffsets)
{
return false;
}
+
+ if (hasDynamicLimit)
+ {
+ if (hasStaticEqualOffsets() || hasStaticSequentialOffsets(accessSize))
+ {
+ switch(robustness)
+ {
+ case OutOfBoundsBehavior::UndefinedBehavior:
+ // With this robustness setting the application/compiler guarantees in-bounds accesses on active lanes,
+ // but since it can't know in advance which branches are taken this must be true even for inactive lanes.
+ return true;
+ case OutOfBoundsBehavior::Nullify:
+ case OutOfBoundsBehavior::RobustBufferAccess:
+ case OutOfBoundsBehavior::UndefinedValue:
+ return false;
+ }
+ }
+ }
+
for (int i = 0; i < SIMD::Width; i++)
{
if (staticOffsets[i] + accessSize - 1 >= staticLimit)
@@ -173,6 +200,7 @@
return false;
}
}
+
return true;
}
@@ -247,8 +275,8 @@
SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
std::array<int32_t, SIMD::Width> staticOffsets;
- bool hasDynamicLimit; // True if dynamicLimit is zero.
- bool hasDynamicOffsets; // True if all dynamicOffsets are zero.
+ bool hasDynamicLimit; // True if dynamicLimit is non-zero.
+ bool hasDynamicOffsets; // True if any dynamicOffsets are non-zero.
};
template <typename T> struct Element {};
@@ -257,16 +285,16 @@
template <> struct Element<UInt> { using type = rr::UInt; };
template<typename T>
- void Store(Pointer ptr, T val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
+ void Store(Pointer ptr, T val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
template<typename T>
- void Store(Pointer ptr, RValue<T> val, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
+ void Store(Pointer ptr, RValue<T> val, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed)
{
- Store(ptr, T(val), robust, mask, atomic, order);
+ Store(ptr, T(val), robustness, mask, atomic, order);
}
template<typename T>
- T Load(Pointer ptr, bool robust, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
+ T Load(Pointer ptr, OutOfBoundsBehavior robustness, Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
}
// Incrementally constructed complex bundle of rvalues
@@ -850,6 +878,7 @@
Function::ID entryPoint;
const bool robustBufferAccess = true;
+ spv::ExecutionModel executionModel = spv::ExecutionModelMax; // Invalid prior to OpEntryPoint parsing.
// DeclareType creates a Type for the given OpTypeX instruction, storing
// it into the types map. It is called from the analysis pass (constructor).
@@ -934,13 +963,16 @@
Function::ID function,
RValue<SIMD::Int> activeLaneMask,
const vk::DescriptorSet::Bindings &descriptorSets,
- bool robustBufferAccess)
+ bool robustBufferAccess,
+ spv::ExecutionModel executionModel)
: routine(routine),
function(function),
activeLaneMaskValue(activeLaneMask.value),
descriptorSets(descriptorSets),
- robust(robustBufferAccess)
+ robustBufferAccess(robustBufferAccess),
+ executionModel(executionModel)
{
+ ASSERT(executionModelToStage(executionModel) != VkShaderStageFlagBits(0)); // Must parse OpEntryPoint before emitting.
}
RValue<SIMD::Int> activeLaneMask() const
@@ -975,7 +1007,7 @@
const vk::DescriptorSet::Bindings &descriptorSets;
- const bool robust = true; // Emit robustBufferAccess safe code.
+ OutOfBoundsBehavior getOutOfBoundsBehavior(spv::StorageClass storageClass) const;
Intermediate& createIntermediate(Object::ID id, uint32_t size)
{
@@ -1005,9 +1037,13 @@
ASSERT_MSG(it != pointers.end(), "Unknown pointer %d", id.value());
return it->second;
}
+
private:
std::unordered_map<Object::ID, Intermediate> intermediates;
std::unordered_map<Object::ID, SIMD::Pointer> pointers;
+
+ const bool robustBufferAccess = true; // Emit robustBufferAccess safe code.
+ const spv::ExecutionModel executionModel = spv::ExecutionModelMax;
};
// EmitResult is an enumerator of result values from the Emit functions.
@@ -1196,13 +1232,15 @@
std::pair<SIMD::Float, SIMD::Int> Frexp(RValue<SIMD::Float> val) const;
static ImageSampler *getImageSampler(uint32_t instruction, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler);
- static ImageSampler *emitSamplerFunction(ImageInstruction instruction, const Sampler &samplerState);
+ static std::shared_ptr<rr::Routine> emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState);
// TODO(b/129523279): Eliminate conversion and use vk::Sampler members directly.
static sw::TextureType convertTextureType(VkImageViewType imageViewType);
static sw::FilterType convertFilterMode(const vk::Sampler *sampler);
static sw::MipmapType convertMipmapMode(const vk::Sampler *sampler);
static sw::AddressingMode convertAddressingMode(int coordinateIndex, VkSamplerAddressMode addressMode, VkImageViewType imageViewType);
+
+ // Returns 0 when invalid.
static VkShaderStageFlagBits executionModelToStage(spv::ExecutionModel model);
};
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index e02c32a..5e56977 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -16,13 +16,11 @@
#include "SamplerCore.hpp" // TODO: Figure out what's needed.
#include "System/Math.hpp"
-#include "Vulkan/VkBuffer.hpp"
#include "Vulkan/VkDebug.hpp"
-#include "Vulkan/VkDescriptorSet.hpp"
-#include "Vulkan/VkPipelineLayout.hpp"
+#include "Vulkan/VkDescriptorSetLayout.hpp"
+#include "Vulkan/VkDevice.hpp"
#include "Vulkan/VkImageView.hpp"
#include "Vulkan/VkSampler.hpp"
-#include "Vulkan/VkDescriptorSetLayout.hpp"
#include "Device/Config.hpp"
#include <spirv/unified1/spirv.hpp>
@@ -31,31 +29,6 @@
#include <climits>
#include <mutex>
-namespace
-{
-
-struct SamplingRoutineKey
-{
- uint32_t instruction;
- uint32_t sampler;
- uint32_t imageView;
-
- bool operator==(const SamplingRoutineKey &rhs) const
- {
- return instruction == rhs.instruction && sampler == rhs.sampler && imageView == rhs.imageView;
- }
-
- struct Hash
- {
- std::size_t operator()(const SamplingRoutineKey &key) const noexcept
- {
- return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView;
- }
- };
-};
-
-}
-
namespace sw {
SpirvShader::ImageSampler *SpirvShader::getImageSampler(uint32_t inst, vk::SampledImageDescriptor const *imageDescriptor, const vk::Sampler *sampler)
@@ -63,15 +36,24 @@
ImageInstruction instruction(inst);
ASSERT(imageDescriptor->imageViewId != 0 && (sampler->id != 0 || instruction.samplerMethod == Fetch));
- // TODO(b/129523279): Move somewhere sensible.
- static std::unordered_map<SamplingRoutineKey, ImageSampler*, SamplingRoutineKey::Hash> cache;
- static std::mutex mutex;
+ vk::Device::SamplingRoutineCache::Key key = {inst, imageDescriptor->imageViewId, sampler->id};
- SamplingRoutineKey key = {inst, imageDescriptor->imageViewId, sampler->id};
+ ASSERT(imageDescriptor->device);
- std::unique_lock<std::mutex> lock(mutex);
- auto it = cache.find(key);
- if (it != cache.end()) { return it->second; }
+ auto routine = imageDescriptor->device->findInConstCache(key);
+ if(routine)
+ {
+ return (ImageSampler*)(routine->getEntry());
+ }
+
+ std::unique_lock<std::mutex> lock(imageDescriptor->device->getSamplingRoutineCacheMutex());
+ vk::Device::SamplingRoutineCache* cache = imageDescriptor->device->getSamplingRoutineCache();
+
+ routine = cache->query(key);
+ if(routine)
+ {
+ return (ImageSampler*)(routine->getEntry());
+ }
auto type = imageDescriptor->type;
@@ -108,13 +90,13 @@
UNSUPPORTED("anisotropyEnable");
}
- auto fptr = emitSamplerFunction(instruction, samplerState);
+ routine = emitSamplerRoutine(instruction, samplerState);
- cache.emplace(key, fptr);
- return fptr;
+ cache->add(key, routine);
+ return (ImageSampler*)(routine->getEntry());
}
-SpirvShader::ImageSampler *SpirvShader::emitSamplerFunction(ImageInstruction instruction, const Sampler &samplerState)
+std::shared_ptr<rr::Routine> SpirvShader::emitSamplerRoutine(ImageInstruction instruction, const Sampler &samplerState)
{
// TODO(b/129523279): Hold a separate mutex lock for the sampler being built.
rr::Function<Void(Pointer<Byte>, Pointer<Byte>, Pointer<SIMD::Float>, Pointer<SIMD::Float>, Pointer<Byte>)> function;
@@ -231,7 +213,7 @@
}
}
- return (ImageSampler*)function(vk::ReactorOptimizationLevel, "sampler")->getEntry();
+ return function("sampler");
}
sw::TextureType SpirvShader::convertTextureType(VkImageViewType imageViewType)
diff --git a/src/Reactor/BUILD.gn b/src/Reactor/BUILD.gn
index 94a66da..ccde7db 100644
--- a/src/Reactor/BUILD.gn
+++ b/src/Reactor/BUILD.gn
@@ -46,7 +46,6 @@
"Debug.cpp",
"ExecutableMemory.cpp",
"Reactor.cpp",
- "Routine.cpp",
]
}
diff --git a/src/Reactor/Coroutine.hpp b/src/Reactor/Coroutine.hpp
index 993e7e0..aaf08ae 100644
--- a/src/Reactor/Coroutine.hpp
+++ b/src/Reactor/Coroutine.hpp
@@ -133,7 +133,7 @@
// called without building a new rr::Function or rr::Coroutine.
// While automatically called by operator(), finalize() should be called
// as early as possible to release the global Reactor mutex lock.
- inline void finalize(OptimizationLevel optLevel = OptimizationLevel::Default);
+ inline void finalize(const Config::Edit &cfg = Config::Edit::None);
// Starts execution of the coroutine and returns a unique_ptr to a
// Stream<> that exposes the await() function for obtaining yielded
@@ -164,11 +164,11 @@
}
template<typename Return, typename... Arguments>
- void Coroutine<Return(Arguments...)>::finalize(OptimizationLevel optLevel /* = OptimizationLevel::Default */)
+ void Coroutine<Return(Arguments...)>::finalize(const Config::Edit &cfg /* = Config::Edit::None */)
{
if(core != nullptr)
{
- routine.reset(core->acquireCoroutine("coroutine", optLevel));
+ routine = core->acquireCoroutine("coroutine", cfg);
core.reset(nullptr);
}
}
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index c2c7ae7..106ac35 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -105,46 +105,125 @@
namespace
{
- class LLVMInitializer
+ // Default configuration settings. Must be accessed under mutex lock.
+ std::mutex defaultConfigLock;
+ rr::Config &defaultConfig()
{
- protected:
- LLVMInitializer()
- {
- llvm::InitializeNativeTarget();
- llvm::InitializeNativeTargetAsmPrinter();
- llvm::InitializeNativeTargetAsmParser();
- }
+ // This uses a static in a function to avoid the cost of a global static
+ // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
+ static rr::Config config = rr::Config::Edit()
+ .set(rr::Optimization::Level::Default)
+ .add(rr::Optimization::Pass::ScalarReplAggregates)
+ .add(rr::Optimization::Pass::InstructionCombining)
+ .apply({});
+ return config;
+ }
+
+ // Cache provides a simple, thread-safe key-value store.
+ template <typename KEY, typename VALUE>
+ class Cache
+ {
+ public:
+ Cache() = default;
+ Cache(const Cache& other);
+ VALUE getOrCreate(KEY key, std::function<VALUE()> create);
+ private:
+ mutable std::mutex mutex; // mutable required for copy constructor.
+ std::unordered_map<KEY, VALUE> map;
};
+ template <typename KEY, typename VALUE>
+ Cache<KEY, VALUE>::Cache(const Cache& other)
+ {
+ std::unique_lock<std::mutex> lock(other.mutex);
+ map = other.map;
+ }
+
+ template <typename KEY, typename VALUE>
+ VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
+ {
+ std::unique_lock<std::mutex> lock(mutex);
+ auto it = map.find(key);
+ if (it != map.end())
+ {
+ return it->second;
+ }
+ auto value = create();
+ map.emplace(key, value);
+ return value;
+ }
+
// JITGlobals is a singleton that holds all the immutable machine specific
// information for the host device.
- class JITGlobals : LLVMInitializer
+ class JITGlobals
{
public:
- static JITGlobals const * get();
+ using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;
- std::string mcpu;
- std::vector<std::string> mattrs;
- const char* march;
- llvm::TargetOptions targetOptions;
- llvm::DataLayout dataLayout = llvm::DataLayout("");
+ static JITGlobals * get();
+
+ const std::string mcpu;
+ const std::vector<std::string> mattrs;
+ const char* const march;
+ const llvm::TargetOptions targetOptions;
+ const llvm::DataLayout dataLayout;
+
+ TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);
private:
- JITGlobals();
+ static JITGlobals create();
+ static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
+ JITGlobals(const char *mcpu,
+ const std::vector<std::string> &mattrs,
+ const char *march,
+ const llvm::TargetOptions &targetOptions,
+ const llvm::DataLayout &dataLayout);
+ JITGlobals(const JITGlobals&) = default;
+
+ // The cache key here is actually a rr::Optimization::Level. We use int
+ // as 'enum class' types do not provide builtin hash functions until
+ // C++14. See: https://stackoverflow.com/a/29618545.
+ Cache<int, TargetMachineSPtr> targetMachines;
};
- JITGlobals const * JITGlobals::get()
+ JITGlobals * JITGlobals::get()
{
- static JITGlobals instance;
+ static JITGlobals instance = create();
return &instance;
}
- JITGlobals::JITGlobals()
+ JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
{
- // mcpu
- mcpu = llvm::sys::getHostCPUName();
+ return targetMachines.getOrCreate(static_cast<int>(optlevel), [&]() {
+ return TargetMachineSPtr(llvm::EngineBuilder()
+#ifdef ENABLE_RR_DEBUG_INFO
+ .setOptLevel(toLLVM(rr::Optimization::Level::None))
+#else
+ .setOptLevel(toLLVM(optlevel))
+#endif // ENABLE_RR_DEBUG_INFO
+ .setMCPU(mcpu)
+ .setMArch(march)
+ .setMAttrs(mattrs)
+ .setTargetOptions(targetOptions)
+ .selectTarget());
+ });
+ }
- // mattrs
+ JITGlobals JITGlobals::create()
+ {
+ struct LLVMInitializer
+ {
+ LLVMInitializer()
+ {
+ llvm::InitializeNativeTarget();
+ llvm::InitializeNativeTargetAsmPrinter();
+ llvm::InitializeNativeTargetAsmParser();
+ }
+ };
+ static LLVMInitializer initializeLLVM;
+
+ auto mcpu = llvm::sys::getHostCPUName();
+
llvm::StringMap<bool> features;
bool ok = llvm::sys::getHostCPUFeatures(features);
@@ -155,31 +234,13 @@
(void) ok; // getHostCPUFeatures always returns false on other platforms
#endif
+ std::vector<std::string> mattrs;
for (auto &feature : features)
{
if (feature.second) { mattrs.push_back(feature.first()); }
}
-#if 0
-#if defined(__i386__) || defined(__x86_64__)
- mattrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
- mattrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
- mattrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
- mattrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
- mattrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
- mattrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
- mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
-#elif defined(__arm__)
-#if __ARM_ARCH >= 8
- mattrs.push_back("+armv8-a");
-#else
- // armv7-a requires compiler-rt routines; otherwise, compiled kernel
- // might fail to link.
-#endif
-#endif
-#endif
-
- // arch
+ const char* march = nullptr;
#if defined(__x86_64__)
march = "x86-64";
#elif defined(__i386__)
@@ -200,9 +261,8 @@
#error "unknown architecture"
#endif
+ llvm::TargetOptions targetOptions;
targetOptions.UnsafeFPMath = false;
- // targetOpts.NoInfsFPMath = true;
- // targetOpts.NoNaNsFPMath = true;
auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
llvm::EngineBuilder()
@@ -213,7 +273,35 @@
.setTargetOptions(targetOptions)
.selectTarget());
- dataLayout = targetMachine->createDataLayout();
+ auto dataLayout = targetMachine->createDataLayout();
+
+ return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
+ }
+
+ llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
+ {
+ switch (level)
+ {
+ case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
+ case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
+ case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
+ case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
+ default: UNREACHABLE("Unknown Optimization Level %d", int(level));
+ }
+ return ::llvm::CodeGenOpt::Default;
+ }
+
+ JITGlobals::JITGlobals(const char* mcpu,
+ const std::vector<std::string> &mattrs,
+ const char* march,
+ const llvm::TargetOptions &targetOptions,
+ const llvm::DataLayout &dataLayout) :
+ mcpu(mcpu),
+ mattrs(mattrs),
+ march(march),
+ targetOptions(targetOptions),
+ dataLayout(dataLayout)
+ {
}
// JITRoutine is a rr::Routine that holds a LLVM JIT session, compiler and
@@ -228,7 +316,7 @@
std::unique_ptr<llvm::Module> module,
llvm::Function **funcs,
size_t count,
- rr::OptimizationLevel optLevel) :
+ const rr::Config &config) :
resolver(createLegacyLookupResolver(
session,
[&](const std::string &name) {
@@ -247,17 +335,7 @@
return;
}
})),
- targetMachine(llvm::EngineBuilder()
-#ifdef ENABLE_RR_DEBUG_INFO
- .setOptLevel(llvm::CodeGenOpt::None)
-#else
- .setOptLevel(toLLVM(optLevel))
-#endif // ENABLE_RR_DEBUG_INFO
- .setMCPU(JITGlobals::get()->mcpu)
- .setMArch(JITGlobals::get()->march)
- .setMAttrs(JITGlobals::get()->mattrs)
- .setTargetOptions(JITGlobals::get()->targetOptions)
- .selectTarget()),
+ targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel())),
compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
objLayer(
session,
@@ -318,21 +396,8 @@
}
private:
- static ::llvm::CodeGenOpt::Level toLLVM(rr::OptimizationLevel level)
- {
- switch (level)
- {
- case rr::OptimizationLevel::None: return ::llvm::CodeGenOpt::None;
- case rr::OptimizationLevel::Less: return ::llvm::CodeGenOpt::Less;
- case rr::OptimizationLevel::Default: return ::llvm::CodeGenOpt::Default;
- case rr::OptimizationLevel::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
- default: UNREACHABLE("Unknown OptimizationLevel %d", int(level));
- }
- return ::llvm::CodeGenOpt::Default;
- }
-
std::shared_ptr<llvm::orc::SymbolResolver> resolver;
- std::unique_ptr<llvm::TargetMachine> targetMachine;
+ std::shared_ptr<llvm::TargetMachine> targetMachine;
llvm::orc::ExecutionSession session;
CompileLayer compileLayer;
ObjLayer objLayer;
@@ -343,15 +408,17 @@
class JITBuilder
{
public:
- JITBuilder():
+ JITBuilder(const rr::Config &config) :
+ config(config),
module(new llvm::Module("", context)),
builder(new llvm::IRBuilder<>(context))
{
module->setDataLayout(JITGlobals::get()->dataLayout);
}
- void optimize()
+ void optimize(const rr::Config &cfg)
{
+
#ifdef ENABLE_RR_DEBUG_INFO
if (debugInfo != nullptr)
{
@@ -362,36 +429,36 @@
std::unique_ptr<llvm::legacy::PassManager> passManager(
new llvm::legacy::PassManager());
- passManager->add(llvm::createSROAPass());
-
- for(int pass = 0; pass < 10 && rr::optimization[pass] != rr::Disabled; pass++)
+ for(auto pass : cfg.getOptimization().getPasses())
{
- switch(rr::optimization[pass])
+ switch(pass)
{
- case rr::Disabled: break;
- case rr::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
- case rr::LICM: passManager->add(llvm::createLICMPass()); break;
- case rr::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
- case rr::GVN: passManager->add(llvm::createGVNPass()); break;
- case rr::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
- case rr::Reassociate: passManager->add(llvm::createReassociatePass()); break;
- case rr::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
- case rr::SCCP: passManager->add(llvm::createSCCPPass()); break;
- case rr::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
+ case rr::Optimization::Pass::Disabled: break;
+ case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
+ case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
+ case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
+ case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
+ case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
+ case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
+ case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
+ case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
+ case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
+ case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
default:
- UNREACHABLE("optimization[pass]: %d, pass: %d", int(rr::optimization[pass]), int(pass));
+ UNREACHABLE("pass: %d", int(pass));
}
}
passManager->run(*module);
}
- rr::Routine *acquireRoutine(llvm::Function **funcs, size_t count, rr::OptimizationLevel optLevel)
+ std::shared_ptr<rr::Routine> acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
{
ASSERT(module);
- return new JITRoutine(std::move(module), funcs, count, optLevel);
+ return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
}
+ const rr::Config config;
llvm::LLVMContext context;
std::unique_ptr<llvm::Module> module;
std::unique_ptr<llvm::IRBuilder<>> builder;
@@ -404,6 +471,8 @@
llvm::Value *handle = nullptr;
llvm::Value *id = nullptr;
llvm::Value *promise = nullptr;
+ llvm::Type *yieldType = nullptr;
+ llvm::BasicBlock *entryBlock = nullptr;
llvm::BasicBlock *suspendBlock = nullptr;
llvm::BasicBlock *endBlock = nullptr;
llvm::BasicBlock *destroyBlock = nullptr;
@@ -1002,8 +1071,6 @@
return it->second;
}
- Optimization optimization[10] = {InstructionCombining, Disabled};
-
// The abstract Type* types are implemented as LLVM types, except that
// 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86
// and VFP in ARM, and eliminate the overhead of converting them to explicit
@@ -1134,7 +1201,7 @@
::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
ASSERT(jit == nullptr);
- jit.reset(new JITBuilder());
+ jit.reset(new JITBuilder(Nucleus::getDefaultConfig()));
}
Nucleus::~Nucleus()
@@ -1143,8 +1210,29 @@
::codegenMutex.unlock();
}
- Routine *Nucleus::acquireRoutine(const char *name, OptimizationLevel optimizationLevel)
+ void Nucleus::setDefaultConfig(const Config &cfg)
{
+ std::unique_lock<std::mutex> lock(::defaultConfigLock);
+ ::defaultConfig() = cfg;
+ }
+
+ void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
+ {
+ std::unique_lock<std::mutex> lock(::defaultConfigLock);
+ auto &config = ::defaultConfig();
+ config = cfgEdit.apply(config);
+ }
+
+ Config Nucleus::getDefaultConfig()
+ {
+ std::unique_lock<std::mutex> lock(::defaultConfigLock);
+ return ::defaultConfig();
+ }
+
+ std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
+ {
+ auto cfg = cfgEdit.apply(jit->config);
+
if(jit->builder->GetInsertBlock()->empty() || !jit->builder->GetInsertBlock()->back().isTerminator())
{
llvm::Type *type = jit->function->getReturnType();
@@ -1181,7 +1269,7 @@
}
#endif // defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
- optimize();
+ jit->optimize(cfg);
if(false)
{
@@ -1190,17 +1278,12 @@
jit->module->print(file, 0);
}
- auto routine = jit->acquireRoutine(&jit->function, 1, optimizationLevel);
+ auto routine = jit->acquireRoutine(&jit->function, 1, cfg);
jit.reset();
return routine;
}
- void Nucleus::optimize()
- {
- jit->optimize();
- }
-
Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
{
// Need to allocate it in the entry block for mem2reg to work
@@ -1246,7 +1329,7 @@
jit->function = rr::createFunction("", T(ReturnType), T(Params));
#ifdef ENABLE_RR_DEBUG_INFO
- jit->debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(jit->builder, jit->context, jit->module.get(), jit->function));
+ jit->debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(jit->builder.get(), &jit->context, jit->module.get(), jit->function));
#endif // ENABLE_RR_DEBUG_INFO
jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->function));
@@ -4462,22 +4545,19 @@
SuspendActionDestroy = 1
};
-} // anonymous namespace
-namespace rr {
-
-void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
+void promoteFunctionToCoroutine()
{
+ ASSERT(jit->coroutine.id == nullptr);
+
// Types
auto voidTy = ::llvm::Type::getVoidTy(jit->context);
auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
auto i8Ty = ::llvm::Type::getInt8Ty(jit->context);
auto i32Ty = ::llvm::Type::getInt32Ty(jit->context);
auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
- auto promiseTy = T(YieldType);
+ auto promiseTy = jit->coroutine.yieldType;
auto promisePtrTy = promiseTy->getPointerTo();
- auto handleTy = i8PtrTy;
- auto boolTy = i1Ty;
// LLVM intrinsics
auto coro_id = ::llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::coro_id);
@@ -4496,6 +4576,8 @@
auto freeFrameTy = ::llvm::FunctionType::get(voidTy, {i8PtrTy}, false);
auto freeFrame = jit->module->getOrInsertFunction("coroutine_free_frame", freeFrameTy);
+ auto oldInsertionPoint = jit->builder->saveIP();
+
// Build the coroutine_await() function:
//
// bool coroutine_await(CoroutineHandle* handle, YieldType* out)
@@ -4512,7 +4594,6 @@
// }
// }
//
- jit->coroutine.await = rr::createFunction("coroutine_await", boolTy, {handleTy, promisePtrTy});
{
auto args = jit->coroutine.await->arg_begin();
auto handle = args++;
@@ -4543,7 +4624,6 @@
// llvm.coro.destroy(handle);
// }
//
- jit->coroutine.destroy = rr::createFunction("coroutine_destroy", voidTy, {handleTy});
{
auto handle = jit->coroutine.destroy->arg_begin();
jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.destroy));
@@ -4583,20 +4663,17 @@
// return handle;
// }
//
- jit->function = rr::createFunction("coroutine_begin", handleTy, T(Params));
#ifdef ENABLE_RR_DEBUG_INFO
- jit->debugInfo = std::unique_ptr<DebugInfo>(new DebugInfo(jit->builder, jit->context, jit->module, jit->function));
+ jit->debugInfo = std::unique_ptr<rr::DebugInfo>(new rr::DebugInfo(jit->builder.get(), &jit->context, jit->module.get(), jit->function));
#endif // ENABLE_RR_DEBUG_INFO
- auto entryBlock = llvm::BasicBlock::Create(jit->context, "coroutine", jit->function);
jit->coroutine.suspendBlock = llvm::BasicBlock::Create(jit->context, "suspend", jit->function);
jit->coroutine.endBlock = llvm::BasicBlock::Create(jit->context, "end", jit->function);
jit->coroutine.destroyBlock = llvm::BasicBlock::Create(jit->context, "destroy", jit->function);
- jit->builder->SetInsertPoint(entryBlock);
- Variable::materializeAll();
- jit->coroutine.promise = jit->builder->CreateAlloca(T(YieldType), nullptr, "promise");
+ jit->builder->SetInsertPoint(jit->coroutine.entryBlock, jit->coroutine.entryBlock->begin());
+ jit->coroutine.promise = jit->builder->CreateAlloca(promiseTy, nullptr, "promise");
jit->coroutine.id = jit->builder->CreateCall(coro_id, {
::llvm::ConstantInt::get(i32Ty, 0),
jit->builder->CreatePointerCast(jit->coroutine.promise, i8PtrTy),
@@ -4628,13 +4705,45 @@
jit->builder->CreateCall(freeFrame, {memory});
jit->builder->CreateBr(jit->coroutine.suspendBlock);
- // Switch back to the entry block for reactor codegen.
- jit->builder->SetInsertPoint(entryBlock);
+ // Switch back to original insert point to continue building the coroutine.
+ jit->builder->restoreIP(oldInsertionPoint);
+}
+
+} // anonymous namespace
+
+namespace rr {
+
+void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params)
+{
+ // Coroutines are initially created as a regular function.
+ // Upon the first call to Yield(), the function is promoted to a true
+ // coroutine.
+ auto voidTy = ::llvm::Type::getVoidTy(jit->context);
+ auto i1Ty = ::llvm::Type::getInt1Ty(jit->context);
+ auto i8PtrTy = ::llvm::Type::getInt8PtrTy(jit->context);
+ auto handleTy = i8PtrTy;
+ auto boolTy = i1Ty;
+ auto promiseTy = T(YieldType);
+ auto promisePtrTy = promiseTy->getPointerTo();
+
+ jit->function = rr::createFunction("coroutine_begin", handleTy, T(Params));
+ jit->coroutine.await = rr::createFunction("coroutine_await", boolTy, {handleTy, promisePtrTy});
+ jit->coroutine.destroy = rr::createFunction("coroutine_destroy", voidTy, {handleTy});
+ jit->coroutine.yieldType = promiseTy;
+ jit->coroutine.entryBlock = llvm::BasicBlock::Create(jit->context, "function", jit->function);
+
+ jit->builder->SetInsertPoint(jit->coroutine.entryBlock);
}
void Nucleus::yield(Value* val)
{
- ASSERT_MSG(jit->coroutine.id != nullptr, "yield() can only be called when building a Coroutine");
+ if (jit->coroutine.id == nullptr)
+ {
+ // First call to yield().
+ // Promote the function to a full coroutine.
+ promoteFunctionToCoroutine();
+ ASSERT(jit->coroutine.id != nullptr);
+ }
// promise = val;
//
@@ -4678,11 +4787,26 @@
jit->builder->SetInsertPoint(resumeBlock);
}
-Routine* Nucleus::acquireCoroutine(const char *name, OptimizationLevel optimizationLevel)
+std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
{
- ASSERT_MSG(jit->coroutine.id != nullptr, "acquireCoroutine() called without a call to createCoroutine()");
-
- jit->builder->CreateBr(jit->coroutine.endBlock);
+ bool isCoroutine = jit->coroutine.id != nullptr;
+ if (isCoroutine)
+ {
+ jit->builder->CreateBr(jit->coroutine.endBlock);
+ }
+ else
+ {
+ // Coroutine without a Yield acts as a regular function.
+ // The 'coroutine_begin' function returns a nullptr for the coroutine
+ // handle.
+ jit->builder->CreateRet(llvm::Constant::getNullValue(jit->function->getReturnType()));
+ // The 'coroutine_await' function always returns false (coroutine done).
+ jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.await));
+ jit->builder->CreateRet(llvm::Constant::getNullValue(jit->coroutine.await->getReturnType()));
+ // The 'coroutine_destroy' does nothing, returns void.
+ jit->builder->SetInsertPoint(llvm::BasicBlock::Create(jit->context, "", jit->coroutine.destroy));
+ jit->builder->CreateRetVoid();
+ }
#ifdef ENABLE_RR_DEBUG_INFO
if (jit->debugInfo != nullptr)
@@ -4698,16 +4822,28 @@
jit->module->print(file, 0);
}
- // Run manadory coroutine transforms.
- llvm::legacy::PassManager pm;
- pm.add(llvm::createCoroEarlyPass());
- pm.add(llvm::createCoroSplitPass());
- pm.add(llvm::createCoroElidePass());
- pm.add(llvm::createBarrierNoopPass());
- pm.add(llvm::createCoroCleanupPass());
- pm.run(*jit->module);
+ if (isCoroutine)
+ {
+ // Run mandatory coroutine transforms.
+ llvm::legacy::PassManager pm;
+ pm.add(llvm::createCoroEarlyPass());
+ pm.add(llvm::createCoroSplitPass());
+ pm.add(llvm::createCoroElidePass());
+ pm.add(llvm::createBarrierNoopPass());
+ pm.add(llvm::createCoroCleanupPass());
+ pm.run(*jit->module);
+ }
- optimize();
+#if defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
+ {
+ llvm::legacy::PassManager pm;
+ pm.add(llvm::createVerifierPass());
+ pm.run(*jit->module);
+ }
+#endif // defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG)
+
+ auto cfg = cfgEdit.apply(jit->config);
+ jit->optimize(cfg);
if(false)
{
@@ -4720,7 +4856,7 @@
funcs[Nucleus::CoroutineEntryBegin] = jit->function;
funcs[Nucleus::CoroutineEntryAwait] = jit->coroutine.await;
funcs[Nucleus::CoroutineEntryDestroy] = jit->coroutine.destroy;
- auto routine = jit->acquireRoutine(funcs, Nucleus::CoroutineEntryCount, optimizationLevel);
+ auto routine = jit->acquireRoutine(funcs, Nucleus::CoroutineEntryCount, cfg);
jit.reset();
return routine;
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 819d100..cc20e27 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -15,11 +15,12 @@
#ifndef rr_Nucleus_hpp
#define rr_Nucleus_hpp
+#include <atomic>
#include <cassert>
#include <cstdarg>
#include <cstdint>
+#include <memory>
#include <vector>
-#include <atomic>
#ifdef None
#undef None // b/127920555
@@ -33,30 +34,86 @@
class BasicBlock;
class Routine;
- enum Optimization
+ // Optimization holds the optimization settings for code generation.
+ class Optimization
{
- Disabled = 0,
- InstructionCombining = 1,
- CFGSimplification = 2,
- LICM = 3,
- AggressiveDCE = 4,
- GVN = 5,
- Reassociate = 6,
- DeadStoreElimination = 7,
- SCCP = 8,
- ScalarReplAggregates = 9,
+ public:
+ enum class Level
+ {
+ None,
+ Less,
+ Default,
+ Aggressive,
+ };
- OptimizationCount
+ enum class Pass
+ {
+ Disabled,
+ InstructionCombining,
+ CFGSimplification,
+ LICM,
+ AggressiveDCE,
+ GVN,
+ Reassociate,
+ DeadStoreElimination,
+ SCCP,
+ ScalarReplAggregates,
+ EarlyCSEPass,
+
+ Count,
+ };
+
+ using Passes = std::vector<Pass>;
+
+ Optimization() = default;
+ Optimization(Level level, const Passes & passes) : level(level), passes(passes) {}
+
+ Level getLevel() const { return level; }
+ const Passes & getPasses() const { return passes; }
+
+ private:
+ Level level = Level::Default;
+ Passes passes;
};
- extern Optimization optimization[10];
-
- enum class OptimizationLevel
+ // Config holds the Reactor configuration settings.
+ class Config
{
- None,
- Less,
- Default,
- Aggressive,
+ public:
+ // Edit holds a number of modifications to a config, that can be applied
+ // on an existing Config to produce a new Config with the specified
+ // changes.
+ class Edit
+ {
+ public:
+ static const Edit None;
+
+ Edit & set(Optimization::Level level) { optLevel = level; optLevelChanged = true; return *this; }
+ Edit & add(Optimization::Pass pass) { optPassEdits.push_back({ListEdit::Add, pass}); return *this; }
+ Edit & remove(Optimization::Pass pass) { optPassEdits.push_back({ListEdit::Remove, pass}); return *this; }
+ Edit & clearOptimizationPasses() { optPassEdits.push_back({ListEdit::Clear, Optimization::Pass::Disabled}); return *this; }
+
+ Config apply(const Config &cfg) const;
+
+ private:
+ enum class ListEdit { Add, Remove, Clear };
+ using OptPassesEdit = std::pair<ListEdit, Optimization::Pass>;
+
+ template <typename T>
+ void apply(const std::vector<std::pair<ListEdit, T>> & edits, std::vector<T>& list) const;
+
+ Optimization::Level optLevel;
+ bool optLevelChanged = false;
+ std::vector<OptPassesEdit> optPassEdits;
+ };
+
+ Config() = default;
+ Config(const Optimization & optimization) : optimization(optimization) {}
+
+ const Optimization & getOptimization() const { return optimization; }
+
+ private:
+ Optimization optimization;
};
class Nucleus
@@ -66,7 +123,13 @@
virtual ~Nucleus();
- Routine *acquireRoutine(const char *name, OptimizationLevel optimizationLevel);
+ // Default configuration to use when no other configuration is specified.
+ // The new configuration will be applied to subsequent reactor calls.
+ static void setDefaultConfig(const Config &cfg);
+ static void adjustDefaultConfig(const Config::Edit &cfgEdit);
+ static Config getDefaultConfig();
+
+ std::shared_ptr<Routine> acquireRoutine(const char *name, const Config::Edit &cfgEdit = Config::Edit::None);
static Value *allocateStackVariable(Type *type, int arraySize = 0);
static BasicBlock *createBasicBlock();
@@ -93,7 +156,7 @@
};
static void createCoroutine(Type *ReturnType, std::vector<Type*> &Params);
- Routine *acquireCoroutine(const char *name, OptimizationLevel optimizationLevel);
+ std::shared_ptr<Routine> acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None);
static void yield(Value*);
// Terminators
@@ -219,9 +282,6 @@
static Value *createConstantVector(const double *constants, Type *type);
static Type *getPointerType(Type *elementType);
-
- private:
- void optimize();
};
}
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index bb94cf5..60ee656 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -21,8 +21,59 @@
#define REACTOR_MATERIALIZE_LVALUES_ON_DEFINITION 0
#endif
+namespace
+{
+ // Introduced in C++20.
+ template <class ForwardIterator, class UnaryPredicate>
+ ForwardIterator remove_if(ForwardIterator first, ForwardIterator last,
+ UnaryPredicate pred)
+ {
+ ForwardIterator result = first;
+ while (first!=last) {
+ if (!pred(*first)) {
+ *result = std::move(*first);
+ ++result;
+ }
+ ++first;
+ }
+ return result;
+ }
+}
+
namespace rr
{
+ const Config::Edit Config::Edit::None = {};
+
+ Config Config::Edit::apply(const Config &cfg) const
+ {
+ if (this == &None) { return cfg; }
+
+ auto level = optLevelChanged ? optLevel : cfg.optimization.getLevel();
+ auto passes = cfg.optimization.getPasses();
+ apply(optPassEdits, passes);
+ return Config{ Optimization{level, passes} };
+ }
+
+ template <typename T>
+ void rr::Config::Edit::apply(const std::vector<std::pair<ListEdit, T>> & edits, std::vector<T>& list) const
+ {
+ for (auto & edit : edits)
+ {
+ switch (edit.first)
+ {
+ case ListEdit::Add:
+ list.push_back(edit.second);
+ break;
+ case ListEdit::Remove:
+ ::remove_if(list.begin(), list.end(), [&](T item) { return item == edit.second; });
+ break;
+ case ListEdit::Clear:
+ list.clear();
+ break;
+ }
+ }
+ }
+
// Set of variables that do not have a stack location yet.
std::unordered_set<Variable*> Variable::unmaterializedVariables;
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 1391275..5add9cd 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2464,8 +2464,8 @@
return Argument<typename std::tuple_element<index, std::tuple<Arguments...>>::type>(arg);
}
- Routine *operator()(const char *name, ...);
- Routine *operator()(OptimizationLevel optLevel, const char *name, ...);
+ std::shared_ptr<Routine> operator()(const char *name, ...);
+ std::shared_ptr<Routine> operator()(const Config::Edit &cfg, const char *name, ...);
protected:
Nucleus *core;
@@ -3031,7 +3031,7 @@
}
template<typename Return, typename... Arguments>
- Routine *Function<Return(Arguments...)>::operator()(const char *name, ...)
+ std::shared_ptr<Routine> Function<Return(Arguments...)>::operator()(const char *name, ...)
{
char fullName[1024 + 1];
@@ -3040,11 +3040,11 @@
vsnprintf(fullName, 1024, name, vararg);
va_end(vararg);
- return core->acquireRoutine(fullName, OptimizationLevel::Default);
+ return core->acquireRoutine(fullName, Config::Edit::None);
}
template<typename Return, typename... Arguments>
- Routine *Function<Return(Arguments...)>::operator()(OptimizationLevel optLevel, const char *name, ...)
+ std::shared_ptr<Routine> Function<Return(Arguments...)>::operator()(const Config::Edit &cfg, const char *name, ...)
{
char fullName[1024 + 1];
@@ -3053,7 +3053,7 @@
vsnprintf(fullName, 1024, name, vararg);
va_end(vararg);
- return core->acquireRoutine(fullName, optLevel);
+ return core->acquireRoutine(fullName, cfg);
}
template<class T, class S>
diff --git a/src/Reactor/Reactor.vcxproj b/src/Reactor/Reactor.vcxproj
index 5326d2c..a885861 100644
--- a/src/Reactor/Reactor.vcxproj
+++ b/src/Reactor/Reactor.vcxproj
@@ -290,7 +290,6 @@
<ClCompile Include="LLVMReactorDebugInfo.cpp" />
<ClCompile Include="ExecutableMemory.cpp" />
<ClCompile Include="Reactor.cpp" />
- <ClCompile Include="Routine.cpp" />
<ClCompile Include="Thread.cpp" />
</ItemGroup>
<ItemGroup>
diff --git a/src/Reactor/Reactor.vcxproj.filters b/src/Reactor/Reactor.vcxproj.filters
index f66a728..c7efa0b 100644
--- a/src/Reactor/Reactor.vcxproj.filters
+++ b/src/Reactor/Reactor.vcxproj.filters
@@ -15,9 +15,6 @@
</Filter>
</ItemGroup>
<ItemGroup>
- <ClCompile Include="Routine.cpp">
- <Filter>Source Files</Filter>
- </ClCompile>
<ClCompile Include="LLVMReactor.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index 490433e..9bc1227c 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -38,7 +38,7 @@
TEST(ReactorUnitTests, Sample)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Int>, Int)> function;
@@ -73,12 +73,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Uninitialized)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int()> function;
@@ -110,12 +109,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Unreachable)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Int)> function;
@@ -141,12 +139,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, VariableAddress)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Int)> function;
@@ -169,12 +166,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, SubVectorLoadStore)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>, Pointer<Byte>)> function;
@@ -229,12 +225,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, VectorConstant)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -278,12 +273,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Concatenate)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -321,12 +315,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Swizzle)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -448,12 +441,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Branching)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Void)> function;
@@ -513,12 +505,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, MinMax)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -604,12 +595,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, NotNeg)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -689,12 +679,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, VectorCompare)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -751,12 +740,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, SaturatedAddAndSubtract)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -864,12 +852,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Unpack)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>,Pointer<Byte>)> function;
@@ -911,12 +898,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Pack)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -973,12 +959,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, MulHigh)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -1050,12 +1035,11 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, MulAdd)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Int(Pointer<Byte>)> function;
@@ -1086,7 +1070,6 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Call)
@@ -1097,7 +1080,7 @@
return;
}
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
struct Class
{
@@ -1137,7 +1120,6 @@
}
}
- delete routine;
}
// Check that a complex generated function which utilizes all 8 or 16 XMM
@@ -1148,7 +1130,7 @@
// It's necessary to inspect the registers in a debugger to actually verify.)
TEST(ReactorUnitTests, PreserveXMMRegisters)
{
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function<Void(Pointer<Byte>, Pointer<Byte>)> function;
@@ -1225,7 +1207,6 @@
EXPECT_EQ(result[3], 0.0f);
}
- delete routine;
}
template <typename T>
@@ -1255,7 +1236,7 @@
using CType = typename TestFixture::CType;
using ReactorType = typename TestFixture::ReactorType;
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function< Int(ReactorType) > function;
@@ -1277,7 +1258,6 @@
}
}
- delete routine;
}
template <typename T>
@@ -1327,7 +1307,7 @@
using CType = typename TestFixture::CType;
using ReactorType = typename TestFixture::ReactorType;
- Routine *routine = nullptr;
+ std::shared_ptr<Routine> routine;
{
Function< Pointer<ReactorType>(Pointer<ReactorType>, Int) > function;
@@ -1367,7 +1347,6 @@
}
}
- delete routine;
}
TEST(ReactorUnitTests, Coroutines_Fibonacci)
diff --git a/src/Reactor/ReactorUnitTests.vcxproj b/src/Reactor/ReactorUnitTests.vcxproj
index d5bd456..606cc1a 100644
--- a/src/Reactor/ReactorUnitTests.vcxproj
+++ b/src/Reactor/ReactorUnitTests.vcxproj
@@ -126,6 +126,7 @@
<TreatSpecificWarningsAsErrors>5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -170,6 +171,7 @@
<TreatSpecificWarningsAsErrors>5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
diff --git a/src/Reactor/Routine.cpp b/src/Reactor/Routine.cpp
deleted file mode 100644
index 23cf929..0000000
--- a/src/Reactor/Routine.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "Routine.hpp"
-
-#include "Thread.hpp"
-
-#include <cassert>
-
-namespace rr
-{
- Routine::Routine()
- {
- bindCount = 0;
- }
-
- void Routine::bind()
- {
- atomicIncrement(&bindCount);
- }
-
- void Routine::unbind()
- {
- long count = atomicDecrement(&bindCount);
-
- if(count == 0)
- {
- delete this;
- }
- }
-
- Routine::~Routine()
- {
- assert(bindCount == 0);
- }
-}
diff --git a/src/Reactor/Routine.hpp b/src/Reactor/Routine.hpp
index 0158bcc..67560e8 100644
--- a/src/Reactor/Routine.hpp
+++ b/src/Reactor/Routine.hpp
@@ -20,18 +20,10 @@
class Routine
{
public:
- Routine();
-
- virtual ~Routine();
+ Routine() = default;
+ virtual ~Routine() = default;
virtual const void *getEntry(int index = 0) = 0;
-
- // Reference counting
- void bind();
- void unbind();
-
- private:
- volatile int bindCount;
};
}
diff --git a/src/Reactor/Subzero.vcxproj b/src/Reactor/Subzero.vcxproj
index 8866f16..6b6490d 100644
--- a/src/Reactor/Subzero.vcxproj
+++ b/src/Reactor/Subzero.vcxproj
@@ -102,6 +102,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -122,6 +123,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -144,6 +146,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -172,6 +175,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -226,7 +230,6 @@
<ClCompile Include="ExecutableMemory.cpp" />
<ClCompile Include="Optimizer.cpp" />
<ClCompile Include="Reactor.cpp" />
- <ClCompile Include="Routine.cpp" />
<ClCompile Include="SubzeroReactor.cpp" />
</ItemGroup>
<ItemGroup>
diff --git a/src/Reactor/Subzero.vcxproj.filters b/src/Reactor/Subzero.vcxproj.filters
index 7229339..5b9bb2d 100644
--- a/src/Reactor/Subzero.vcxproj.filters
+++ b/src/Reactor/Subzero.vcxproj.filters
@@ -102,9 +102,6 @@
<ClCompile Include="SubzeroReactor.cpp">
<Filter>Source Files</Filter>
</ClCompile>
- <ClCompile Include="Routine.cpp">
- <Filter>Source Files</Filter>
- </ClCompile>
<ClCompile Include="$(SolutionDir)third_party\subzero\src\IceInstX8632.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/src/Reactor/SubzeroLLVMDependencies.vcxproj b/src/Reactor/SubzeroLLVMDependencies.vcxproj
index dbae0f8..38d7598 100644
--- a/src/Reactor/SubzeroLLVMDependencies.vcxproj
+++ b/src/Reactor/SubzeroLLVMDependencies.vcxproj
@@ -96,6 +96,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@@ -109,6 +110,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
@@ -124,6 +126,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@@ -143,6 +146,7 @@
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index bcc2f7e..0cf2370 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -55,6 +55,18 @@
namespace
{
+ // Default configuration settings. Must be accessed under mutex lock.
+ std::mutex defaultConfigLock;
+ rr::Config &defaultConfig()
+ {
+ // This uses a static in a function to avoid the cost of a global static
+ // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html
+ static rr::Config config = rr::Config::Edit()
+ .set(rr::Optimization::Level::Default)
+ .apply({});
+ return config;
+ }
+
Ice::GlobalContext *context = nullptr;
Ice::Cfg *function = nullptr;
Ice::CfgNode *basicBlock = nullptr;
@@ -77,6 +89,19 @@
#define __x86_64__ 1
#endif
+ static Ice::OptLevel toIce(rr::Optimization::Level level)
+ {
+ switch (level)
+ {
+ case rr::Optimization::Level::None: return Ice::Opt_0;
+ case rr::Optimization::Level::Less: return Ice::Opt_1;
+ case rr::Optimization::Level::Default: return Ice::Opt_2;
+ case rr::Optimization::Level::Aggressive: return Ice::Opt_2;
+ default: UNREACHABLE("Unknown Optimization Level %d", int(level));
+ }
+ return Ice::Opt_2;
+ }
+
class CPUID
{
public:
@@ -204,8 +229,6 @@
return Ice::typeWidthInBytes(T(type));
}
- Optimization optimization[10] = {InstructionCombining, Disabled};
-
using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type;
using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type;
@@ -548,7 +571,7 @@
Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
#endif
Flags.setOutFileType(Ice::FT_Elf);
- Flags.setOptLevel(Ice::Opt_2);
+ Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel()));
Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None);
Flags.setDisableHybridAssembly(true);
@@ -585,7 +608,26 @@
::codegenMutex.unlock();
}
- Routine *Nucleus::acquireRoutine(const char *name, OptimizationLevel optimizationLevel)
+ void Nucleus::setDefaultConfig(const Config &cfg)
+ {
+ std::unique_lock<std::mutex> lock(::defaultConfigLock);
+ ::defaultConfig() = cfg;
+ }
+
+ void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit)
+ {
+ std::unique_lock<std::mutex> lock(::defaultConfigLock);
+ auto &config = ::defaultConfig();
+ config = cfgEdit.apply(config);
+ }
+
+ Config Nucleus::getDefaultConfig()
+ {
+ std::unique_lock<std::mutex> lock(::defaultConfigLock);
+ return ::defaultConfig();
+ }
+
+ std::shared_ptr<Routine> Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */)
{
if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret)
{
@@ -594,7 +636,7 @@
::function->setFunctionName(Ice::GlobalString::createWithString(::context, name));
- optimize();
+ rr::optimize(::function);
::function->translate();
ASSERT(!::function->hasError());
@@ -621,12 +663,7 @@
Routine *handoffRoutine = ::routine;
::routine = nullptr;
- return handoffRoutine;
- }
-
- void Nucleus::optimize()
- {
- rr::optimize(::function);
+ return std::shared_ptr<Routine>(handoffRoutine);
}
Value *Nucleus::allocateStackVariable(Type *t, int arraySize)
@@ -3506,7 +3543,7 @@
void FlushDebug() {}
void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params) { UNIMPLEMENTED("createCoroutine"); }
- Routine* Nucleus::acquireCoroutine(const char *name, OptimizationLevel optimizationLevel) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; }
+ std::shared_ptr<Routine> Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; }
void Nucleus::yield(Value* val) { UNIMPLEMENTED("Yield"); }
}
diff --git a/src/Renderer/Blitter.cpp b/src/Renderer/Blitter.cpp
index 30ef1e9..d4edbfa 100644
--- a/src/Renderer/Blitter.cpp
+++ b/src/Renderer/Blitter.cpp
@@ -1179,7 +1179,7 @@
return s;
}
- Routine *Blitter::generate(const State &state)
+ std::shared_ptr<Routine> Blitter::generate(const State &state)
{
Function<Void(Pointer<Byte>)> function;
{
@@ -1420,7 +1420,7 @@
state.destSamples = dest->getSamples();
criticalSection.lock();
- Routine *blitRoutine = blitCache->query(state);
+ auto blitRoutine = blitCache->query(state);
if(!blitRoutine)
{
diff --git a/src/Renderer/Blitter.hpp b/src/Renderer/Blitter.hpp
index e3db745..9c6b4c0 100644
--- a/src/Renderer/Blitter.hpp
+++ b/src/Renderer/Blitter.hpp
@@ -111,7 +111,7 @@
static Float4 LinearToSRGB(Float4 &color);
static Float4 sRGBtoLinear(Float4 &color);
bool blitReactor(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, const Options &options);
- Routine *generate(const State &state);
+ std::shared_ptr<Routine> generate(const State &state);
RoutineCache<State> *blitCache;
MutexLock criticalSection;
diff --git a/src/Renderer/LRUCache.hpp b/src/Renderer/LRUCache.hpp
index 1a1a302..bdd0950 100644
--- a/src/Renderer/LRUCache.hpp
+++ b/src/Renderer/LRUCache.hpp
@@ -27,9 +27,9 @@
~LRUCache();
- Data *query(const Key &key) const;
- Data *add(const Key &key, Data *data);
-
+ Data query(const Key &key) const;
+ Data add(const Key &key, const Data &data);
+
int getSize() {return size;}
Key &getKey(int i) {return key[i];}
@@ -41,7 +41,7 @@
Key *key;
Key **ref;
- Data **data;
+ Data *data;
};
}
@@ -57,12 +57,10 @@
key = new Key[size];
ref = new Key*[size];
- data = new Data*[size];
+ data = new Data[size];
for(int i = 0; i < size; i++)
{
- data[i] = nullptr;
-
ref[i] = &key[i];
}
}
@@ -76,21 +74,12 @@
delete[] ref;
ref = nullptr;
- for(int i = 0; i < size; i++)
- {
- if(data[i])
- {
- data[i]->unbind();
- data[i] = nullptr;
- }
- }
-
delete[] data;
data = nullptr;
}
template<class Key, class Data>
- Data *LRUCache<Key, Data>::query(const Key &key) const
+ Data LRUCache<Key, Data>::query(const Key &key) const
{
for(int i = top; i > top - fill; i--)
{
@@ -98,14 +87,14 @@
if(key == *ref[j])
{
- Data *hit = data[j];
+ Data hit = data[j];
if(i != top)
{
// Move one up
int k = (j + 1) & mask;
- Data *swapD = data[k];
+ Data swapD = data[k];
data[k] = data[j];
data[j] = swapD;
@@ -122,20 +111,12 @@
}
template<class Key, class Data>
- Data *LRUCache<Key, Data>::add(const Key &key, Data *data)
+ Data LRUCache<Key, Data>::add(const Key &key, const Data &data)
{
top = (top + 1) & mask;
fill = fill + 1 < size ? fill + 1 : size;
*ref[top] = key;
-
- data->bind();
-
- if(this->data[top])
- {
- this->data[top]->unbind();
- }
-
this->data[top] = data;
return data;
diff --git a/src/Renderer/PixelProcessor.cpp b/src/Renderer/PixelProcessor.cpp
index 133f90c..0b80727 100644
--- a/src/Renderer/PixelProcessor.cpp
+++ b/src/Renderer/PixelProcessor.cpp
@@ -1182,9 +1182,9 @@
return state;
}
- Routine *PixelProcessor::routine(const State &state)
+ std::shared_ptr<Routine> PixelProcessor::routine(const State &state)
{
- Routine *routine = routineCache->query(state);
+ auto routine = routineCache->query(state);
if(!routine)
{
diff --git a/src/Renderer/PixelProcessor.hpp b/src/Renderer/PixelProcessor.hpp
index 98300de..4fa627c 100644
--- a/src/Renderer/PixelProcessor.hpp
+++ b/src/Renderer/PixelProcessor.hpp
@@ -306,7 +306,7 @@
protected:
const State update() const;
- Routine *routine(const State &state);
+ std::shared_ptr<Routine> routine(const State &state);
void setRoutineCacheSize(int routineCacheSize);
// Shader constants
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp
index 87b8dd1..c3c2260 100644
--- a/src/Renderer/Renderer.cpp
+++ b/src/Renderer/Renderer.cpp
@@ -360,10 +360,6 @@
draw->drawType = drawType;
draw->batchSize = batch;
- vertexRoutine->bind();
- setupRoutine->bind();
- pixelRoutine->bind();
-
draw->vertexRoutine = vertexRoutine;
draw->setupRoutine = setupRoutine;
draw->pixelRoutine = pixelRoutine;
@@ -1105,9 +1101,9 @@
}
}
- draw.vertexRoutine->unbind();
- draw.setupRoutine->unbind();
- draw.pixelRoutine->unbind();
+ draw.vertexRoutine.reset();
+ draw.setupRoutine.reset();
+ draw.pixelRoutine.reset();
sync->unlock();
@@ -2010,12 +2006,6 @@
P[3].y -= Y;
C[3] = clipper->computeClipFlags(P[3]);
- triangle.v1 = triangle.v0;
- triangle.v2 = triangle.v0;
-
- triangle.v1.X += iround(16 * 0.5f * pSize);
- triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
-
Polygon polygon(P, 4);
if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
@@ -2030,6 +2020,11 @@
}
}
+ triangle.v1 = triangle.v0;
+ triangle.v2 = triangle.v0;
+
+ triangle.v1.X += iround(16 * 0.5f * pSize);
+ triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
return setupRoutine(&primitive, &triangle, &polygon, &data);
}
@@ -2855,10 +2850,13 @@
CPUID::setEnableSSE2(configuration.enableSSE2);
CPUID::setEnableSSE(configuration.enableSSE);
- for(int pass = 0; pass < 10; pass++)
+ rr::Config::Edit cfg;
+ cfg.clearOptimizationPasses();
+ for(auto pass : configuration.optimization)
{
- optimization[pass] = configuration.optimization[pass];
+ if (pass != rr::Optimization::Pass::Disabled) { cfg.add(pass); }
}
+ rr::Nucleus::adjustDefaultConfig(cfg);
forceWindowed = configuration.forceWindowed;
complementaryDepthBuffer = configuration.complementaryDepthBuffer;
diff --git a/src/Renderer/Renderer.hpp b/src/Renderer/Renderer.hpp
index 1118c59..4ed11f6 100644
--- a/src/Renderer/Renderer.hpp
+++ b/src/Renderer/Renderer.hpp
@@ -458,9 +458,9 @@
SetupProcessor::State setupState;
PixelProcessor::State pixelState;
- Routine *vertexRoutine;
- Routine *setupRoutine;
- Routine *pixelRoutine;
+ std::shared_ptr<Routine> vertexRoutine;
+ std::shared_ptr<Routine> setupRoutine;
+ std::shared_ptr<Routine> pixelRoutine;
};
struct DrawCall
@@ -472,9 +472,9 @@
AtomicInt drawType;
AtomicInt batchSize;
- Routine *vertexRoutine;
- Routine *setupRoutine;
- Routine *pixelRoutine;
+ std::shared_ptr<Routine> vertexRoutine;
+ std::shared_ptr<Routine> setupRoutine;
+ std::shared_ptr<Routine> pixelRoutine;
VertexProcessor::RoutinePointer vertexPointer;
SetupProcessor::RoutinePointer setupPointer;
diff --git a/src/Renderer/RoutineCache.hpp b/src/Renderer/RoutineCache.hpp
index 8420468..61f635a 100644
--- a/src/Renderer/RoutineCache.hpp
+++ b/src/Renderer/RoutineCache.hpp
@@ -24,7 +24,7 @@
using namespace rr;
template<class State>
- using RoutineCache = LRUCache<State, Routine>;
+ using RoutineCache = LRUCache<State, std::shared_ptr<Routine>>;
}
#endif // sw_RoutineCache_hpp
diff --git a/src/Renderer/SetupProcessor.cpp b/src/Renderer/SetupProcessor.cpp
index 7211406..d8b9b91 100644
--- a/src/Renderer/SetupProcessor.cpp
+++ b/src/Renderer/SetupProcessor.cpp
@@ -223,9 +223,9 @@
return state;
}
- Routine *SetupProcessor::routine(const State &state)
+ std::shared_ptr<Routine> SetupProcessor::routine(const State &state)
{
- Routine *routine = routineCache->query(state);
+ auto routine = routineCache->query(state);
if(!routine)
{
diff --git a/src/Renderer/SetupProcessor.hpp b/src/Renderer/SetupProcessor.hpp
index be0adc7..de12afd 100644
--- a/src/Renderer/SetupProcessor.hpp
+++ b/src/Renderer/SetupProcessor.hpp
@@ -91,7 +91,7 @@
protected:
State update() const;
- Routine *routine(const State &state);
+ std::shared_ptr<Routine> routine(const State &state);
void setRoutineCacheSize(int cacheSize);
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index 463393a..9bd786e 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -1088,9 +1088,9 @@
return state;
}
- Routine *VertexProcessor::routine(const State &state)
+ std::shared_ptr<Routine> VertexProcessor::routine(const State &state)
{
- Routine *routine = routineCache->query(state);
+ auto routine = routineCache->query(state);
if(!routine) // Create one
{
diff --git a/src/Renderer/VertexProcessor.hpp b/src/Renderer/VertexProcessor.hpp
index b53263b..329bdac 100644
--- a/src/Renderer/VertexProcessor.hpp
+++ b/src/Renderer/VertexProcessor.hpp
@@ -284,7 +284,7 @@
const Matrix &getViewTransform();
const State update(DrawType drawType);
- Routine *routine(const State &state);
+ std::shared_ptr<Routine> routine(const State &state);
bool isFixedFunction();
void setRoutineCacheSize(int cacheSize);
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp
index 6024869..4f2955c 100644
--- a/src/Shader/SetupRoutine.cpp
+++ b/src/Shader/SetupRoutine.cpp
@@ -665,7 +665,7 @@
#endif
}
- Routine *SetupRoutine::getRoutine()
+ std::shared_ptr<Routine> SetupRoutine::getRoutine()
{
return routine;
}
diff --git a/src/Shader/SetupRoutine.hpp b/src/Shader/SetupRoutine.hpp
index c1c3205..0f34249 100644
--- a/src/Shader/SetupRoutine.hpp
+++ b/src/Shader/SetupRoutine.hpp
@@ -30,7 +30,7 @@
virtual ~SetupRoutine();
void generate();
- Routine *getRoutine();
+ std::shared_ptr<Routine> getRoutine();
private:
void setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flatShading, bool sprite, bool perspective, bool wrap, int component);
@@ -40,7 +40,7 @@
const SetupProcessor::State &state;
- Routine *routine;
+ std::shared_ptr<Routine> routine;
};
}
diff --git a/src/SwiftShader/SwiftShader.vcxproj b/src/SwiftShader/SwiftShader.vcxproj
index c772c28..78b8925 100644
--- a/src/SwiftShader/SwiftShader.vcxproj
+++ b/src/SwiftShader/SwiftShader.vcxproj
@@ -131,7 +131,7 @@
<BrowseInformation>true</BrowseInformation>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<TreatWarningAsError>true</TreatWarningAsError>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
@@ -167,7 +167,7 @@
<BrowseInformation>true</BrowseInformation>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<TreatWarningAsError>true</TreatWarningAsError>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
@@ -200,7 +200,7 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>
</DebugInformationFormat>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<StringPooling>true</StringPooling>
@@ -236,7 +236,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<StringPooling>true</StringPooling>
@@ -276,7 +276,7 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>
</DebugInformationFormat>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<StringPooling>true</StringPooling>
@@ -315,7 +315,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- <DisableSpecificWarnings>5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
+ <DisableSpecificWarnings>4267;5030;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<ForcedIncludeFiles>%(ForcedIncludeFiles)</ForcedIncludeFiles>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<StringPooling>true</StringPooling>
diff --git a/src/System/Memory.cpp b/src/System/Memory.cpp
index 663732f..e045254 100644
--- a/src/System/Memory.cpp
+++ b/src/System/Memory.cpp
@@ -31,6 +31,7 @@
#endif
#include <cstring>
+#include <cstdlib>
#undef allocate
#undef deallocate
@@ -70,7 +71,7 @@
return allocation;
}
#else
- unsigned char *block = new unsigned char[bytes + sizeof(Allocation) + alignment];
+ unsigned char *block = (unsigned char*)malloc(bytes + sizeof(Allocation) + alignment);
unsigned char *aligned = nullptr;
if(block)
@@ -127,7 +128,7 @@
unsigned char *aligned = (unsigned char*)memory;
Allocation *allocation = (Allocation*)(aligned - sizeof(Allocation));
- delete[] allocation->block;
+ free(allocation->block);
}
#endif
}
diff --git a/src/Vulkan/BUILD.gn b/src/Vulkan/BUILD.gn
index 6aeaec8..3fd343f 100644
--- a/src/Vulkan/BUILD.gn
+++ b/src/Vulkan/BUILD.gn
@@ -108,7 +108,13 @@
output_name = "libvulkan"
output_dir = "$root_out_dir/swiftshader"
- if (is_linux) {
+ if (is_mac) {
+ ldflags = [
+ "-Wl,-install_name,@rpath/libvk_swiftshader.dylib",
+ "-Wl,-exported_symbols_list," +
+ rebase_path("libvk_swiftshader.exports", root_build_dir),
+ ]
+ } else if (is_linux) {
inputs = [
"libvk_swiftshader.lds",
]
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 7c4edac..d787886 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -1491,7 +1491,7 @@
for(uint32_t i = 0; i < rangeCount; i++)
{
- addCommand<ClearColorImage>(image, pColor[i], pRanges[i]);
+ addCommand<ClearColorImage>(image, *pColor, pRanges[i]);
}
}
@@ -1502,7 +1502,7 @@
for(uint32_t i = 0; i < rangeCount; i++)
{
- addCommand<ClearDepthStencilImage>(image, pDepthStencil[i], pRanges[i]);
+ addCommand<ClearDepthStencilImage>(image, *pDepthStencil, pRanges[i]);
}
}
diff --git a/src/Vulkan/VkConfig.h b/src/Vulkan/VkConfig.h
index adaa353..157f34e 100644
--- a/src/Vulkan/VkConfig.h
+++ b/src/Vulkan/VkConfig.h
@@ -17,8 +17,6 @@
#include "Version.h"
-#include "Reactor/Nucleus.hpp" // ReactorOptimizationLevel
-
#include <Vulkan/VulkanPlatform.h>
namespace vk
@@ -79,9 +77,6 @@
MAX_POINT_SIZE = 1, // Large points are not supported. If/when we turn this on, must be >= 64.
};
-// Optimization level to use for JIT functions.
-static constexpr auto ReactorOptimizationLevel = rr::OptimizationLevel::Default;
-
}
#endif // VK_CONFIG_HPP_
diff --git a/src/Vulkan/VkDescriptorPool.cpp b/src/Vulkan/VkDescriptorPool.cpp
index 18a9d16..79b46cc 100644
--- a/src/Vulkan/VkDescriptorPool.cpp
+++ b/src/Vulkan/VkDescriptorPool.cpp
@@ -51,12 +51,12 @@
size_t DescriptorPool::ComputeRequiredAllocationSize(const VkDescriptorPoolCreateInfo* pCreateInfo)
{
- size_t size = pCreateInfo->maxSets * sizeof(DescriptorSetHeader);
+ size_t size = pCreateInfo->maxSets * sw::align(sizeof(DescriptorSetHeader), 16);
for(uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++)
{
size += pCreateInfo->pPoolSizes[i].descriptorCount *
- DescriptorSetLayout::GetDescriptorSize(pCreateInfo->pPoolSizes[i].type);
+ sw::align(DescriptorSetLayout::GetDescriptorSize(pCreateInfo->pPoolSizes[i].type), 16);
}
return size;
@@ -101,7 +101,7 @@
}
// Second, look for space at the beginning of the pool
- const auto itBegin = nodes.end();
+ const auto itBegin = nodes.begin();
freeSpace = itBegin->set - pool;
if(freeSpace >= size)
{
@@ -215,7 +215,7 @@
totalFreeSize += poolSize - (itLast->set - pool) + itLast->size;
// Compute space at the beginning of the pool
- const auto itBegin = nodes.end();
+ const auto itBegin = nodes.begin();
totalFreeSize += itBegin->set - pool;
// Finally, look between existing pool items
diff --git a/src/Vulkan/VkDescriptorSet.hpp b/src/Vulkan/VkDescriptorSet.hpp
index a733a5b..fc50148 100644
--- a/src/Vulkan/VkDescriptorSet.hpp
+++ b/src/Vulkan/VkDescriptorSet.hpp
@@ -29,7 +29,7 @@
DescriptorSetLayout* layout;
};
- class DescriptorSet
+ class alignas(16) DescriptorSet
{
public:
static inline DescriptorSet* Cast(VkDescriptorSet object)
diff --git a/src/Vulkan/VkDescriptorSetLayout.cpp b/src/Vulkan/VkDescriptorSetLayout.cpp
index 65e625e..e4d87ce 100644
--- a/src/Vulkan/VkDescriptorSetLayout.cpp
+++ b/src/Vulkan/VkDescriptorSetLayout.cpp
@@ -269,7 +269,7 @@
}
}
-void DescriptorSetLayout::WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src)
+void DescriptorSetLayout::WriteDescriptorSet(Device* device, DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src)
{
DescriptorSetLayout* dstLayout = dstSet->header.layout;
auto &binding = dstLayout->bindings[dstLayout->getBindingIndex(entry.dstBinding)];
@@ -294,6 +294,7 @@
{
imageSampler[i].updateSampler(vk::Cast(update->sampler));
}
+ imageSampler[i].device = device;
}
}
else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
@@ -319,6 +320,7 @@
imageSampler[i].texture.width = sw::replicate(static_cast<float>(numElements));
imageSampler[i].texture.height = sw::replicate(1);
imageSampler[i].texture.depth = sw::replicate(1);
+ imageSampler[i].device = device;
sw::Mipmap &mipmap = imageSampler[i].texture.mipmap[0];
mipmap.buffer = bufferView->getPointer();
@@ -360,6 +362,7 @@
imageSampler[i].type = imageView->getType();
imageSampler[i].swizzle = imageView->getComponentMapping();
imageSampler[i].format = format;
+ imageSampler[i].device = device;
auto &subresourceRange = imageView->getSubresourceRange();
@@ -572,7 +575,7 @@
mipmap.sliceP[3] = sliceP;
}
-void DescriptorSetLayout::WriteDescriptorSet(const VkWriteDescriptorSet& writeDescriptorSet)
+void DescriptorSetLayout::WriteDescriptorSet(Device* device, const VkWriteDescriptorSet& writeDescriptorSet)
{
DescriptorSet* dstSet = vk::Cast(writeDescriptorSet.dstSet);
VkDescriptorUpdateTemplateEntry e;
@@ -611,7 +614,7 @@
UNIMPLEMENTED("descriptor type %u", writeDescriptorSet.descriptorType);
}
- WriteDescriptorSet(dstSet, e, reinterpret_cast<char const *>(ptr));
+ WriteDescriptorSet(device, dstSet, e, reinterpret_cast<char const *>(ptr));
}
void DescriptorSetLayout::CopyDescriptorSet(const VkCopyDescriptorSet& descriptorCopies)
diff --git a/src/Vulkan/VkDescriptorSetLayout.hpp b/src/Vulkan/VkDescriptorSetLayout.hpp
index 44ac8f2..73535e8 100644
--- a/src/Vulkan/VkDescriptorSetLayout.hpp
+++ b/src/Vulkan/VkDescriptorSetLayout.hpp
@@ -25,6 +25,7 @@
{
class DescriptorSet;
+class Device;
// TODO(b/129523279): Move to the Device or Pipeline layer.
struct alignas(16) SampledImageDescriptor
@@ -35,6 +36,7 @@
// TODO(b/129523279): Minimize to the data actually needed.
vk::Sampler sampler;
+ vk::Device* device;
uint32_t imageViewId;
VkImageViewType type;
@@ -84,10 +86,10 @@
static size_t ComputeRequiredAllocationSize(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
static size_t GetDescriptorSize(VkDescriptorType type);
- static void WriteDescriptorSet(const VkWriteDescriptorSet& descriptorWrites);
+ static void WriteDescriptorSet(Device* device, const VkWriteDescriptorSet& descriptorWrites);
static void CopyDescriptorSet(const VkCopyDescriptorSet& descriptorCopies);
- static void WriteDescriptorSet(DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src);
+ static void WriteDescriptorSet(Device* device, DescriptorSet *dstSet, VkDescriptorUpdateTemplateEntry const &entry, char const *src);
static void WriteTextureLevelInfo(sw::Texture *texture, int level, int width, int height, int depth, int pitchP, int sliceP);
void initialize(DescriptorSet* descriptorSet);
diff --git a/src/Vulkan/VkDescriptorUpdateTemplate.cpp b/src/Vulkan/VkDescriptorUpdateTemplate.cpp
index 76acbe7..e70ad73 100644
--- a/src/Vulkan/VkDescriptorUpdateTemplate.cpp
+++ b/src/Vulkan/VkDescriptorUpdateTemplate.cpp
@@ -35,14 +35,14 @@
return info->descriptorUpdateEntryCount * sizeof(VkDescriptorUpdateTemplateEntry);
}
- void DescriptorUpdateTemplate::updateDescriptorSet(VkDescriptorSet vkDescriptorSet, const void* pData)
+ void DescriptorUpdateTemplate::updateDescriptorSet(Device* device, VkDescriptorSet vkDescriptorSet, const void* pData)
{
DescriptorSet* descriptorSet = vk::Cast(vkDescriptorSet);
for(uint32_t i = 0; i < descriptorUpdateEntryCount; i++)
{
- DescriptorSetLayout::WriteDescriptorSet(descriptorSet, descriptorUpdateEntries[i],
+ DescriptorSetLayout::WriteDescriptorSet(device, descriptorSet, descriptorUpdateEntries[i],
reinterpret_cast<char const *>(pData));
}
}
diff --git a/src/Vulkan/VkDescriptorUpdateTemplate.hpp b/src/Vulkan/VkDescriptorUpdateTemplate.hpp
index 7f0e5be..90a8b96 100644
--- a/src/Vulkan/VkDescriptorUpdateTemplate.hpp
+++ b/src/Vulkan/VkDescriptorUpdateTemplate.hpp
@@ -20,6 +20,7 @@
namespace vk
{
class DescriptorSetLayout;
+ class Device;
class DescriptorUpdateTemplate : public Object<DescriptorUpdateTemplate, VkDescriptorUpdateTemplate>
{
@@ -28,7 +29,7 @@
static size_t ComputeRequiredAllocationSize(const VkDescriptorUpdateTemplateCreateInfo* info);
- void updateDescriptorSet(VkDescriptorSet descriptorSet, const void* pData);
+ void updateDescriptorSet(Device* device, VkDescriptorSet descriptorSet, const void* pData);
private:
uint32_t descriptorUpdateEntryCount = 0;
diff --git a/src/Vulkan/VkDevice.cpp b/src/Vulkan/VkDevice.cpp
index 6b918cd..230d2d0 100644
--- a/src/Vulkan/VkDevice.cpp
+++ b/src/Vulkan/VkDevice.cpp
@@ -36,6 +36,32 @@
namespace vk
{
+std::shared_ptr<rr::Routine> Device::SamplingRoutineCache::query(const vk::Device::SamplingRoutineCache::Key& key) const
+{
+ return cache.query(hash(key));
+}
+
+void Device::SamplingRoutineCache::add(const vk::Device::SamplingRoutineCache::Key& key, const std::shared_ptr<rr::Routine>& routine)
+{
+ ASSERT(routine);
+ cache.add(hash(key), routine);
+}
+
+std::shared_ptr<rr::Routine> Device::SamplingRoutineCache::queryConst(const vk::Device::SamplingRoutineCache::Key& key) const
+{
+ return cache.queryConstCache(hash(key));
+}
+
+void Device::SamplingRoutineCache::updateConstCache()
+{
+ cache.updateConstCache();
+}
+
+std::size_t Device::SamplingRoutineCache::hash(const vk::Device::SamplingRoutineCache::Key &key)
+{
+ return (key.instruction << 16) ^ (key.sampler << 8) ^ key.imageView;
+}
+
Device::Device(const VkDeviceCreateInfo* pCreateInfo, void* mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures)
: physicalDevice(physicalDevice),
queues(reinterpret_cast<Queue*>(mem)),
@@ -55,7 +81,7 @@
for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
{
- new (&queues[queueID]) Queue();
+ new (&queues[queueID]) Queue(this);
}
}
@@ -72,7 +98,8 @@
}
// FIXME (b/119409619): use an allocator here so we can control all memory allocations
- blitter = new sw::Blitter();
+ blitter.reset(new sw::Blitter());
+ samplingRoutineCache.reset(new SamplingRoutineCache());
}
void Device::destroy(const VkAllocationCallbacks* pAllocator)
@@ -83,8 +110,6 @@
}
vk::deallocate(queues, pAllocator);
-
- delete blitter;
}
size_t Device::ComputeRequiredAllocationSize(const VkDeviceCreateInfo* pCreateInfo)
@@ -212,7 +237,7 @@
{
for(uint32_t i = 0; i < descriptorWriteCount; i++)
{
- DescriptorSetLayout::WriteDescriptorSet(pDescriptorWrites[i]);
+ DescriptorSetLayout::WriteDescriptorSet(this, pDescriptorWrites[i]);
}
for(uint32_t i = 0; i < descriptorCopyCount; i++)
@@ -221,4 +246,25 @@
}
}
+Device::SamplingRoutineCache* Device::getSamplingRoutineCache() const
+{
+ return samplingRoutineCache.get();
+}
+
+std::shared_ptr<rr::Routine> Device::findInConstCache(const SamplingRoutineCache::Key& key) const
+{
+ return samplingRoutineCache->queryConst(key);
+}
+
+void Device::updateSamplingRoutineConstCache()
+{
+ std::unique_lock<std::mutex> lock(samplingRoutineCacheMutex);
+ samplingRoutineCache->updateConstCache();
+}
+
+std::mutex& Device::getSamplingRoutineCacheMutex()
+{
+ return samplingRoutineCacheMutex;
+}
+
} // namespace vk
diff --git a/src/Vulkan/VkDevice.hpp b/src/Vulkan/VkDevice.hpp
index 3e262d3..721dda2 100644
--- a/src/Vulkan/VkDevice.hpp
+++ b/src/Vulkan/VkDevice.hpp
@@ -16,6 +16,10 @@
#define VK_DEVICE_HPP_
#include "VkObject.hpp"
+#include "Device/LRUCache.hpp"
+#include "Reactor/Routine.hpp"
+#include <memory>
+#include <mutex>
namespace sw
{
@@ -48,19 +52,49 @@
void updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites,
uint32_t descriptorCopyCount, const VkCopyDescriptorSet* pDescriptorCopies);
const VkPhysicalDeviceFeatures &getEnabledFeatures() const { return enabledFeatures; }
- sw::Blitter* getBlitter() const { return blitter; }
+ sw::Blitter* getBlitter() const { return blitter.get(); }
+
+ class SamplingRoutineCache
+ {
+ public:
+ SamplingRoutineCache() : cache(1024) {}
+ ~SamplingRoutineCache() {}
+
+ struct Key
+ {
+ uint32_t instruction;
+ uint32_t sampler;
+ uint32_t imageView;
+ };
+
+ std::shared_ptr<rr::Routine> query(const Key& key) const;
+ void add(const Key& key, const std::shared_ptr<rr::Routine>& routine);
+
+ std::shared_ptr<rr::Routine> queryConst(const Key& key) const;
+ void updateConstCache();
+
+ static std::size_t hash(const Key &key);
+
+ private:
+ sw::LRUConstCache<std::size_t, std::shared_ptr<rr::Routine>> cache;
+ };
+
+ SamplingRoutineCache* getSamplingRoutineCache() const;
+ std::mutex& getSamplingRoutineCacheMutex();
+ std::shared_ptr<rr::Routine> findInConstCache(const SamplingRoutineCache::Key& key) const;
+ void updateSamplingRoutineConstCache();
private:
PhysicalDevice *const physicalDevice = nullptr;
Queue *const queues = nullptr;
uint32_t queueCount = 0;
-
- const uint32_t enabledExtensionCount = 0;
+ std::unique_ptr<sw::Blitter> blitter;
+ std::unique_ptr<SamplingRoutineCache> samplingRoutineCache;
+ std::mutex samplingRoutineCacheMutex;
+ uint32_t enabledExtensionCount = 0;
typedef char ExtensionName[VK_MAX_EXTENSION_NAME_SIZE];
ExtensionName* extensions = nullptr;
const VkPhysicalDeviceFeatures enabledFeatures = {};
-
- sw::Blitter* blitter = nullptr;
};
using DispatchableDevice = DispatchableObject<Device, VkDevice>;
diff --git a/src/Vulkan/VkGetProcAddress.cpp b/src/Vulkan/VkGetProcAddress.cpp
index 4bfb07e..c132e7f 100644
--- a/src/Vulkan/VkGetProcAddress.cpp
+++ b/src/Vulkan/VkGetProcAddress.cpp
@@ -93,6 +93,10 @@
MAKE_VULKAN_INSTANCE_ENTRY(vkCreateXlibSurfaceKHR),
MAKE_VULKAN_INSTANCE_ENTRY(vkGetPhysicalDeviceXlibPresentationSupportKHR),
#endif
+#ifdef VK_USE_PLATFORM_MACOS_MVK
+ // VK_MVK_macos_surface
+ MAKE_VULKAN_INSTANCE_ENTRY(vkCreateMacOSSurfaceMVK),
+#endif
};
#undef MAKE_VULKAN_INSTANCE_ENTRY
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index ba1b8db..e6bb5be 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -262,7 +262,7 @@
// TODO(b/119409619): use allocator.
auto program = std::make_shared<sw::ComputeProgram>(key.getShader(), key.getLayout(), descriptorSets);
program->generate();
- program->finalize(vk::ReactorOptimizationLevel);
+ program->finalize();
return program;
}
diff --git a/src/Vulkan/VkPipelineLayout.cpp b/src/Vulkan/VkPipelineLayout.cpp
index cd47bab..da0d3ae 100644
--- a/src/Vulkan/VkPipelineLayout.cpp
+++ b/src/Vulkan/VkPipelineLayout.cpp
@@ -40,9 +40,10 @@
uint32_t dynamicOffsetBase = 0;
for (uint32_t i = 0; i < setLayoutCount; i++)
{
- ASSERT_OR_RETURN(dynamicOffsetBase < MAX_DESCRIPTOR_SET_COMBINED_BUFFERS_DYNAMIC);
- dynamicOffsetBases[i] = dynamicOffsetBase;
- dynamicOffsetBase += setLayouts[i]->getDynamicDescriptorCount();
+ uint32_t dynamicDescriptorCount = setLayouts[i]->getDynamicDescriptorCount();
+ ASSERT_OR_RETURN((dynamicOffsetBase + dynamicDescriptorCount) <= MAX_DESCRIPTOR_SET_COMBINED_BUFFERS_DYNAMIC);
+ dynamicOffsetBases[i] = dynamicOffsetBase;
+ dynamicOffsetBase += dynamicDescriptorCount;
}
}
diff --git a/src/Vulkan/VkQueue.cpp b/src/Vulkan/VkQueue.cpp
index 4c03198..3aee60a 100644
--- a/src/Vulkan/VkQueue.cpp
+++ b/src/Vulkan/VkQueue.cpp
@@ -74,7 +74,7 @@
namespace vk
{
-Queue::Queue() : renderer()
+Queue::Queue(Device* device) : renderer(device)
{
queueThread = std::thread(TaskLoop, this);
}
diff --git a/src/Vulkan/VkQueue.hpp b/src/Vulkan/VkQueue.hpp
index cfa462b..2926e10 100644
--- a/src/Vulkan/VkQueue.hpp
+++ b/src/Vulkan/VkQueue.hpp
@@ -31,6 +31,7 @@
namespace vk
{
+class Device;
class Fence;
class Queue
@@ -38,7 +39,7 @@
VK_LOADER_DATA loaderData = { ICD_LOADER_MAGIC };
public:
- Queue();
+ Queue(Device* device);
~Queue();
operator VkQueue()
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index fc1e210..ef70b3f 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -42,6 +42,10 @@
#include "VkShaderModule.hpp"
#include "VkRenderPass.hpp"
+#ifdef VK_USE_PLATFORM_MACOS_MVK
+#include "WSI/MacOSSurfaceMVK.h"
+#endif
+
#ifdef VK_USE_PLATFORM_XLIB_KHR
#include "WSI/XlibSurfaceKHR.hpp"
#endif
@@ -54,6 +58,8 @@
#include "WSI/VkSwapchainKHR.hpp"
+#include "Reactor/Nucleus.hpp"
+
#include <algorithm>
#include <cstring>
#include <string>
@@ -75,6 +81,34 @@
return false;
}
+// setReactorDefaultConfig() sets the default configuration for Vulkan's use of
+// Reactor.
+void setReactorDefaultConfig()
+{
+ auto cfg = rr::Config::Edit()
+ .set(rr::Optimization::Level::Default)
+ .clearOptimizationPasses()
+ .add(rr::Optimization::Pass::ScalarReplAggregates)
+ .add(rr::Optimization::Pass::SCCP)
+ .add(rr::Optimization::Pass::CFGSimplification)
+ .add(rr::Optimization::Pass::EarlyCSEPass)
+ .add(rr::Optimization::Pass::CFGSimplification)
+ .add(rr::Optimization::Pass::InstructionCombining);
+
+ rr::Nucleus::adjustDefaultConfig(cfg);
+}
+
+// initializeLibrary() is called by vkCreateInstance() to perform one-off global
+// initialization of the swiftshader driver.
+void initializeLibrary()
+{
+ static bool doOnce = [] {
+ setReactorDefaultConfig();
+ return true;
+ }();
+ (void)doOnce;
+}
+
}
extern "C"
@@ -105,6 +139,9 @@
#ifdef VK_USE_PLATFORM_XLIB_KHR
{ VK_KHR_XLIB_SURFACE_EXTENSION_NAME, VK_KHR_XLIB_SURFACE_SPEC_VERSION },
#endif
+#ifdef VK_USE_PLATFORM_MACOS_MVK
+ { VK_MVK_MACOS_SURFACE_EXTENSION_NAME, VK_MVK_MACOS_SURFACE_SPEC_VERSION },
+#endif
};
static const VkExtensionProperties deviceExtensionProperties[] =
@@ -139,6 +176,8 @@
TRACE("(const VkInstanceCreateInfo* pCreateInfo = %p, const VkAllocationCallbacks* pAllocator = %p, VkInstance* pInstance = %p)",
pCreateInfo, pAllocator, pInstance);
+ initializeLibrary();
+
if(pCreateInfo->enabledLayerCount)
{
UNIMPLEMENTED("pCreateInfo->enabledLayerCount");
@@ -2526,7 +2565,7 @@
TRACE("(VkDevice device = %p, VkDescriptorSet descriptorSet = %p, VkDescriptorUpdateTemplate descriptorUpdateTemplate = %p, const void* pData = %p)",
device, static_cast<void*>(descriptorSet), static_cast<void*>(descriptorUpdateTemplate), pData);
- vk::Cast(descriptorUpdateTemplate)->updateDescriptorSet(descriptorSet, pData);
+ vk::Cast(descriptorUpdateTemplate)->updateDescriptorSet(vk::Cast(device), descriptorSet, pData);
}
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, VkExternalBufferProperties* pExternalBufferProperties)
@@ -2579,6 +2618,16 @@
}
#endif
+#ifdef VK_USE_PLATFORM_MACOS_MVK
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateMacOSSurfaceMVK(VkInstance instance, const VkMacOSSurfaceCreateInfoMVK* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface)
+{
+ TRACE("(VkInstance instance = %p, VkMacOSSurfaceCreateInfoMVK* pCreateInfo = %p, VkAllocationCallbacks* pAllocator = %p, VkSurface* pSurface = %p)",
+ instance, pCreateInfo, pAllocator, pSurface);
+
+ return vk::MacOSSurfaceMVK::Create(pAllocator, pCreateInfo, pSurface);
+}
+#endif
+
#ifndef __ANDROID__
VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR(VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator)
{
diff --git a/src/Vulkan/vulkan.vcxproj b/src/Vulkan/vulkan.vcxproj
index 9da32ff..e6c5020 100644
--- a/src/Vulkan/vulkan.vcxproj
+++ b/src/Vulkan/vulkan.vcxproj
@@ -67,6 +67,7 @@
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@@ -93,6 +94,7 @@
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<TreatSpecificWarningsAsErrors>4018;5038;4838</TreatSpecificWarningsAsErrors>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<ModuleDefinitionFile>libvk_swiftshader.def</ModuleDefinitionFile>
@@ -303,4 +305,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project>
+</Project>
\ No newline at end of file
diff --git a/src/WSI/MacOSSurfaceMVK.h b/src/WSI/MacOSSurfaceMVK.h
new file mode 100644
index 0000000..7822fb6
--- /dev/null
+++ b/src/WSI/MacOSSurfaceMVK.h
@@ -0,0 +1,45 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SWIFTSHADER_MACOSSURFACEMVK_HPP
+#define SWIFTSHADER_MACOSSURFACEMVK_HPP
+
+#include "Vulkan/VkObject.hpp"
+#include "VkSurfaceKHR.hpp"
+#include "vulkan/vulkan_macos.h"
+
+namespace vk {
+
+class MetalLayer;
+
+class MacOSSurfaceMVK : public SurfaceKHR, public ObjectBase<MacOSSurfaceMVK, VkSurfaceKHR> {
+public:
+ MacOSSurfaceMVK(const VkMacOSSurfaceCreateInfoMVK *pCreateInfo, void *mem);
+
+ void destroySurface(const VkAllocationCallbacks *pAllocator) override;
+
+ static size_t ComputeRequiredAllocationSize(const VkMacOSSurfaceCreateInfoMVK *pCreateInfo);
+
+ void getSurfaceCapabilities(VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) const override;
+
+ virtual void attachImage(PresentImage* image) override {}
+ virtual void detachImage(PresentImage* image) override {}
+ void present(PresentImage* image) override;
+
+private:
+ MetalLayer* metalLayer = nullptr;
+};
+
+}
+#endif //SWIFTSHADER_MACOSSURFACEMVK_HPP
diff --git a/src/WSI/MacOSSurfaceMVK.mm b/src/WSI/MacOSSurfaceMVK.mm
new file mode 100644
index 0000000..090cc20
--- /dev/null
+++ b/src/WSI/MacOSSurfaceMVK.mm
@@ -0,0 +1,144 @@
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "MacOSSurfaceMVK.h"
+#include "Vulkan/VkDeviceMemory.hpp"
+#include "Vulkan/VkImage.hpp"
+
+#include <Metal/Metal.h>
+#include <QuartzCore/CAMetalLayer.h>
+#include <AppKit/NSView.h>
+
+namespace vk {
+
+class MetalLayer
+{
+public:
+ void init(const void* pView)
+ {
+ view = nullptr;
+ layer = nullptr;
+
+ id<NSObject> obj = (id<NSObject>)pView;
+
+ if([obj isKindOfClass: [NSView class]])
+ {
+ if(!NSThread.isMainThread)
+ {
+ UNREACHABLE("MetalLayer::init(): not called from main thread");
+ }
+ view = (NSView*)[obj retain];
+
+ obj = view.layer;
+ if ([obj isKindOfClass: [CAMetalLayer class]])
+ {
+ layer = (CAMetalLayer*)[obj retain];
+ }
+ else
+ {
+ UNREACHABLE("MetalLayer::init(): view doesn't have metal backed layer");
+ }
+ }
+ }
+
+ void release()
+ {
+ if(layer)
+ {
+ [layer release];
+ }
+
+ if(view)
+ {
+ [view release];
+ }
+ }
+
+ VkExtent2D getExtent() const
+ {
+ if(layer)
+ {
+ CGSize drawSize = layer.bounds.size;
+ CGFloat scaleFactor = layer.contentsScale;
+ drawSize.width = trunc(drawSize.width * scaleFactor);
+ drawSize.height = trunc(drawSize.height * scaleFactor);
+ return { static_cast<uint32_t>(drawSize.width), static_cast<uint32_t>(drawSize.height) };
+ }
+ else
+ {
+ return { 0, 0 };
+ }
+ }
+
+ id<CAMetalDrawable> getNextDrawable() const
+ {
+ if(layer)
+ {
+ return [layer nextDrawable];
+ }
+
+ return nil;
+ }
+
+private:
+ NSView* view;
+ CAMetalLayer* layer;
+};
+
+MacOSSurfaceMVK::MacOSSurfaceMVK(const VkMacOSSurfaceCreateInfoMVK *pCreateInfo, void *mem) :
+ metalLayer(reinterpret_cast<MetalLayer*>(mem))
+{
+ metalLayer->init(pCreateInfo->pView);
+}
+
+void MacOSSurfaceMVK::destroySurface(const VkAllocationCallbacks *pAllocator)
+{
+ if(metalLayer)
+ {
+ metalLayer->release();
+ }
+
+ vk::deallocate(metalLayer, pAllocator);
+}
+
+size_t MacOSSurfaceMVK::ComputeRequiredAllocationSize(const VkMacOSSurfaceCreateInfoMVK *pCreateInfo)
+{
+ return sizeof(MetalLayer);
+}
+
+void MacOSSurfaceMVK::getSurfaceCapabilities(VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) const
+{
+ SurfaceKHR::getSurfaceCapabilities(pSurfaceCapabilities);
+
+ VkExtent2D extent = metalLayer->getExtent();
+ pSurfaceCapabilities->currentExtent = extent;
+ pSurfaceCapabilities->minImageExtent = extent;
+ pSurfaceCapabilities->maxImageExtent = extent;
+}
+
+void MacOSSurfaceMVK::present(PresentImage* image)
+{
+ auto drawable = metalLayer->getNextDrawable();
+ if(drawable)
+ {
+ VkExtent3D extent = image->getImage()->getMipLevelExtent(VK_IMAGE_ASPECT_COLOR_BIT, 0);
+ [drawable.texture replaceRegion:MTLRegionMake2D(0, 0, extent.width, extent.height)
+ mipmapLevel:0
+ withBytes:image->getImageMemory()->getOffsetPointer(0)
+ bytesPerRow:image->getImage()->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0)];
+ [drawable present];
+ }
+}
+
+}
diff --git a/src/WSI/VkSurfaceKHR.cpp b/src/WSI/VkSurfaceKHR.cpp
index 92fd426..f704ceb 100644
--- a/src/WSI/VkSurfaceKHR.cpp
+++ b/src/WSI/VkSurfaceKHR.cpp
@@ -24,6 +24,7 @@
static const VkSurfaceFormatKHR surfaceFormats[] =
{
{VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR},
+ {VK_FORMAT_B8G8R8A8_SRGB, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR},
};
static const VkPresentModeKHR presentModes[] =
diff --git a/tests/GLESUnitTests/unittests.cpp b/tests/GLESUnitTests/unittests.cpp
index db4de08..7951e27 100644
--- a/tests/GLESUnitTests/unittests.cpp
+++ b/tests/GLESUnitTests/unittests.cpp
@@ -2612,8 +2612,8 @@
Uninitialize();
}
-// Test IOSurface pbuffers cannot be made current
-TEST_F(IOSurfaceClientBufferTest, MakeCurrentDisallowed)
+// Test IOSurface pbuffers can be made current
+TEST_F(IOSurfaceClientBufferTest, MakeCurrentAllowed)
{
Initialize(3, false);
@@ -2623,8 +2623,8 @@
EGLSurface pbuffer = createIOSurfacePbuffer(clientBufferWrapper.getClientBuffer(), 10, 10, 0, GL_BGRA_EXT, GL_UNSIGNED_BYTE);
EGLBoolean result = eglMakeCurrent(getDisplay(), pbuffer, pbuffer, getContext());
- EXPECT_EQ((EGLBoolean)EGL_FALSE, result);
- EXPECT_EQ(EGL_BAD_SURFACE, eglGetError());
+ EXPECT_EQ((EGLBoolean)EGL_TRUE, result);
+ EXPECT_NO_EGL_ERROR();
}
Uninitialize();
diff --git a/tests/VulkanUnitTests/VulkanUnitTests.vcxproj b/tests/VulkanUnitTests/VulkanUnitTests.vcxproj
index df85535..099dc28 100644
--- a/tests/VulkanUnitTests/VulkanUnitTests.vcxproj
+++ b/tests/VulkanUnitTests/VulkanUnitTests.vcxproj
@@ -70,6 +70,7 @@
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@@ -93,6 +94,7 @@
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
diff --git a/tests/fuzzers/VertexRoutineFuzzer.cpp b/tests/fuzzers/VertexRoutineFuzzer.cpp
index d63e293..28327b1 100644
--- a/tests/fuzzers/VertexRoutineFuzzer.cpp
+++ b/tests/fuzzers/VertexRoutineFuzzer.cpp
@@ -203,11 +203,10 @@
sw::VertexProgram program(state, bytecodeShader.get());
program.generate();
- sw::Routine *routine = program("VertexRoutine");
+ auto routine = program("VertexRoutine");
assert(routine);
const void *entry = routine->getEntry();
assert(entry); (void)entry;
- delete routine;
return 0;
}
diff --git a/tests/fuzzers/VertexRoutineFuzzer.vcxproj b/tests/fuzzers/VertexRoutineFuzzer.vcxproj
index a3aec3b..1c75243 100644
--- a/tests/fuzzers/VertexRoutineFuzzer.vcxproj
+++ b/tests/fuzzers/VertexRoutineFuzzer.vcxproj
@@ -97,6 +97,7 @@
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>WS2_32.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
@@ -151,6 +152,7 @@
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<AdditionalOptions>/permissive- %(AdditionalOptions)</AdditionalOptions>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <DisableSpecificWarnings>4267</DisableSpecificWarnings>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
diff --git a/tests/regres/main.go b/tests/regres/main.go
index 0549874..fcfeea7 100644
--- a/tests/regres/main.go
+++ b/tests/regres/main.go
@@ -993,9 +993,9 @@
continue
}
switch {
- case old.Status.Passing() && new.Status.Failing():
+ case !old.Status.Failing() && new.Status.Failing():
broken = append(broken, test)
- case old.Status.Failing() && new.Status.Passing():
+ case !old.Status.Passing() && new.Status.Passing():
fixed = append(fixed, test)
case old.Status != new.Status:
changed = append(changed, test)