Reactor: Add support for specifying and modifying default configuration settings. rr::Config holds the full reactor configuration state. rr::Config::Edit holds edits on a config, which can be applied on top of the current defaults. Default configurations are updated atomically, preventing modifications to the default state from tearing. Bug: b/137167988 Change-Id: Ib05f2cfc31ab22fb9a891a267fffe33c18691028 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33768 Kokoro-Presubmit: kokoro <noreply+kokoro@google.com> Tested-by: Ben Clayton <bclayton@google.com> Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp index 2ca5f3f..e5b41fc 100644 --- a/src/Device/Blitter.cpp +++ b/src/Device/Blitter.cpp
@@ -1535,7 +1535,7 @@ } } - return function(vk::ReactorOptimizationLevel, "BlitRoutine"); + return function(vk::ReactorConfig, "BlitRoutine"); } Routine *Blitter::getBlitRoutine(const State &state) @@ -1890,7 +1890,7 @@ } } - return function(vk::ReactorOptimizationLevel, "BlitRoutine"); + return function(vk::ReactorConfig, "BlitRoutine"); } void Blitter::updateBorders(vk::Image* image, const VkImageSubresourceLayers& subresourceLayers)
diff --git a/src/Device/PixelProcessor.cpp b/src/Device/PixelProcessor.cpp index 51f4517..286880f 100644 --- a/src/Device/PixelProcessor.cpp +++ b/src/Device/PixelProcessor.cpp
@@ -238,7 +238,7 @@ { QuadRasterizer *generator = new PixelProgram(state, pipelineLayout, pixelShader, descriptorSets); generator->generate(); - routine = (*generator)(vk::ReactorOptimizationLevel, "PixelRoutine_%0.8X", state.shaderID); + routine = (*generator)(vk::ReactorConfig, "PixelRoutine_%0.8X", state.shaderID); delete generator; routineCache->add(state, routine);
diff --git a/src/Device/VertexProcessor.cpp b/src/Device/VertexProcessor.cpp index e8796c3..6dc4367 100644 --- a/src/Device/VertexProcessor.cpp +++ b/src/Device/VertexProcessor.cpp
@@ -105,7 +105,7 @@ { VertexRoutine *generator = new VertexProgram(state, pipelineLayout, vertexShader, descriptorSets); generator->generate(); - routine = (*generator)(vk::ReactorOptimizationLevel, "VertexRoutine_%0.8X", state.shaderID); + routine = (*generator)(vk::ReactorConfig, "VertexRoutine_%0.8X", state.shaderID); delete generator; routineCache->add(state, routine);
diff --git a/src/Main/SwiftConfig.cpp b/src/Main/SwiftConfig.cpp index aa17aa8..145ff5c 100644 --- a/src/Main/SwiftConfig.cpp +++ b/src/Main/SwiftConfig.cpp
@@ -401,19 +401,19 @@ html += "<h2><em>Compiler optimizations</em></h2>\n"; html += "<table>\n"; - for(int pass = 0; pass < 10; pass++) + for(size_t pass = 0; pass < config.optimization.size(); pass++) { html += "<tr><td>Optimization pass " + itoa(pass + 1) + ":</td><td><select name='optimization" + itoa(pass + 1) + "' title='An optimization pass for the shader compiler.'>\n"; - html += "<option value='0'" + (config.optimization[pass] == 0 ? selected : empty) + ">Disabled" + (pass > 0 ? " (default)" : "") + "</option>\n"; - html += "<option value='1'" + (config.optimization[pass] == 1 ? selected : empty) + ">Instruction Combining" + (pass == 0 ? " (default)" : "") + "</option>\n"; - html += "<option value='2'" + (config.optimization[pass] == 2 ? selected : empty) + ">Control Flow Simplification</option>\n"; - html += "<option value='3'" + (config.optimization[pass] == 3 ? selected : empty) + ">Loop Invariant Code Motion</option>\n"; - html += "<option value='4'" + (config.optimization[pass] == 4 ? selected : empty) + ">Aggressive Dead Code Elimination</option>\n"; - html += "<option value='5'" + (config.optimization[pass] == 5 ? selected : empty) + ">Global Value Numbering</option>\n"; - html += "<option value='6'" + (config.optimization[pass] == 6 ? selected : empty) + ">Commutative Expressions Reassociation</option>\n"; - html += "<option value='7'" + (config.optimization[pass] == 7 ? selected : empty) + ">Dead Store Elimination</option>\n"; - html += "<option value='8'" + (config.optimization[pass] == 8 ? selected : empty) + ">Sparse Conditional Copy Propagation</option>\n"; - html += "<option value='9'" + (config.optimization[pass] == 9 ? selected : empty) + ">Scalar Replacement of Aggregates</option>\n"; + html += "<option value='0'" + (config.optimization[pass] == rr::Optimization::Pass::Disabled ? selected : empty) + ">Disabled" + (pass > 0 ? " (default)" : "") + "</option>\n"; + html += "<option value='1'" + (config.optimization[pass] == rr::Optimization::Pass::InstructionCombining ? selected : empty) + ">Instruction Combining" + (pass == 0 ? " (default)" : "") + "</option>\n"; + html += "<option value='2'" + (config.optimization[pass] == rr::Optimization::Pass::CFGSimplification ? selected : empty) + ">Control Flow Simplification</option>\n"; + html += "<option value='3'" + (config.optimization[pass] == rr::Optimization::Pass::LICM ? selected : empty) + ">Loop Invariant Code Motion</option>\n"; + html += "<option value='4'" + (config.optimization[pass] == rr::Optimization::Pass::AggressiveDCE ? selected : empty) + ">Aggressive Dead Code Elimination</option>\n"; + html += "<option value='5'" + (config.optimization[pass] == rr::Optimization::Pass::GVN ? selected : empty) + ">Global Value Numbering</option>\n"; + html += "<option value='6'" + (config.optimization[pass] == rr::Optimization::Pass::Reassociate ? selected : empty) + ">Commutative Expressions Reassociation</option>\n"; + html += "<option value='7'" + (config.optimization[pass] == rr::Optimization::Pass::DeadStoreElimination ? selected : empty) + ">Dead Store Elimination</option>\n"; + html += "<option value='8'" + (config.optimization[pass] == rr::Optimization::Pass::SCCP ? selected : empty) + ">Sparse Conditional Copy Propagation</option>\n"; + html += "<option value='9'" + (config.optimization[pass] == rr::Optimization::Pass::ScalarReplAggregates ? selected : empty) + ">Scalar Replacement of Aggregates</option>\n"; html += "</select></td></tr>\n"; } @@ -652,7 +652,7 @@ } else if(sscanf(post, "optimization%d=%d", &index, &integer)) { - config.optimization[index - 1] = (rr::Optimization)integer; + config.optimization[index - 1] = (rr::Optimization::Pass)integer; } else if(strstr(post, "disableServer=on")) { @@ -737,9 +737,10 @@ config.enableSSSE3 = ini.getBoolean("Processor", "EnableSSSE3", true); config.enableSSE4_1 = ini.getBoolean("Processor", "EnableSSE4_1", true); - for(int pass = 0; pass < 10; pass++) + for(size_t pass = 0; pass < config.optimization.size(); pass++) { - config.optimization[pass] = (rr::Optimization)ini.getInteger("Optimization", "OptimizationPass" + itoa(pass + 1), pass == 0 ? rr::InstructionCombining : rr::Disabled); + auto def = pass == 0 ? rr::Optimization::Pass::InstructionCombining : rr::Optimization::Pass::Disabled; + config.optimization[pass] = (rr::Optimization::Pass)ini.getInteger("Optimization", "OptimizationPass" + itoa(pass + 1), (int)def); } config.disableServer = ini.getBoolean("Testing", "DisableServer", false); @@ -795,9 +796,9 @@ ini.addValue("Processor", "EnableSSSE3", itoa(config.enableSSSE3)); ini.addValue("Processor", "EnableSSE4_1", itoa(config.enableSSE4_1)); - for(int pass = 0; pass < 10; pass++) + for(size_t pass = 0; pass < config.optimization.size(); pass++) { - ini.addValue("Optimization", "OptimizationPass" + itoa(pass + 1), itoa(config.optimization[pass])); + ini.addValue("Optimization", "OptimizationPass" + itoa(pass + 1), itoa((int)config.optimization[pass])); } ini.addValue("Testing", "DisableServer", itoa(config.disableServer));
diff --git a/src/Main/SwiftConfig.hpp b/src/Main/SwiftConfig.hpp index ad3dcb5..a40648c 100644 --- a/src/Main/SwiftConfig.hpp +++ b/src/Main/SwiftConfig.hpp
@@ -21,6 +21,7 @@ #include "Common/MutexLock.hpp" #include "Common/Socket.hpp" +#include <array> #include <string> namespace sw @@ -48,7 +49,7 @@ bool enableSSE3; bool enableSSSE3; bool enableSSE4_1; - rr::Optimization optimization[10]; + std::array<rr::Optimization::Pass, 10> optimization; bool disableServer; bool keepSystemCursor; bool forceWindowed;
diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp index 1973a71..6bd887d 100644 --- a/src/Pipeline/SetupRoutine.cpp +++ b/src/Pipeline/SetupRoutine.cpp
@@ -453,7 +453,7 @@ Return(1); } - routine = function(vk::ReactorOptimizationLevel, "SetupRoutine"); + routine = function(vk::ReactorConfig, "SetupRoutine"); } void SetupRoutine::setupGradient(Pointer<Byte> &primitive, Pointer<Byte> &triangle, Float4 &w012, Float4 (&m)[3], Pointer<Byte> &v0, Pointer<Byte> &v1, Pointer<Byte> &v2, int attribute, int planeEquation, bool flat, bool perspective, int component)
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp index e02c32a..b7d8c66 100644 --- a/src/Pipeline/SpirvShaderSampling.cpp +++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -231,7 +231,7 @@ } } - return (ImageSampler*)function(vk::ReactorOptimizationLevel, "sampler")->getEntry(); + return (ImageSampler*)function(vk::ReactorConfig, "sampler")->getEntry(); } sw::TextureType SpirvShader::convertTextureType(VkImageViewType imageViewType)
diff --git a/src/Reactor/Coroutine.hpp b/src/Reactor/Coroutine.hpp index 993e7e0..3eadec2 100644 --- a/src/Reactor/Coroutine.hpp +++ b/src/Reactor/Coroutine.hpp
@@ -133,7 +133,7 @@ // called without building a new rr::Function or rr::Coroutine. // While automatically called by operator(), finalize() should be called // as early as possible to release the global Reactor mutex lock. - inline void finalize(OptimizationLevel optLevel = OptimizationLevel::Default); + inline void finalize(const Config::Edit &cfg = Config::Edit::None); // Starts execution of the coroutine and returns a unique_ptr to a // Stream<> that exposes the await() function for obtaining yielded @@ -164,11 +164,11 @@ } template<typename Return, typename... Arguments> - void Coroutine<Return(Arguments...)>::finalize(OptimizationLevel optLevel /* = OptimizationLevel::Default */) + void Coroutine<Return(Arguments...)>::finalize(const Config::Edit &cfg /* = Config::Edit::None */) { if(core != nullptr) { - routine.reset(core->acquireCoroutine("coroutine", optLevel)); + routine.reset(core->acquireCoroutine("coroutine", cfg)); core.reset(nullptr); } }
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp index c2c7ae7..2e4e5ba 100644 --- a/src/Reactor/LLVMReactor.cpp +++ b/src/Reactor/LLVMReactor.cpp
@@ -105,6 +105,20 @@ namespace { + // Default configuration settings. Must be accessed under mutex lock. + std::mutex defaultConfigLock; + rr::Config &defaultConfig() + { + // This uses a static in a function to avoid the cost of a global static + // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html + static rr::Config config = rr::Config::Edit() + .set(rr::Optimization::Level::Default) + .add(rr::Optimization::Pass::ScalarReplAggregates) + .add(rr::Optimization::Pass::InstructionCombining) + .apply({}); + return config; + } + class LLVMInitializer { protected: @@ -228,7 +242,7 @@ std::unique_ptr<llvm::Module> module, llvm::Function **funcs, size_t count, - rr::OptimizationLevel optLevel) : + const rr::Config &config) : resolver(createLegacyLookupResolver( session, [&](const std::string &name) { @@ -251,7 +265,7 @@ #ifdef ENABLE_RR_DEBUG_INFO .setOptLevel(llvm::CodeGenOpt::None) #else - .setOptLevel(toLLVM(optLevel)) + .setOptLevel(toLLVM(config.getOptimization().getLevel())) #endif // ENABLE_RR_DEBUG_INFO .setMCPU(JITGlobals::get()->mcpu) .setMArch(JITGlobals::get()->march) @@ -318,15 +332,15 @@ } private: - static ::llvm::CodeGenOpt::Level toLLVM(rr::OptimizationLevel level) + static ::llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level) { switch (level) { - case rr::OptimizationLevel::None: return ::llvm::CodeGenOpt::None; - case rr::OptimizationLevel::Less: return ::llvm::CodeGenOpt::Less; - case rr::OptimizationLevel::Default: return ::llvm::CodeGenOpt::Default; - case rr::OptimizationLevel::Aggressive: return ::llvm::CodeGenOpt::Aggressive; - default: UNREACHABLE("Unknown OptimizationLevel %d", int(level)); + case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None; + case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less; + case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default; + case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive; + default: UNREACHABLE("Unknown Optimization Level %d", int(level)); } return ::llvm::CodeGenOpt::Default; } @@ -343,15 +357,17 @@ class JITBuilder { public: - JITBuilder(): + JITBuilder(const rr::Config &config) : + config(config), module(new llvm::Module("", context)), builder(new llvm::IRBuilder<>(context)) { module->setDataLayout(JITGlobals::get()->dataLayout); } - void optimize() + void optimize(const rr::Config &cfg) { + #ifdef ENABLE_RR_DEBUG_INFO if (debugInfo != nullptr) { @@ -362,36 +378,35 @@ std::unique_ptr<llvm::legacy::PassManager> passManager( new llvm::legacy::PassManager()); - passManager->add(llvm::createSROAPass()); - - for(int pass = 0; pass < 10 && rr::optimization[pass] != rr::Disabled; pass++) + for(auto pass : cfg.getOptimization().getPasses()) { - switch(rr::optimization[pass]) + switch(pass) { - case rr::Disabled: break; - case rr::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break; - case rr::LICM: passManager->add(llvm::createLICMPass()); break; - case rr::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break; - case rr::GVN: passManager->add(llvm::createGVNPass()); break; - case rr::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break; - case rr::Reassociate: passManager->add(llvm::createReassociatePass()); break; - case rr::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break; - case rr::SCCP: passManager->add(llvm::createSCCPPass()); break; - case rr::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break; + case rr::Optimization::Pass::Disabled: break; + case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break; + case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break; + case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break; + case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break; + case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break; + case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break; + case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break; + case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break; + case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break; default: - UNREACHABLE("optimization[pass]: %d, pass: %d", int(rr::optimization[pass]), int(pass)); + UNREACHABLE("pass: %d", int(pass)); } } passManager->run(*module); } - rr::Routine *acquireRoutine(llvm::Function **funcs, size_t count, rr::OptimizationLevel optLevel) + rr::Routine *acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg) { ASSERT(module); - return new JITRoutine(std::move(module), funcs, count, optLevel); + return new JITRoutine(std::move(module), funcs, count, cfg); } + const rr::Config config; llvm::LLVMContext context; std::unique_ptr<llvm::Module> module; std::unique_ptr<llvm::IRBuilder<>> builder; @@ -1002,8 +1017,6 @@ return it->second; } - Optimization optimization[10] = {InstructionCombining, Disabled}; - // The abstract Type* types are implemented as LLVM types, except that // 64-bit vectors are emulated using 128-bit ones to avoid use of MMX in x86 // and VFP in ARM, and eliminate the overhead of converting them to explicit @@ -1134,7 +1147,7 @@ ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe ASSERT(jit == nullptr); - jit.reset(new JITBuilder()); + jit.reset(new JITBuilder(Nucleus::getDefaultConfig())); } Nucleus::~Nucleus() @@ -1143,8 +1156,29 @@ ::codegenMutex.unlock(); } - Routine *Nucleus::acquireRoutine(const char *name, OptimizationLevel optimizationLevel) + void Nucleus::setDefaultConfig(const Config &cfg) { + std::unique_lock<std::mutex> lock(::defaultConfigLock); + ::defaultConfig() = cfg; + } + + void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit) + { + std::unique_lock<std::mutex> lock(::defaultConfigLock); + auto &config = ::defaultConfig(); + config = cfgEdit.apply(config); + } + + Config Nucleus::getDefaultConfig() + { + std::unique_lock<std::mutex> lock(::defaultConfigLock); + return ::defaultConfig(); + } + + Routine *Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) + { + auto cfg = cfgEdit.apply(jit->config); + if(jit->builder->GetInsertBlock()->empty() || !jit->builder->GetInsertBlock()->back().isTerminator()) { llvm::Type *type = jit->function->getReturnType(); @@ -1181,7 +1215,7 @@ } #endif // defined(ENABLE_RR_LLVM_IR_VERIFICATION) || !defined(NDEBUG) - optimize(); + jit->optimize(cfg); if(false) { @@ -1190,17 +1224,12 @@ jit->module->print(file, 0); } - auto routine = jit->acquireRoutine(&jit->function, 1, optimizationLevel); + auto routine = jit->acquireRoutine(&jit->function, 1, cfg); jit.reset(); return routine; } - void Nucleus::optimize() - { - jit->optimize(); - } - Value *Nucleus::allocateStackVariable(Type *type, int arraySize) { // Need to allocate it in the entry block for mem2reg to work @@ -4678,7 +4707,7 @@ jit->builder->SetInsertPoint(resumeBlock); } -Routine* Nucleus::acquireCoroutine(const char *name, OptimizationLevel optimizationLevel) +Routine* Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) { ASSERT_MSG(jit->coroutine.id != nullptr, "acquireCoroutine() called without a call to createCoroutine()"); @@ -4707,7 +4736,8 @@ pm.add(llvm::createCoroCleanupPass()); pm.run(*jit->module); - optimize(); + auto cfg = cfgEdit.apply(jit->config); + jit->optimize(cfg); if(false) { @@ -4720,7 +4750,7 @@ funcs[Nucleus::CoroutineEntryBegin] = jit->function; funcs[Nucleus::CoroutineEntryAwait] = jit->coroutine.await; funcs[Nucleus::CoroutineEntryDestroy] = jit->coroutine.destroy; - auto routine = jit->acquireRoutine(funcs, Nucleus::CoroutineEntryCount, optimizationLevel); + auto routine = jit->acquireRoutine(funcs, Nucleus::CoroutineEntryCount, cfg); jit.reset(); return routine;
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp index 819d100..0bc953b 100644 --- a/src/Reactor/Nucleus.hpp +++ b/src/Reactor/Nucleus.hpp
@@ -33,30 +33,85 @@ class BasicBlock; class Routine; - enum Optimization + // Optimization holds the optimization settings for code generation. + class Optimization { - Disabled = 0, - InstructionCombining = 1, - CFGSimplification = 2, - LICM = 3, - AggressiveDCE = 4, - GVN = 5, - Reassociate = 6, - DeadStoreElimination = 7, - SCCP = 8, - ScalarReplAggregates = 9, + public: + enum class Level + { + None, + Less, + Default, + Aggressive, + }; - OptimizationCount + enum class Pass + { + Disabled, + InstructionCombining, + CFGSimplification, + LICM, + AggressiveDCE, + GVN, + Reassociate, + DeadStoreElimination, + SCCP, + ScalarReplAggregates, + + Count, + }; + + using Passes = std::vector<Pass>; + + Optimization() = default; + Optimization(Level level, const Passes & passes) : level(level), passes(passes) {} + + Level getLevel() const { return level; } + const Passes & getPasses() const { return passes; } + + private: + Level level = Level::Default; + Passes passes; }; - extern Optimization optimization[10]; - - enum class OptimizationLevel + // Config holds the Reactor configuration settings. + class Config { - None, - Less, - Default, - Aggressive, + public: + // Edit holds a number of modifications to a config, that can be applied + // on an existing Config to produce a new Config with the specified + // changes. + class Edit + { + public: + static const Edit None; + + Edit & set(Optimization::Level level) { optLevel = level; optLevelChanged = true; return *this; } + Edit & add(Optimization::Pass pass) { optPassEdits.push_back({ListEdit::Add, pass}); return *this; } + Edit & remove(Optimization::Pass pass) { optPassEdits.push_back({ListEdit::Remove, pass}); return *this; } + Edit & clearOptimizationPasses() { optPassEdits.push_back({ListEdit::Clear, Optimization::Pass::Disabled}); return *this; } + + Config apply(const Config &cfg) const; + + private: + enum class ListEdit { Add, Remove, Clear }; + using OptPassesEdit = std::pair<ListEdit, Optimization::Pass>; + + template <typename T> + void apply(const std::vector<std::pair<ListEdit, T>> & edits, std::vector<T>& list) const; + + Optimization::Level optLevel; + bool optLevelChanged = false; + std::vector<OptPassesEdit> optPassEdits; + }; + + Config() = default; + Config(const Optimization & optimization) : optimization(optimization) {} + + const Optimization & getOptimization() const { return optimization; } + + private: + Optimization optimization; }; class Nucleus @@ -66,7 +121,13 @@ virtual ~Nucleus(); - Routine *acquireRoutine(const char *name, OptimizationLevel optimizationLevel); + // Default configuration to use when no other configuration is specified. + // The new configuration will be applied to subsequent reactor calls. + static void setDefaultConfig(const Config &cfg); + static void adjustDefaultConfig(const Config::Edit &cfgEdit); + static Config getDefaultConfig(); + + Routine *acquireRoutine(const char *name, const Config::Edit &cfgEdit = Config::Edit::None); static Value *allocateStackVariable(Type *type, int arraySize = 0); static BasicBlock *createBasicBlock(); @@ -93,7 +154,7 @@ }; static void createCoroutine(Type *ReturnType, std::vector<Type*> &Params); - Routine *acquireCoroutine(const char *name, OptimizationLevel optimizationLevel); + Routine *acquireCoroutine(const char *name, const Config::Edit &cfg = Config::Edit::None); static void yield(Value*); // Terminators @@ -219,9 +280,6 @@ static Value *createConstantVector(const double *constants, Type *type); static Type *getPointerType(Type *elementType); - - private: - void optimize(); }; }
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp index bb94cf5..60ee656 100644 --- a/src/Reactor/Reactor.cpp +++ b/src/Reactor/Reactor.cpp
@@ -21,8 +21,59 @@ #define REACTOR_MATERIALIZE_LVALUES_ON_DEFINITION 0 #endif +namespace +{ + // Introduced in C++20. + template <class ForwardIterator, class UnaryPredicate> + ForwardIterator remove_if(ForwardIterator first, ForwardIterator last, + UnaryPredicate pred) + { + ForwardIterator result = first; + while (first!=last) { + if (!pred(*first)) { + *result = std::move(*first); + ++result; + } + ++first; + } + return result; + } +} + namespace rr { + const Config::Edit Config::Edit::None = {}; + + Config Config::Edit::apply(const Config &cfg) const + { + if (this == &None) { return cfg; } + + auto level = optLevelChanged ? optLevel : cfg.optimization.getLevel(); + auto passes = cfg.optimization.getPasses(); + apply(optPassEdits, passes); + return Config{ Optimization{level, passes} }; + } + + template <typename T> + void rr::Config::Edit::apply(const std::vector<std::pair<ListEdit, T>> & edits, std::vector<T>& list) const + { + for (auto & edit : edits) + { + switch (edit.first) + { + case ListEdit::Add: + list.push_back(edit.second); + break; + case ListEdit::Remove: + ::remove_if(list.begin(), list.end(), [&](T item) { return item == edit.second; }); + break; + case ListEdit::Clear: + list.clear(); + break; + } + } + } + // Set of variables that do not have a stack location yet. std::unordered_set<Variable*> Variable::unmaterializedVariables;
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp index 1391275..4c82780 100644 --- a/src/Reactor/Reactor.hpp +++ b/src/Reactor/Reactor.hpp
@@ -2465,7 +2465,7 @@ } Routine *operator()(const char *name, ...); - Routine *operator()(OptimizationLevel optLevel, const char *name, ...); + Routine *operator()(const Config::Edit &cfg, const char *name, ...); protected: Nucleus *core; @@ -3040,11 +3040,11 @@ vsnprintf(fullName, 1024, name, vararg); va_end(vararg); - return core->acquireRoutine(fullName, OptimizationLevel::Default); + return core->acquireRoutine(fullName, Config::Edit::None); } template<typename Return, typename... Arguments> - Routine *Function<Return(Arguments...)>::operator()(OptimizationLevel optLevel, const char *name, ...) + Routine *Function<Return(Arguments...)>::operator()(const Config::Edit &cfg, const char *name, ...) { char fullName[1024 + 1]; @@ -3053,7 +3053,7 @@ vsnprintf(fullName, 1024, name, vararg); va_end(vararg); - return core->acquireRoutine(fullName, optLevel); + return core->acquireRoutine(fullName, cfg); } template<class T, class S>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp index bcc2f7e..178c075 100644 --- a/src/Reactor/SubzeroReactor.cpp +++ b/src/Reactor/SubzeroReactor.cpp
@@ -55,6 +55,18 @@ namespace { + // Default configuration settings. Must be accessed under mutex lock. + std::mutex defaultConfigLock; + rr::Config &defaultConfig() + { + // This uses a static in a function to avoid the cost of a global static + // initializer. See http://neugierig.org/software/chromium/notes/2011/08/static-initializers.html + static rr::Config config = rr::Config::Edit() + .set(rr::Optimization::Level::Default) + .apply({}); + return config; + } + Ice::GlobalContext *context = nullptr; Ice::Cfg *function = nullptr; Ice::CfgNode *basicBlock = nullptr; @@ -77,6 +89,19 @@ #define __x86_64__ 1 #endif + static Ice::OptLevel toIce(rr::Optimization::Level level) + { + switch (level) + { + case rr::Optimization::Level::None: return Ice::Opt_0; + case rr::Optimization::Level::Less: return Ice::Opt_1; + case rr::Optimization::Level::Default: return Ice::Opt_2; + case rr::Optimization::Level::Aggressive: return Ice::Opt_2; + default: UNREACHABLE("Unknown Optimization Level %d", int(level)); + } + return Ice::Opt_2; + } + class CPUID { public: @@ -204,8 +229,6 @@ return Ice::typeWidthInBytes(T(type)); } - Optimization optimization[10] = {InstructionCombining, Disabled}; - using ElfHeader = std::conditional<sizeof(void*) == 8, Elf64_Ehdr, Elf32_Ehdr>::type; using SectionHeader = std::conditional<sizeof(void*) == 8, Elf64_Shdr, Elf32_Shdr>::type; @@ -548,7 +571,7 @@ Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2); #endif Flags.setOutFileType(Ice::FT_Elf); - Flags.setOptLevel(Ice::Opt_2); + Flags.setOptLevel(toIce(getDefaultConfig().getOptimization().getLevel())); Flags.setApplicationBinaryInterface(Ice::ABI_Platform); Flags.setVerbose(false ? Ice::IceV_Most : Ice::IceV_None); Flags.setDisableHybridAssembly(true); @@ -585,7 +608,26 @@ ::codegenMutex.unlock(); } - Routine *Nucleus::acquireRoutine(const char *name, OptimizationLevel optimizationLevel) + void Nucleus::setDefaultConfig(const Config &cfg) + { + std::unique_lock<std::mutex> lock(::defaultConfigLock); + ::defaultConfig() = cfg; + } + + void Nucleus::adjustDefaultConfig(const Config::Edit &cfgEdit) + { + std::unique_lock<std::mutex> lock(::defaultConfigLock); + auto &config = ::defaultConfig(); + config = cfgEdit.apply(config); + } + + Config Nucleus::getDefaultConfig() + { + std::unique_lock<std::mutex> lock(::defaultConfigLock); + return ::defaultConfig(); + } + + Routine *Nucleus::acquireRoutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) { if(basicBlock->getInsts().empty() || basicBlock->getInsts().back().getKind() != Ice::Inst::Ret) { @@ -594,7 +636,7 @@ ::function->setFunctionName(Ice::GlobalString::createWithString(::context, name)); - optimize(); + rr::optimize(::function); ::function->translate(); ASSERT(!::function->hasError()); @@ -624,11 +666,6 @@ return handoffRoutine; } - void Nucleus::optimize() - { - rr::optimize(::function); - } - Value *Nucleus::allocateStackVariable(Type *t, int arraySize) { Ice::Type type = T(t); @@ -3506,7 +3543,7 @@ void FlushDebug() {} void Nucleus::createCoroutine(Type *YieldType, std::vector<Type*> &Params) { UNIMPLEMENTED("createCoroutine"); } - Routine* Nucleus::acquireCoroutine(const char *name, OptimizationLevel optimizationLevel) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; } + Routine* Nucleus::acquireCoroutine(const char *name, const Config::Edit &cfgEdit /* = Config::Edit::None */) { UNIMPLEMENTED("acquireCoroutine"); return nullptr; } void Nucleus::yield(Value* val) { UNIMPLEMENTED("Yield"); } }
diff --git a/src/Renderer/Renderer.cpp b/src/Renderer/Renderer.cpp index 87b8dd1..fe9f52e 100644 --- a/src/Renderer/Renderer.cpp +++ b/src/Renderer/Renderer.cpp
@@ -2855,10 +2855,13 @@ CPUID::setEnableSSE2(configuration.enableSSE2); CPUID::setEnableSSE(configuration.enableSSE); - for(int pass = 0; pass < 10; pass++) + rr::Config::Edit cfg; + cfg.clearOptimizationPasses(); + for(auto pass : configuration.optimization) { - optimization[pass] = configuration.optimization[pass]; + if (pass != rr::Optimization::Pass::Disabled) { cfg.add(pass); } } + rr::Nucleus::adjustDefaultConfig(cfg); forceWindowed = configuration.forceWindowed; complementaryDepthBuffer = configuration.complementaryDepthBuffer;
diff --git a/src/Vulkan/VkConfig.h b/src/Vulkan/VkConfig.h index adaa353..f6465b0 100644 --- a/src/Vulkan/VkConfig.h +++ b/src/Vulkan/VkConfig.h
@@ -17,7 +17,7 @@ #include "Version.h" -#include "Reactor/Nucleus.hpp" // ReactorOptimizationLevel +#include "Reactor/Nucleus.hpp" // ReactorConfig #include <Vulkan/VulkanPlatform.h> @@ -79,8 +79,8 @@ MAX_POINT_SIZE = 1, // Large points are not supported. If/when we turn this on, must be >= 64. }; -// Optimization level to use for JIT functions. -static constexpr auto ReactorOptimizationLevel = rr::OptimizationLevel::Default; +// Configuration to use for JIT functions. +static const auto ReactorConfig = rr::Config::Edit().set(rr::Optimization::Level::Default); }
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp index ba1b8db..a9f1cec 100644 --- a/src/Vulkan/VkPipeline.cpp +++ b/src/Vulkan/VkPipeline.cpp
@@ -262,7 +262,7 @@ // TODO(b/119409619): use allocator. auto program = std::make_shared<sw::ComputeProgram>(key.getShader(), key.getLayout(), descriptorSets); program->generate(); - program->finalize(vk::ReactorOptimizationLevel); + program->finalize(vk::ReactorConfig); return program; }