Optimizer optimization
Optimizer::optimizeStoresInSingleBasicBlock() was taking an extremely long time due to
looping through all the instructions of a single basic block at every iteration. By
creating a much smaller list of only load/store operations the first time a single basic
block is encountered and reusing that list when the same block is encountered again,
this function now runs ~10X faster.
In my test case:
out/Default/cc_unittests --gtest_filter=ImageBackgroundFilter.BackgroundFilterRotated_GL
The sw::optimize function went from taking almost 16s to roughly 1.5s.
Bug b/67872293
Change-Id: I7e2e2aa8dc1bf2663988fb59b58d72d9ee986e36
Reviewed-on: https://swiftshader-review.googlesource.com/19408
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/Optimizer.cpp b/src/Reactor/Optimizer.cpp
index 2d4ac82..83ea5f2 100644
--- a/src/Reactor/Optimizer.cpp
+++ b/src/Reactor/Optimizer.cpp
@@ -266,6 +266,22 @@
{
Ice::CfgNode *entryBlock = function->getEntryNode();
+ struct LoadStoreInst
+ {
+ LoadStoreInst(Ice::Inst* inst, bool isStore)
+ : inst(inst),
+ address(isStore ? storeAddress(inst) : loadAddress(inst)),
+ isStore(isStore)
+ {
+ }
+
+ Ice::Inst* inst;
+ Ice::Operand *address;
+ bool isStore;
+ };
+
+ std::unordered_map<Ice::CfgNode*, std::vector<LoadStoreInst> > loadStoreMap;
+
for(Ice::Inst &alloca : entryBlock->getInsts())
{
if(alloca.isDeleted())
@@ -301,56 +317,65 @@
if(singleBasicBlock)
{
- auto &insts = singleBasicBlock->getInsts();
- Ice::Inst *store = nullptr;
- Ice::Operand *storeValue = nullptr;
- bool unmatchedLoads = false;
-
- for(Ice::Inst &inst : insts)
+ if(loadStoreMap.find(singleBasicBlock) == loadStoreMap.end())
{
- if(inst.isDeleted())
+ std::vector<LoadStoreInst> &loadStoreVector = loadStoreMap[singleBasicBlock];
+ for(Ice::Inst &inst : singleBasicBlock->getInsts())
{
- continue;
- }
-
- if(isStore(inst))
- {
- if(storeAddress(&inst) != address)
+ if(inst.isDeleted())
{
continue;
}
+ bool isStoreInst = isStore(inst);
+ bool isLoadInst = isLoad(inst);
+
+ if(isStoreInst || isLoadInst)
+ {
+ loadStoreVector.push_back(LoadStoreInst(&inst, isStoreInst));
+ }
+ }
+ }
+
+ Ice::Inst *store = nullptr;
+ Ice::Operand *storeValue = nullptr;
+ bool unmatchedLoads = false;
+
+ for (auto& loadStoreInst : loadStoreMap[singleBasicBlock])
+ {
+ Ice::Inst* inst = loadStoreInst.inst;
+
+ if((loadStoreInst.address != address) || inst->isDeleted())
+ {
+ continue;
+ }
+
+ if(loadStoreInst.isStore)
+ {
// New store found. If we had a previous one, try to eliminate it.
if(store && !unmatchedLoads)
{
// If the previous store is wider than the new one, we can't eliminate it
// because there could be a wide load reading its non-overwritten data.
- if(storeSize(&inst) >= storeSize(store))
+ if(storeSize(inst) >= storeSize(store))
{
deleteInstruction(store);
}
}
- store = &inst;
+ store = inst;
storeValue = storeData(store);
unmatchedLoads = false;
}
- else if(isLoad(inst))
+ else
{
- Ice::Inst *load = &inst;
-
- if(loadAddress(load) != address)
- {
- continue;
- }
-
- if(!loadTypeMatchesStore(load, store))
+ if(!loadTypeMatchesStore(inst, store))
{
unmatchedLoads = true;
continue;
}
- replace(load, storeValue);
+ replace(inst, storeValue);
}
}
}