Std:unordered_map removed from Optimizer for improved performance
The use of std::unordered_map was the main source of slowdowns in
the optimizer code, so it was re-written without any maps. In order
to do so, the information originally carried by the maps was moved
to user-defined information stored within Subzero classes. The
optimizer now manages the memory used to store this information.
Bug swiftshader:69
Bug b/67872293
Change-Id: I2757169f0d3d467766317af6e00e149b4317fb9c
Reviewed-on: https://swiftshader-review.googlesource.com/19508
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/Optimizer.cpp b/src/Reactor/Optimizer.cpp
index 83ea5f2..15c5f84 100644
--- a/src/Reactor/Optimizer.cpp
+++ b/src/Reactor/Optimizer.cpp
@@ -17,7 +17,6 @@
#include "src/IceCfg.h"
#include "src/IceCfgNode.h"
-#include <unordered_map>
#include <vector>
namespace
@@ -61,9 +60,35 @@
std::vector<Ice::Inst*> stores;
};
- std::unordered_map<Ice::Operand*, Uses> uses;
- std::unordered_map<Ice::Inst*, Ice::CfgNode*> node;
- std::unordered_map<Ice::Variable*, Ice::Inst*> definition;
+ struct LoadStoreInst
+ {
+ LoadStoreInst(Ice::Inst* inst, bool isStore)
+ : inst(inst),
+ address(isStore ? storeAddress(inst) : loadAddress(inst)),
+ isStore(isStore)
+ {
+ }
+
+ Ice::Inst* inst;
+ Ice::Operand *address;
+ bool isStore;
+ };
+
+ Optimizer::Uses* getUses(Ice::Operand*);
+ void setUses(Ice::Operand*, Optimizer::Uses*);
+ bool hasUses(Ice::Operand*) const;
+
+ Ice::CfgNode* getNode(Ice::Inst*);
+ void setNode(Ice::Inst*, Ice::CfgNode*);
+
+ Ice::Inst* getDefinition(Ice::Variable*);
+ void setDefinition(Ice::Variable*, Ice::Inst*);
+
+ const std::vector<LoadStoreInst>& getLoadStoreInsts(Ice::CfgNode*);
+ void setLoadStoreInsts(Ice::CfgNode*, std::vector<LoadStoreInst>*);
+ bool hasLoadStoreInsts(Ice::CfgNode* node) const;
+
+ std::vector<Optimizer::Uses*> allocatedUses;
};
void Optimizer::run(Ice::Cfg *function)
@@ -78,6 +103,12 @@
eliminateLoadsFollowingSingleStore();
optimizeStoresInSingleBasicBlock();
eliminateDeadCode();
+
+ for(auto uses : allocatedUses)
+ {
+ delete uses;
+ }
+ allocatedUses.clear();
}
void Optimizer::eliminateDeadCode()
@@ -123,8 +154,13 @@
}
Ice::Operand *address = alloca.getDest();
- const auto &addressEntry = uses.find(address);
- const auto &addressUses = addressEntry->second;
+
+ if(!hasUses(address))
+ {
+ return;
+ }
+
+ const auto &addressUses = *getUses(address);
if(!addressUses.areOnlyLoadStore())
{
@@ -137,26 +173,29 @@
{
Ice::Variable *loadData = load->getDest();
- for(Ice::Inst *use : uses[loadData])
+ if(hasUses(loadData))
{
- for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
+ for(Ice::Inst *use : *getUses(loadData))
{
- if(use->getSrc(i) == loadData)
+ for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
{
- auto *undef = context->getConstantUndef(loadData->getType());
+ if(use->getSrc(i) == loadData)
+ {
+ auto *undef = context->getConstantUndef(loadData->getType());
- use->replaceSource(i, undef);
+ use->replaceSource(i, undef);
+ }
}
}
- }
- uses.erase(loadData);
+ setUses(loadData, nullptr);
+ }
load->setDeleted();
}
alloca.setDeleted();
- uses.erase(addressEntry);
+ setUses(address, nullptr);
}
}
}
@@ -178,8 +217,13 @@
}
Ice::Operand *address = alloca.getDest();
- const auto &addressEntry = uses.find(address);
- auto &addressUses = addressEntry->second;
+
+ if(!hasUses(address))
+ {
+ return;
+ }
+
+ auto &addressUses = *getUses(address);
if(!addressUses.areOnlyLoadStore())
{
@@ -236,23 +280,26 @@
alloca.setDeleted();
store->setDeleted();
- uses.erase(address);
+ setUses(address, nullptr);
- auto &valueUses = uses[storeValue];
-
- for(size_t i = 0; i < valueUses.size(); i++)
+ if(hasUses(storeValue))
{
- if(valueUses[i] == store)
+ auto &valueUses = *getUses(storeValue);
+
+ for(size_t i = 0; i < valueUses.size(); i++)
{
- valueUses[i] = valueUses.back();
- valueUses.pop_back();
- break;
+ if(valueUses[i] == store)
+ {
+ valueUses[i] = valueUses.back();
+ valueUses.pop_back();
+ break;
+ }
}
- }
- if(valueUses.empty())
- {
- uses.erase(storeValue);
+ if(valueUses.empty())
+ {
+ setUses(storeValue, nullptr);
+ }
}
break;
@@ -266,21 +313,7 @@
{
Ice::CfgNode *entryBlock = function->getEntryNode();
- struct LoadStoreInst
- {
- LoadStoreInst(Ice::Inst* inst, bool isStore)
- : inst(inst),
- address(isStore ? storeAddress(inst) : loadAddress(inst)),
- isStore(isStore)
- {
- }
-
- Ice::Inst* inst;
- Ice::Operand *address;
- bool isStore;
- };
-
- std::unordered_map<Ice::CfgNode*, std::vector<LoadStoreInst> > loadStoreMap;
+ std::vector<std::vector<LoadStoreInst>* > allocatedVectors;
for(Ice::Inst &alloca : entryBlock->getInsts())
{
@@ -295,20 +328,25 @@
}
Ice::Operand *address = alloca.getDest();
- const auto &addressEntry = uses.find(address);
- const auto &addressUses = addressEntry->second;
+
+ if(!hasUses(address))
+ {
+ return;
+ }
+
+ const auto &addressUses = *getUses(address);
if(!addressUses.areOnlyLoadStore())
{
continue;
}
- Ice::CfgNode *singleBasicBlock = node[addressUses.stores[0]];
+ Ice::CfgNode *singleBasicBlock = getNode(addressUses.stores[0]);
for(size_t i = 1; i < addressUses.stores.size(); i++)
{
Ice::Inst *store = addressUses.stores[i];
- if(node[store] != singleBasicBlock)
+ if(getNode(store) != singleBasicBlock)
{
singleBasicBlock = nullptr;
break;
@@ -317,9 +355,11 @@
if(singleBasicBlock)
{
- if(loadStoreMap.find(singleBasicBlock) == loadStoreMap.end())
+ if(!hasLoadStoreInsts(singleBasicBlock))
{
- std::vector<LoadStoreInst> &loadStoreVector = loadStoreMap[singleBasicBlock];
+ std::vector<LoadStoreInst>* loadStoreInstVector = new std::vector<LoadStoreInst>();
+ setLoadStoreInsts(singleBasicBlock, loadStoreInstVector);
+ allocatedVectors.push_back(loadStoreInstVector);
for(Ice::Inst &inst : singleBasicBlock->getInsts())
{
if(inst.isDeleted())
@@ -332,7 +372,7 @@
if(isStoreInst || isLoadInst)
{
- loadStoreVector.push_back(LoadStoreInst(&inst, isStoreInst));
+ loadStoreInstVector->push_back(LoadStoreInst(&inst, isStoreInst));
}
}
}
@@ -341,7 +381,7 @@
Ice::Operand *storeValue = nullptr;
bool unmatchedLoads = false;
- for (auto& loadStoreInst : loadStoreMap[singleBasicBlock])
+ for (auto& loadStoreInst : getLoadStoreInsts(singleBasicBlock))
{
Ice::Inst* inst = loadStoreInst.inst;
@@ -380,14 +420,15 @@
}
}
}
+
+ for(auto loadStoreInstVector : allocatedVectors)
+ {
+ delete loadStoreInstVector;
+ }
}
void Optimizer::analyzeUses(Ice::Cfg *function)
{
- uses.clear();
- node.clear();
- definition.clear();
-
for(Ice::CfgNode *basicBlock : function->getNodes())
{
for(Ice::Inst &instruction : basicBlock->getInsts())
@@ -397,8 +438,11 @@
continue;
}
- node[&instruction] = basicBlock;
- definition[instruction.getDest()] = &instruction;
+ setNode(&instruction, basicBlock);
+ if(instruction.getDest())
+ {
+ setDefinition(instruction.getDest(), &instruction);
+ }
for(Ice::SizeT i = 0; i < instruction.getSrcSize(); i++)
{
@@ -414,7 +458,7 @@
if(i == unique)
{
Ice::Operand *src = instruction.getSrc(i);
- uses[src].insert(src, &instruction);
+ getUses(src)->insert(src, &instruction);
}
}
}
@@ -430,23 +474,26 @@
newValue = context->getConstantUndef(oldValue->getType());
}
- for(Ice::Inst *use : uses[oldValue])
+ if(hasUses(oldValue))
{
- assert(!use->isDeleted()); // Should have been removed from uses already
-
- for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
+ for(Ice::Inst *use : *getUses(oldValue))
{
- if(use->getSrc(i) == oldValue)
+ assert(!use->isDeleted()); // Should have been removed from uses already
+
+ for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
{
- use->replaceSource(i, newValue);
+ if(use->getSrc(i) == oldValue)
+ {
+ use->replaceSource(i, newValue);
+ }
}
+
+ getUses(newValue)->insert(newValue, use);
}
- uses[newValue].insert(newValue, use);
+ setUses(oldValue, nullptr);
}
- uses.erase(oldValue);
-
deleteInstruction(instruction);
}
@@ -463,21 +510,19 @@
{
Ice::Operand *src = instruction->getSrc(i);
- const auto &srcEntry = uses.find(src);
-
- if(srcEntry != uses.end())
+ if(hasUses(src))
{
- auto &srcUses = srcEntry->second;
+ auto &srcUses = *getUses(src);
srcUses.erase(instruction);
if(srcUses.empty())
{
- uses.erase(srcEntry);
+ setUses(src, nullptr);
if(Ice::Variable *var = llvm::dyn_cast<Ice::Variable>(src))
{
- deleteInstruction(definition[var]);
+ deleteInstruction(getDefinition(var));
}
}
}
@@ -490,17 +535,25 @@
if(dest)
{
- return uses[dest].empty() && !instruction->hasSideEffects();
+ return (!hasUses(dest) || getUses(dest)->empty()) && !instruction->hasSideEffects();
}
else if(isStore(*instruction))
{
if(Ice::Variable *address = llvm::dyn_cast<Ice::Variable>(storeAddress(instruction)))
{
- Ice::Inst *def = definition[address];
+ Ice::Inst *def = getDefinition(address);
if(def && llvm::isa<Ice::InstAlloca>(def))
{
- return uses[address].size() == uses[address].stores.size(); // Dead if all uses are stores
+ if(hasUses(address))
+ {
+ Optimizer::Uses* uses = getUses(address);
+ return uses->size() == uses->stores.size(); // Dead if all uses are stores
+ }
+ else
+ {
+ return true; // No uses
+ }
}
}
}
@@ -654,6 +707,63 @@
return false;
}
+ Optimizer::Uses* Optimizer::getUses(Ice::Operand* operand)
+ {
+ Optimizer::Uses* uses = (Optimizer::Uses*)operand->Ice::Operand::getExternalData();
+ if(!uses)
+ {
+ uses = new Optimizer::Uses;
+ setUses(operand, uses);
+ allocatedUses.push_back(uses);
+ }
+ return uses;
+ }
+
+ void Optimizer::setUses(Ice::Operand* operand, Optimizer::Uses* uses)
+ {
+ operand->Ice::Operand::setExternalData(uses);
+ }
+
+ bool Optimizer::hasUses(Ice::Operand* operand) const
+ {
+ return operand->Ice::Operand::getExternalData() != nullptr;
+ }
+
+ Ice::CfgNode* Optimizer::getNode(Ice::Inst* inst)
+ {
+ return (Ice::CfgNode*)inst->Ice::Inst::getExternalData();
+ }
+
+ void Optimizer::setNode(Ice::Inst* inst, Ice::CfgNode* node)
+ {
+ inst->Ice::Inst::setExternalData(node);
+ }
+
+ Ice::Inst* Optimizer::getDefinition(Ice::Variable* var)
+ {
+ return (Ice::Inst*)var->Ice::Variable::getExternalData();
+ }
+
+ void Optimizer::setDefinition(Ice::Variable* var, Ice::Inst* inst)
+ {
+ var->Ice::Variable::setExternalData(inst);
+ }
+
+ const std::vector<Optimizer::LoadStoreInst>& Optimizer::getLoadStoreInsts(Ice::CfgNode* node)
+ {
+ return *((const std::vector<LoadStoreInst>*)node->Ice::CfgNode::getExternalData());
+ }
+
+ void Optimizer::setLoadStoreInsts(Ice::CfgNode* node, std::vector<LoadStoreInst>* insts)
+ {
+ node->Ice::CfgNode::setExternalData(insts);
+ }
+
+ bool Optimizer::hasLoadStoreInsts(Ice::CfgNode* node) const
+ {
+ return node->Ice::CfgNode::getExternalData() != nullptr;
+ }
+
bool Optimizer::Uses::areOnlyLoadStore() const
{
return size() == (loads.size() + stores.size());
diff --git a/third_party/subzero/src/IceCfgNode.h b/third_party/subzero/src/IceCfgNode.h
index cf20e13..b2d2de8 100644
--- a/third_party/subzero/src/IceCfgNode.h
+++ b/third_party/subzero/src/IceCfgNode.h
@@ -127,6 +127,9 @@
}
CfgNode *shortCircuit();
+ inline void* getExternalData() const { return externalData; }
+ inline void setExternalData(void* data) { externalData = data; }
+
private:
CfgNode(Cfg *Func, SizeT Number)
: Func(Func), Number(Number), NumberOrig(Number),
@@ -145,6 +148,11 @@
NodeList OutEdges; /// in no particular order
PhiList Phis; /// unordered set of phi instructions
InstList Insts; /// ordered list of non-phi instructions
+
+ /// External data can be set by an optimizer to compute and retain any
+ /// information related to the current node. All the memory used to
+ /// store this information must be managed by the optimizer.
+ void* externalData = nullptr;
};
} // end of namespace Ice
diff --git a/third_party/subzero/src/IceInst.h b/third_party/subzero/src/IceInst.h
index 187c16d..8ffb1d2 100644
--- a/third_party/subzero/src/IceInst.h
+++ b/third_party/subzero/src/IceInst.h
@@ -199,6 +199,9 @@
llvm::report_fatal_error("Inst unexpectedly deleted");
}
+ inline void* getExternalData() const { return externalData; }
+ inline void setExternalData(void* data) { externalData = data; }
+
protected:
Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest);
void addSource(Operand *Src) {
@@ -236,6 +239,10 @@
/// live range recorded in a basic block has at most one start and at most one
/// end.
bool IsDestRedefined = false;
+ /// External data can be set by an optimizer to compute and retain any
+ /// information related to the current instruction. All the memory used to
+ /// store this information must be managed by the optimizer.
+ void* externalData = nullptr;
Variable *Dest;
const SizeT MaxSrcs; // only used for assert
diff --git a/third_party/subzero/src/IceOperand.h b/third_party/subzero/src/IceOperand.h
index 7e55ac0..5501f47 100644
--- a/third_party/subzero/src/IceOperand.h
+++ b/third_party/subzero/src/IceOperand.h
@@ -106,6 +106,9 @@
return 0;
}
+ inline void* getExternalData() const { return externalData; }
+ inline void setExternalData(void* data) { externalData = data; }
+
protected:
Operand(OperandKind Kind, Type Ty) : Ty(Ty), Kind(Kind) {
// It is undefined behavior to have a larger value in the enum
@@ -117,6 +120,11 @@
/// Vars and NumVars are initialized by the derived class.
SizeT NumVars = 0;
Variable **Vars = nullptr;
+
+ /// External data can be set by an optimizer to compute and retain any
+ /// information related to the current operand. All the memory used to
+ /// store this information must be managed by the optimizer.
+ void* externalData = nullptr;
};
template <class StreamType>
@@ -854,6 +862,9 @@
SizeT hashValue() const override { return std::hash<SizeT>()(getIndex()); }
+ inline void* getExternalData() const { return externalData; }
+ inline void setExternalData(void* data) { externalData = data; }
+
protected:
Variable(const Cfg *Func, OperandKind K, Type Ty, SizeT Index)
: Operand(K, Ty), Number(Index),
@@ -895,6 +906,10 @@
/// This Variable may be "linked" to another Variable, such that if neither
/// Variable gets a register, they are guaranteed to share a stack location.
Variable *LinkedTo = nullptr;
+ /// External data can be set by an optimizer to compute and retain any
+ /// information related to the current variable. All the memory used to
+ /// store this information must be managed by the optimizer.
+ void* externalData = nullptr;
};
// Variable64On32 represents a 64-bit variable on a 32-bit architecture. In