Std:unordered_map removed from Optimizer for improved performance

The use of std::unordered_map was the main source of slowdowns in
the optimizer code, so it was re-written without any maps. In order
to do so, the information originally carried by the maps was moved
to user-defined information stored within Subzero classes. The
optimizer now manages the memory used to store this information.

Bug swiftshader:69
Bug b/67872293

Change-Id: I2757169f0d3d467766317af6e00e149b4317fb9c
Reviewed-on: https://swiftshader-review.googlesource.com/19508
Tested-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/Optimizer.cpp b/src/Reactor/Optimizer.cpp
index 83ea5f2..15c5f84 100644
--- a/src/Reactor/Optimizer.cpp
+++ b/src/Reactor/Optimizer.cpp
@@ -17,7 +17,6 @@
 #include "src/IceCfg.h"
 #include "src/IceCfgNode.h"
 
-#include <unordered_map>
 #include <vector>
 
 namespace
@@ -61,9 +60,35 @@
 			std::vector<Ice::Inst*> stores;
 		};
 
-		std::unordered_map<Ice::Operand*, Uses> uses;
-		std::unordered_map<Ice::Inst*, Ice::CfgNode*> node;
-		std::unordered_map<Ice::Variable*, Ice::Inst*> definition;
+		struct LoadStoreInst
+		{
+			LoadStoreInst(Ice::Inst* inst, bool isStore)
+			  : inst(inst),
+			    address(isStore ? storeAddress(inst) : loadAddress(inst)),
+			    isStore(isStore)
+			{
+			}
+
+			Ice::Inst* inst;
+			Ice::Operand *address;
+			bool isStore;
+		};
+
+		Optimizer::Uses* getUses(Ice::Operand*);
+		void setUses(Ice::Operand*, Optimizer::Uses*);
+		bool hasUses(Ice::Operand*) const;
+
+		Ice::CfgNode* getNode(Ice::Inst*);
+		void setNode(Ice::Inst*, Ice::CfgNode*);
+
+		Ice::Inst* getDefinition(Ice::Variable*);
+		void setDefinition(Ice::Variable*, Ice::Inst*);
+
+		const std::vector<LoadStoreInst>& getLoadStoreInsts(Ice::CfgNode*);
+		void setLoadStoreInsts(Ice::CfgNode*, std::vector<LoadStoreInst>*);
+		bool hasLoadStoreInsts(Ice::CfgNode* node) const;
+
+		std::vector<Optimizer::Uses*> allocatedUses;
 	};
 
 	void Optimizer::run(Ice::Cfg *function)
@@ -78,6 +103,12 @@
 		eliminateLoadsFollowingSingleStore();
 		optimizeStoresInSingleBasicBlock();
 		eliminateDeadCode();
+
+		for(auto uses : allocatedUses)
+		{
+			delete uses;
+		}
+		allocatedUses.clear();
 	}
 
 	void Optimizer::eliminateDeadCode()
@@ -123,8 +154,13 @@
 			}
 
 			Ice::Operand *address = alloca.getDest();
-			const auto &addressEntry = uses.find(address);
-			const auto &addressUses = addressEntry->second;
+
+			if(!hasUses(address))
+			{
+				return;
+			}
+
+			const auto &addressUses = *getUses(address);
 
 			if(!addressUses.areOnlyLoadStore())
 			{
@@ -137,26 +173,29 @@
 				{
 					Ice::Variable *loadData = load->getDest();
 
-					for(Ice::Inst *use : uses[loadData])
+					if(hasUses(loadData))
 					{
-						for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
+						for(Ice::Inst *use : *getUses(loadData))
 						{
-							if(use->getSrc(i) == loadData)
+							for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
 							{
-								auto *undef = context->getConstantUndef(loadData->getType());
+								if(use->getSrc(i) == loadData)
+								{
+									auto *undef = context->getConstantUndef(loadData->getType());
 
-								use->replaceSource(i, undef);
+									use->replaceSource(i, undef);
+								}
 							}
 						}
-					}
 
-					uses.erase(loadData);
+						setUses(loadData, nullptr);
+					}
 
 					load->setDeleted();
 				}
 
 				alloca.setDeleted();
-				uses.erase(addressEntry);
+				setUses(address, nullptr);
 			}
 		}
 	}
@@ -178,8 +217,13 @@
 			}
 
 			Ice::Operand *address = alloca.getDest();
-			const auto &addressEntry = uses.find(address);
-			auto &addressUses = addressEntry->second;
+
+			if(!hasUses(address))
+			{
+				return;
+			}
+
+			auto &addressUses = *getUses(address);
 
 			if(!addressUses.areOnlyLoadStore())
 			{
@@ -236,23 +280,26 @@
 
 						alloca.setDeleted();
 						store->setDeleted();
-						uses.erase(address);
+						setUses(address, nullptr);
 
-						auto &valueUses = uses[storeValue];
-
-						for(size_t i = 0; i < valueUses.size(); i++)
+						if(hasUses(storeValue))
 						{
-							if(valueUses[i] == store)
+							auto &valueUses = *getUses(storeValue);
+
+							for(size_t i = 0; i < valueUses.size(); i++)
 							{
-								valueUses[i] = valueUses.back();
-								valueUses.pop_back();
-								break;
+								if(valueUses[i] == store)
+								{
+									valueUses[i] = valueUses.back();
+									valueUses.pop_back();
+									break;
+								}
 							}
-						}
 
-						if(valueUses.empty())
-						{
-							uses.erase(storeValue);
+							if(valueUses.empty())
+							{
+								setUses(storeValue, nullptr);
+							}
 						}
 
 						break;
@@ -266,21 +313,7 @@
 	{
 		Ice::CfgNode *entryBlock = function->getEntryNode();
 
-		struct LoadStoreInst
-		{
-			LoadStoreInst(Ice::Inst* inst, bool isStore)
-			 : inst(inst),
-			   address(isStore ? storeAddress(inst) : loadAddress(inst)),
-			   isStore(isStore)
-			{
-			}
-
-			Ice::Inst* inst;
-			Ice::Operand *address;
-			bool isStore;
-		};
-
-		std::unordered_map<Ice::CfgNode*, std::vector<LoadStoreInst> > loadStoreMap;
+		std::vector<std::vector<LoadStoreInst>* > allocatedVectors;
 
 		for(Ice::Inst &alloca : entryBlock->getInsts())
 		{
@@ -295,20 +328,25 @@
 			}
 
 			Ice::Operand *address = alloca.getDest();
-			const auto &addressEntry = uses.find(address);
-			const auto &addressUses = addressEntry->second;
+
+			if(!hasUses(address))
+			{
+				return;
+			}
+
+			const auto &addressUses = *getUses(address);
 
 			if(!addressUses.areOnlyLoadStore())
 			{
 				continue;
 			}
 
-			Ice::CfgNode *singleBasicBlock = node[addressUses.stores[0]];
+			Ice::CfgNode *singleBasicBlock = getNode(addressUses.stores[0]);
 
 			for(size_t i = 1; i < addressUses.stores.size(); i++)
 			{
 				Ice::Inst *store = addressUses.stores[i];
-				if(node[store] != singleBasicBlock)
+				if(getNode(store) != singleBasicBlock)
 				{
 					singleBasicBlock = nullptr;
 					break;
@@ -317,9 +355,11 @@
 
 			if(singleBasicBlock)
 			{
-				if(loadStoreMap.find(singleBasicBlock) == loadStoreMap.end())
+				if(!hasLoadStoreInsts(singleBasicBlock))
 				{
-					std::vector<LoadStoreInst> &loadStoreVector = loadStoreMap[singleBasicBlock];
+					std::vector<LoadStoreInst>* loadStoreInstVector = new std::vector<LoadStoreInst>();
+					setLoadStoreInsts(singleBasicBlock, loadStoreInstVector);
+					allocatedVectors.push_back(loadStoreInstVector);
 					for(Ice::Inst &inst : singleBasicBlock->getInsts())
 					{
 						if(inst.isDeleted())
@@ -332,7 +372,7 @@
 
 						if(isStoreInst || isLoadInst)
 						{
-							loadStoreVector.push_back(LoadStoreInst(&inst, isStoreInst));
+							loadStoreInstVector->push_back(LoadStoreInst(&inst, isStoreInst));
 						}
 					}
 				}
@@ -341,7 +381,7 @@
 				Ice::Operand *storeValue = nullptr;
 				bool unmatchedLoads = false;
 
-				for (auto& loadStoreInst : loadStoreMap[singleBasicBlock])
+				for (auto& loadStoreInst : getLoadStoreInsts(singleBasicBlock))
 				{
 					Ice::Inst* inst = loadStoreInst.inst;
 
@@ -380,14 +420,15 @@
 				}
 			}
 		}
+
+		for(auto loadStoreInstVector : allocatedVectors)
+		{
+			delete loadStoreInstVector;
+		}
 	}
 
 	void Optimizer::analyzeUses(Ice::Cfg *function)
 	{
-		uses.clear();
-		node.clear();
-		definition.clear();
-
 		for(Ice::CfgNode *basicBlock : function->getNodes())
 		{
 			for(Ice::Inst &instruction : basicBlock->getInsts())
@@ -397,8 +438,11 @@
 					continue;
 				}
 
-				node[&instruction] = basicBlock;
-				definition[instruction.getDest()] = &instruction;
+				setNode(&instruction, basicBlock);
+				if(instruction.getDest())
+				{
+					setDefinition(instruction.getDest(), &instruction);
+				}
 
 				for(Ice::SizeT i = 0; i < instruction.getSrcSize(); i++)
 				{
@@ -414,7 +458,7 @@
 					if(i == unique)
 					{
 						Ice::Operand *src = instruction.getSrc(i);
-						uses[src].insert(src, &instruction);
+						getUses(src)->insert(src, &instruction);
 					}
 				}
 			}
@@ -430,23 +474,26 @@
 			newValue = context->getConstantUndef(oldValue->getType());
 		}
 
-		for(Ice::Inst *use : uses[oldValue])
+		if(hasUses(oldValue))
 		{
-			assert(!use->isDeleted());   // Should have been removed from uses already
-
-			for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
+			for(Ice::Inst *use : *getUses(oldValue))
 			{
-				if(use->getSrc(i) == oldValue)
+				assert(!use->isDeleted());   // Should have been removed from uses already
+
+				for(Ice::SizeT i = 0; i < use->getSrcSize(); i++)
 				{
-					use->replaceSource(i, newValue);
+					if(use->getSrc(i) == oldValue)
+					{
+						use->replaceSource(i, newValue);
+					}
 				}
+
+				getUses(newValue)->insert(newValue, use);
 			}
 
-			uses[newValue].insert(newValue, use);
+			setUses(oldValue, nullptr);
 		}
 
-		uses.erase(oldValue);
-
 		deleteInstruction(instruction);
 	}
 
@@ -463,21 +510,19 @@
 		{
 			Ice::Operand *src = instruction->getSrc(i);
 
-			const auto &srcEntry = uses.find(src);
-
-			if(srcEntry != uses.end())
+			if(hasUses(src))
 			{
-				auto &srcUses = srcEntry->second;
+				auto &srcUses = *getUses(src);
 
 				srcUses.erase(instruction);
 
 				if(srcUses.empty())
 				{
-					uses.erase(srcEntry);
+					setUses(src, nullptr);
 
 					if(Ice::Variable *var = llvm::dyn_cast<Ice::Variable>(src))
 					{
-						deleteInstruction(definition[var]);
+						deleteInstruction(getDefinition(var));
 					}
 				}
 			}
@@ -490,17 +535,25 @@
 
 		if(dest)
 		{
-			return uses[dest].empty() && !instruction->hasSideEffects();
+			return (!hasUses(dest) || getUses(dest)->empty()) && !instruction->hasSideEffects();
 		}
 		else if(isStore(*instruction))
 		{
 			if(Ice::Variable *address = llvm::dyn_cast<Ice::Variable>(storeAddress(instruction)))
 			{
-				Ice::Inst *def = definition[address];
+				Ice::Inst *def = getDefinition(address);
 
 				if(def && llvm::isa<Ice::InstAlloca>(def))
 				{
-					return uses[address].size() == uses[address].stores.size();   // Dead if all uses are stores
+					if(hasUses(address))
+					{
+						Optimizer::Uses* uses = getUses(address);
+						return uses->size() == uses->stores.size();   // Dead if all uses are stores
+					}
+					else
+					{
+						return true; // No uses
+					}
 				}
 			}
 		}
@@ -654,6 +707,63 @@
 		return false;
 	}
 
+	Optimizer::Uses* Optimizer::getUses(Ice::Operand* operand)
+	{
+		Optimizer::Uses* uses = (Optimizer::Uses*)operand->Ice::Operand::getExternalData();
+		if(!uses)
+		{
+			uses = new Optimizer::Uses;
+			setUses(operand, uses);
+			allocatedUses.push_back(uses);
+		}
+		return uses;
+	}
+
+	void Optimizer::setUses(Ice::Operand* operand, Optimizer::Uses* uses)
+	{
+		operand->Ice::Operand::setExternalData(uses);
+	}
+
+	bool Optimizer::hasUses(Ice::Operand* operand) const
+	{
+		return operand->Ice::Operand::getExternalData() != nullptr;
+	}
+
+	Ice::CfgNode* Optimizer::getNode(Ice::Inst* inst)
+	{
+		return (Ice::CfgNode*)inst->Ice::Inst::getExternalData();
+	}
+
+	void Optimizer::setNode(Ice::Inst* inst, Ice::CfgNode* node)
+	{
+		inst->Ice::Inst::setExternalData(node);
+	}
+
+	Ice::Inst* Optimizer::getDefinition(Ice::Variable* var)
+	{
+		return (Ice::Inst*)var->Ice::Variable::getExternalData();
+	}
+
+	void Optimizer::setDefinition(Ice::Variable* var, Ice::Inst* inst)
+	{
+		var->Ice::Variable::setExternalData(inst);
+	}
+
+	const std::vector<Optimizer::LoadStoreInst>& Optimizer::getLoadStoreInsts(Ice::CfgNode* node)
+	{
+		return *((const std::vector<LoadStoreInst>*)node->Ice::CfgNode::getExternalData());
+	}
+
+	void Optimizer::setLoadStoreInsts(Ice::CfgNode* node, std::vector<LoadStoreInst>* insts)
+	{
+		node->Ice::CfgNode::setExternalData(insts);
+	}
+
+	bool Optimizer::hasLoadStoreInsts(Ice::CfgNode* node) const
+	{
+		return node->Ice::CfgNode::getExternalData() != nullptr;
+	}
+
 	bool Optimizer::Uses::areOnlyLoadStore() const
 	{
 		return size() == (loads.size() + stores.size());
diff --git a/third_party/subzero/src/IceCfgNode.h b/third_party/subzero/src/IceCfgNode.h
index cf20e13..b2d2de8 100644
--- a/third_party/subzero/src/IceCfgNode.h
+++ b/third_party/subzero/src/IceCfgNode.h
@@ -127,6 +127,9 @@
   }
   CfgNode *shortCircuit();
 
+  inline void* getExternalData() const { return externalData; }
+  inline void setExternalData(void* data) { externalData = data; }
+
 private:
   CfgNode(Cfg *Func, SizeT Number)
       : Func(Func), Number(Number), NumberOrig(Number),
@@ -145,6 +148,11 @@
   NodeList OutEdges;                 /// in no particular order
   PhiList Phis;                      /// unordered set of phi instructions
   InstList Insts;                    /// ordered list of non-phi instructions
+
+  /// External data can be set by an optimizer to compute and retain any
+  /// information related to the current node. All the memory used to
+  /// store this information must be managed by the optimizer.
+  void* externalData = nullptr;
 };
 
 } // end of namespace Ice
diff --git a/third_party/subzero/src/IceInst.h b/third_party/subzero/src/IceInst.h
index 187c16d..8ffb1d2 100644
--- a/third_party/subzero/src/IceInst.h
+++ b/third_party/subzero/src/IceInst.h
@@ -199,6 +199,9 @@
     llvm::report_fatal_error("Inst unexpectedly deleted");
   }
 
+  inline void* getExternalData() const { return externalData; }
+  inline void setExternalData(void* data) { externalData = data; }
+
 protected:
   Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest);
   void addSource(Operand *Src) {
@@ -236,6 +239,10 @@
   /// live range recorded in a basic block has at most one start and at most one
   /// end.
   bool IsDestRedefined = false;
+  /// External data can be set by an optimizer to compute and retain any
+  /// information related to the current instruction. All the memory used to
+  /// store this information must be managed by the optimizer.
+  void* externalData = nullptr;
 
   Variable *Dest;
   const SizeT MaxSrcs; // only used for assert
diff --git a/third_party/subzero/src/IceOperand.h b/third_party/subzero/src/IceOperand.h
index 7e55ac0..5501f47 100644
--- a/third_party/subzero/src/IceOperand.h
+++ b/third_party/subzero/src/IceOperand.h
@@ -106,6 +106,9 @@
     return 0;
   }
 
+  inline void* getExternalData() const { return externalData; }
+  inline void setExternalData(void* data) { externalData = data; }
+
 protected:
   Operand(OperandKind Kind, Type Ty) : Ty(Ty), Kind(Kind) {
     // It is undefined behavior to have a larger value in the enum
@@ -117,6 +120,11 @@
   /// Vars and NumVars are initialized by the derived class.
   SizeT NumVars = 0;
   Variable **Vars = nullptr;
+
+  /// External data can be set by an optimizer to compute and retain any
+  /// information related to the current operand. All the memory used to
+  /// store this information must be managed by the optimizer.
+  void* externalData = nullptr;
 };
 
 template <class StreamType>
@@ -854,6 +862,9 @@
 
   SizeT hashValue() const override { return std::hash<SizeT>()(getIndex()); }
 
+  inline void* getExternalData() const { return externalData; }
+  inline void setExternalData(void* data) { externalData = data; }
+
 protected:
   Variable(const Cfg *Func, OperandKind K, Type Ty, SizeT Index)
       : Operand(K, Ty), Number(Index),
@@ -895,6 +906,10 @@
   /// This Variable may be "linked" to another Variable, such that if neither
   /// Variable gets a register, they are guaranteed to share a stack location.
   Variable *LinkedTo = nullptr;
+  /// External data can be set by an optimizer to compute and retain any
+  /// information related to the current variable. All the memory used to
+  /// store this information must be managed by the optimizer.
+  void* externalData = nullptr;
 };
 
 // Variable64On32 represents a 64-bit variable on a 32-bit architecture. In