third_party/subzero/src/IceLoopAnalyzer.cpp - SwiftShader - Git at Google

 //===- subzero/src/IceLoopAnalyzer.cpp - Loop Analysis --------------------===//
 //
 //                        The Subzero Code Generator
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
 /// \brief Implements the loop analysis on the CFG.
 ///
 //===----------------------------------------------------------------------===//
 #include "IceLoopAnalyzer.h"

 #include "IceCfg.h"
 #include "IceCfgNode.h"

 #include <algorithm>

 namespace Ice {
 class LoopAnalyzer {
 public:
   explicit LoopAnalyzer(Cfg *Func);

   /// Use Tarjan's strongly connected components algorithm to identify outermost
   /// to innermost loops. By deleting the head of the loop from the graph, inner
   /// loops can be found. This assumes that the head node is not shared between
   /// loops but instead all paths to the head come from 'continue' constructs.
   ///
   /// This only computes the loop nest depth within the function and does not
   /// take into account whether the function was called from within a loop.
   // TODO(ascull): this currently uses a extension of Tarjan's algorithm with
   // is bounded linear. ncbray suggests another algorithm which is linear in
   // practice but not bounded linear. I think it also finds dominators.
   // http://lenx.100871.net/papers/loop-SAS.pdf

   CfgVector<CfgUnorderedSet<SizeT>> getLoopBodies() { return Loops; }

 private:
   LoopAnalyzer() = delete;
   LoopAnalyzer(const LoopAnalyzer &) = delete;
   LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
   void computeLoopNestDepth();

   using IndexT = uint32_t;
   static constexpr IndexT UndefinedIndex = 0;
   static constexpr IndexT FirstDefinedIndex = 1;

   // TODO(ascull): classify the other fields
   class LoopNode {
     LoopNode() = delete;
     LoopNode operator=(const LoopNode &) = delete;

   public:
     explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
     LoopNode(const LoopNode &) = default;

     void reset();

     NodeList::const_iterator successorsEnd() const;
     NodeList::const_iterator currentSuccessor() const { return Succ; }
     void nextSuccessor() { ++Succ; }

     void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
     bool isVisited() const { return Index != UndefinedIndex; }
     IndexT getIndex() const { return Index; }

     void tryLink(IndexT NewLink) {
       if (NewLink < LowLink)
         LowLink = NewLink;
     }
     IndexT getLowLink() const { return LowLink; }

     void setOnStack(bool NewValue = true) { OnStack = NewValue; }
     bool isOnStack() const { return OnStack; }

     void setDeleted() { Deleted = true; }
     bool isDeleted() const { return Deleted; }

     void incrementLoopNestDepth();
     bool hasSelfEdge() const;

     CfgNode *getNode() { return BB; }

   private:
     CfgNode *BB;
     NodeList::const_iterator Succ;
     IndexT Index;
     IndexT LowLink;
     bool OnStack;
     bool Deleted = false;
   };

   using LoopNodeList = CfgVector<LoopNode>;
   using LoopNodePtrList = CfgVector<LoopNode *>;

   /// Process the node as part as part of Tarjan's algorithm and return either a
   /// node to recurse into or nullptr when the node has been fully processed.
   LoopNode *processNode(LoopNode &Node);

   /// The function to analyze for loops.
   Cfg *const Func;
   /// A list of decorated nodes in the same order as Func->getNodes() which
   /// means the node's index will also be valid in this list.
   LoopNodeList AllNodes;
   /// This is used as a replacement for the call stack.
   LoopNodePtrList WorkStack;
   /// Track which loop a node belongs to.
   LoopNodePtrList LoopStack;
   /// The index to assign to the next visited node.
   IndexT NextIndex = FirstDefinedIndex;
   /// The number of nodes which have been marked deleted. This is used to track
   /// when the iteration should end.
   LoopNodePtrList::size_type NumDeletedNodes = 0;

   /// All the Loops, in descending order of size
   CfgVector<CfgUnorderedSet<SizeT>> Loops;
 };
 void LoopAnalyzer::LoopNode::reset() {
   if (Deleted)
     return;
   Succ = BB->getOutEdges().begin();
   Index = LowLink = UndefinedIndex;
   OnStack = false;
 }

 NodeList::const_iterator LoopAnalyzer::LoopNode::successorsEnd() const {
   return BB->getOutEdges().end();
 }

 void LoopAnalyzer::LoopNode::incrementLoopNestDepth() {
   BB->incrementLoopNestDepth();
 }

 bool LoopAnalyzer::LoopNode::hasSelfEdge() const {
   for (CfgNode *Succ : BB->getOutEdges()) {
     if (Succ == BB)
       return true;
   }
   return false;
 }

 LoopAnalyzer::LoopAnalyzer(Cfg *Fn) : Func(Fn) {
   const NodeList &Nodes = Func->getNodes();

   // Allocate memory ahead of time. This is why a vector is used instead of a
   // stack which doesn't support reserving (or bulk erasure used below).
   AllNodes.reserve(Nodes.size());
   WorkStack.reserve(Nodes.size());
   LoopStack.reserve(Nodes.size());

   // Create the LoopNodes from the function's CFG
   for (CfgNode *Node : Nodes)
     AllNodes.emplace_back(Node);
   computeLoopNestDepth();
 }

 void LoopAnalyzer::computeLoopNestDepth() {
   assert(AllNodes.size() == Func->getNodes().size());
   assert(NextIndex == FirstDefinedIndex);
   assert(NumDeletedNodes == 0);

   while (NumDeletedNodes < AllNodes.size()) {
     // Prepare to run Tarjan's
     for (LoopNode &Node : AllNodes)
       Node.reset();

     assert(WorkStack.empty());
     assert(LoopStack.empty());

     for (LoopNode &Node : AllNodes) {
       if (Node.isDeleted() || Node.isVisited())
         continue;

       WorkStack.push_back(&Node);

       while (!WorkStack.empty()) {
         LoopNode &WorkNode = *WorkStack.back();
         if (LoopNode *Succ = processNode(WorkNode))
           WorkStack.push_back(Succ);
         else
           WorkStack.pop_back();
       }
     }
   }
 }

 LoopAnalyzer::LoopNode *
 LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
   if (!Node.isVisited()) {
     Node.visit(NextIndex++);
     LoopStack.push_back(&Node);
     Node.setOnStack();
   } else {
     // Returning to a node after having recursed into Succ so continue
     // iterating through successors after using the Succ.LowLink value that was
     // computed in the recursion.
     LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
     Node.tryLink(Succ.getLowLink());
     Node.nextSuccessor();
   }

   // Visit the successors and recurse into unvisited nodes. The recursion could
   // cause the iteration to be suspended but it will resume as the stack is
   // unwound.
   auto SuccEnd = Node.successorsEnd();
   for (; Node.currentSuccessor() != SuccEnd; Node.nextSuccessor()) {
     LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];

     if (Succ.isDeleted())
       continue;

     if (!Succ.isVisited())
       return &Succ;
     else if (Succ.isOnStack())
       Node.tryLink(Succ.getIndex());
   }

   if (Node.getLowLink() != Node.getIndex())
     return nullptr;

   // Single node means no loop in the CFG
   if (LoopStack.back() == &Node) {
     LoopStack.back()->setOnStack(false);
     if (Node.hasSelfEdge())
       LoopStack.back()->incrementLoopNestDepth();
     LoopStack.back()->setDeleted();
     ++NumDeletedNodes;
     LoopStack.pop_back();
     return nullptr;
   }

   // Reaching here means a loop has been found! It consists of the nodes on the
   // top of the stack, down until the current node being processed, Node, is
   // found.
   for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
     (*It)->setOnStack(false);
     (*It)->incrementLoopNestDepth();
     // Remove the loop from the stack and delete the head node
     if (*It == &Node) {
       (*It)->setDeleted();
       ++NumDeletedNodes;
       CfgUnorderedSet<SizeT> LoopNodes;
       for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end();
            ++LoopIter) {
         LoopNodes.insert((*LoopIter)->getNode()->getIndex());
       }
       Loops.push_back(LoopNodes);
       LoopStack.erase(It.base() - 1, LoopStack.end());
       break;
     }
   }

   return nullptr;
 }
 CfgVector<Loop> ComputeLoopInfo(Cfg *Func) {
   auto LoopBodies = LoopAnalyzer(Func).getLoopBodies();

   CfgVector<Loop> Loops;
   Loops.reserve(LoopBodies.size());
   std::sort(
       LoopBodies.begin(), LoopBodies.end(),
       [](const CfgUnorderedSet<SizeT> &A, const CfgUnorderedSet<SizeT> &B) {
         return A.size() > B.size();
       });
   for (auto &LoopBody : LoopBodies) {
     CfgNode *Header = nullptr;
     bool IsSimpleLoop = true;
     for (auto NodeIndex : LoopBody) {
       CfgNode *Cur = Func->getNodes()[NodeIndex];
       for (auto *Prev : Cur->getInEdges()) {
         if (LoopBody.find(Prev->getIndex()) ==
             LoopBody.end()) { // coming from outside
           if (Header == nullptr) {
             Header = Cur;
           } else {
             Header = nullptr;
             IsSimpleLoop = false;
             break;
           }
         }
       }
       if (!IsSimpleLoop) {
         break;
       }
     }
     if (!IsSimpleLoop)
       continue; // To next potential loop

     CfgNode *PreHeader = nullptr;
     for (auto *Prev : Header->getInEdges()) {
       if (LoopBody.find(Prev->getIndex()) == LoopBody.end()) {
         if (PreHeader == nullptr) {
           PreHeader = Prev;
         } else {
           PreHeader = nullptr;
           break;
         }
       }
     }

     Loops.emplace_back(Header, PreHeader, LoopBody);
   }
   return Loops;
 }

 } // end of namespace Ice
	//===- subzero/src/IceLoopAnalyzer.cpp - Loop Analysis --------------------===//
	//
	// The Subzero Code Generator
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	///
	/// \file
	/// \brief Implements the loop analysis on the CFG.
	///
	//===----------------------------------------------------------------------===//
	#include "IceLoopAnalyzer.h"

	#include "IceCfg.h"
	#include "IceCfgNode.h"

	#include <algorithm>

	namespace Ice {
	class LoopAnalyzer {
	public:
	explicit LoopAnalyzer(Cfg *Func);

	/// Use Tarjan's strongly connected components algorithm to identify outermost
	/// to innermost loops. By deleting the head of the loop from the graph, inner
	/// loops can be found. This assumes that the head node is not shared between
	/// loops but instead all paths to the head come from 'continue' constructs.
	///
	/// This only computes the loop nest depth within the function and does not
	/// take into account whether the function was called from within a loop.
	// TODO(ascull): this currently uses a extension of Tarjan's algorithm with
	// is bounded linear. ncbray suggests another algorithm which is linear in
	// practice but not bounded linear. I think it also finds dominators.
	// http://lenx.100871.net/papers/loop-SAS.pdf

	CfgVector<CfgUnorderedSet<SizeT>> getLoopBodies() { return Loops; }

	private:
	LoopAnalyzer() = delete;
	LoopAnalyzer(const LoopAnalyzer &) = delete;
	LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
	void computeLoopNestDepth();

	using IndexT = uint32_t;
	static constexpr IndexT UndefinedIndex = 0;
	static constexpr IndexT FirstDefinedIndex = 1;

	// TODO(ascull): classify the other fields
	class LoopNode {
	LoopNode() = delete;
	LoopNode operator=(const LoopNode &) = delete;

	public:
	explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
	LoopNode(const LoopNode &) = default;

	void reset();

	NodeList::const_iterator successorsEnd() const;
	NodeList::const_iterator currentSuccessor() const { return Succ; }
	void nextSuccessor() { ++Succ; }

	void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
	bool isVisited() const { return Index != UndefinedIndex; }
	IndexT getIndex() const { return Index; }

	void tryLink(IndexT NewLink) {
	if (NewLink < LowLink)
	LowLink = NewLink;
	}
	IndexT getLowLink() const { return LowLink; }

	void setOnStack(bool NewValue = true) { OnStack = NewValue; }
	bool isOnStack() const { return OnStack; }

	void setDeleted() { Deleted = true; }
	bool isDeleted() const { return Deleted; }

	void incrementLoopNestDepth();
	bool hasSelfEdge() const;

	CfgNode *getNode() { return BB; }

	private:
	CfgNode *BB;
	NodeList::const_iterator Succ;
	IndexT Index;
	IndexT LowLink;
	bool OnStack;
	bool Deleted = false;
	};

	using LoopNodeList = CfgVector<LoopNode>;
	using LoopNodePtrList = CfgVector<LoopNode *>;

	/// Process the node as part as part of Tarjan's algorithm and return either a
	/// node to recurse into or nullptr when the node has been fully processed.
	LoopNode *processNode(LoopNode &Node);

	/// The function to analyze for loops.
	Cfg *const Func;
	/// A list of decorated nodes in the same order as Func->getNodes() which
	/// means the node's index will also be valid in this list.
	LoopNodeList AllNodes;
	/// This is used as a replacement for the call stack.
	LoopNodePtrList WorkStack;
	/// Track which loop a node belongs to.
	LoopNodePtrList LoopStack;
	/// The index to assign to the next visited node.
	IndexT NextIndex = FirstDefinedIndex;
	/// The number of nodes which have been marked deleted. This is used to track
	/// when the iteration should end.
	LoopNodePtrList::size_type NumDeletedNodes = 0;

	/// All the Loops, in descending order of size
	CfgVector<CfgUnorderedSet<SizeT>> Loops;
	};
	void LoopAnalyzer::LoopNode::reset() {
	if (Deleted)
	return;
	Succ = BB->getOutEdges().begin();
	Index = LowLink = UndefinedIndex;
	OnStack = false;
	}

	NodeList::const_iterator LoopAnalyzer::LoopNode::successorsEnd() const {
	return BB->getOutEdges().end();
	}

	void LoopAnalyzer::LoopNode::incrementLoopNestDepth() {
	BB->incrementLoopNestDepth();
	}

	bool LoopAnalyzer::LoopNode::hasSelfEdge() const {
	for (CfgNode *Succ : BB->getOutEdges()) {
	if (Succ == BB)
	return true;
	}
	return false;
	}

	LoopAnalyzer::LoopAnalyzer(Cfg *Fn) : Func(Fn) {
	const NodeList &Nodes = Func->getNodes();

	// Allocate memory ahead of time. This is why a vector is used instead of a
	// stack which doesn't support reserving (or bulk erasure used below).
	AllNodes.reserve(Nodes.size());
	WorkStack.reserve(Nodes.size());
	LoopStack.reserve(Nodes.size());

	// Create the LoopNodes from the function's CFG
	for (CfgNode *Node : Nodes)
	AllNodes.emplace_back(Node);
	computeLoopNestDepth();
	}

	void LoopAnalyzer::computeLoopNestDepth() {
	assert(AllNodes.size() == Func->getNodes().size());
	assert(NextIndex == FirstDefinedIndex);
	assert(NumDeletedNodes == 0);

	while (NumDeletedNodes < AllNodes.size()) {
	// Prepare to run Tarjan's
	for (LoopNode &Node : AllNodes)
	Node.reset();

	assert(WorkStack.empty());
	assert(LoopStack.empty());

	for (LoopNode &Node : AllNodes) {
	if (Node.isDeleted() \|\| Node.isVisited())
	continue;

	WorkStack.push_back(&Node);

	while (!WorkStack.empty()) {
	LoopNode &WorkNode = *WorkStack.back();
	if (LoopNode *Succ = processNode(WorkNode))
	WorkStack.push_back(Succ);
	else
	WorkStack.pop_back();
	}
	}
	}
	}

	LoopAnalyzer::LoopNode *
	LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) {
	if (!Node.isVisited()) {
	Node.visit(NextIndex++);
	LoopStack.push_back(&Node);
	Node.setOnStack();
	} else {
	// Returning to a node after having recursed into Succ so continue
	// iterating through successors after using the Succ.LowLink value that was
	// computed in the recursion.
	LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];
	Node.tryLink(Succ.getLowLink());
	Node.nextSuccessor();
	}

	// Visit the successors and recurse into unvisited nodes. The recursion could
	// cause the iteration to be suspended but it will resume as the stack is
	// unwound.
	auto SuccEnd = Node.successorsEnd();
	for (; Node.currentSuccessor() != SuccEnd; Node.nextSuccessor()) {
	LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()];

	if (Succ.isDeleted())
	continue;

	if (!Succ.isVisited())
	return &Succ;
	else if (Succ.isOnStack())
	Node.tryLink(Succ.getIndex());
	}

	if (Node.getLowLink() != Node.getIndex())
	return nullptr;

	// Single node means no loop in the CFG
	if (LoopStack.back() == &Node) {
	LoopStack.back()->setOnStack(false);
	if (Node.hasSelfEdge())
	LoopStack.back()->incrementLoopNestDepth();
	LoopStack.back()->setDeleted();
	++NumDeletedNodes;
	LoopStack.pop_back();
	return nullptr;
	}

	// Reaching here means a loop has been found! It consists of the nodes on the
	// top of the stack, down until the current node being processed, Node, is
	// found.
	for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
	(*It)->setOnStack(false);
	(*It)->incrementLoopNestDepth();
	// Remove the loop from the stack and delete the head node
	if (*It == &Node) {
	(*It)->setDeleted();
	++NumDeletedNodes;
	CfgUnorderedSet<SizeT> LoopNodes;
	for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end();
	++LoopIter) {
	LoopNodes.insert((*LoopIter)->getNode()->getIndex());
	}
	Loops.push_back(LoopNodes);
	LoopStack.erase(It.base() - 1, LoopStack.end());
	break;
	}
	}

	return nullptr;
	}
	CfgVector<Loop> ComputeLoopInfo(Cfg *Func) {
	auto LoopBodies = LoopAnalyzer(Func).getLoopBodies();

	CfgVector<Loop> Loops;
	Loops.reserve(LoopBodies.size());
	std::sort(
	LoopBodies.begin(), LoopBodies.end(),
	[](const CfgUnorderedSet<SizeT> &A, const CfgUnorderedSet<SizeT> &B) {
	return A.size() > B.size();
	});
	for (auto &LoopBody : LoopBodies) {
	CfgNode *Header = nullptr;
	bool IsSimpleLoop = true;
	for (auto NodeIndex : LoopBody) {
	CfgNode *Cur = Func->getNodes()[NodeIndex];
	for (auto *Prev : Cur->getInEdges()) {
	if (LoopBody.find(Prev->getIndex()) ==
	LoopBody.end()) { // coming from outside
	if (Header == nullptr) {
	Header = Cur;
	} else {
	Header = nullptr;
	IsSimpleLoop = false;
	break;
	}
	}
	}
	if (!IsSimpleLoop) {
	break;
	}
	}
	if (!IsSimpleLoop)
	continue; // To next potential loop

	CfgNode *PreHeader = nullptr;
	for (auto *Prev : Header->getInEdges()) {
	if (LoopBody.find(Prev->getIndex()) == LoopBody.end()) {
	if (PreHeader == nullptr) {
	PreHeader = Prev;
	} else {
	PreHeader = nullptr;
	break;
	}
	}
	}

	Loops.emplace_back(Header, PreHeader, LoopBody);
	}
	return Loops;
	}

	} // end of namespace Ice