| //===- subzero/src/IceLoopAnalyzer.cpp - Loop Analysis --------------------===// |
| // |
| // The Subzero Code Generator |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// \brief Implements the loop analysis on the CFG. |
| /// |
| //===----------------------------------------------------------------------===// |
| #include "IceLoopAnalyzer.h" |
| |
| #include "IceCfg.h" |
| #include "IceCfgNode.h" |
| |
| #include <algorithm> |
| |
| namespace Ice { |
| class LoopAnalyzer { |
| public: |
| explicit LoopAnalyzer(Cfg *Func); |
| |
| /// Use Tarjan's strongly connected components algorithm to identify outermost |
| /// to innermost loops. By deleting the head of the loop from the graph, inner |
| /// loops can be found. This assumes that the head node is not shared between |
| /// loops but instead all paths to the head come from 'continue' constructs. |
| /// |
| /// This only computes the loop nest depth within the function and does not |
| /// take into account whether the function was called from within a loop. |
| // TODO(ascull): this currently uses a extension of Tarjan's algorithm with |
| // is bounded linear. ncbray suggests another algorithm which is linear in |
| // practice but not bounded linear. I think it also finds dominators. |
| // http://lenx.100871.net/papers/loop-SAS.pdf |
| |
| CfgVector<CfgUnorderedSet<SizeT>> getLoopBodies() { return Loops; } |
| |
| private: |
| LoopAnalyzer() = delete; |
| LoopAnalyzer(const LoopAnalyzer &) = delete; |
| LoopAnalyzer &operator=(const LoopAnalyzer &) = delete; |
| void computeLoopNestDepth(); |
| |
| using IndexT = uint32_t; |
| static constexpr IndexT UndefinedIndex = 0; |
| static constexpr IndexT FirstDefinedIndex = 1; |
| |
| // TODO(ascull): classify the other fields |
| class LoopNode { |
| LoopNode() = delete; |
| LoopNode operator=(const LoopNode &) = delete; |
| |
| public: |
| explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); } |
| LoopNode(const LoopNode &) = default; |
| |
| void reset(); |
| |
| NodeList::const_iterator successorsEnd() const; |
| NodeList::const_iterator currentSuccessor() const { return Succ; } |
| void nextSuccessor() { ++Succ; } |
| |
| void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; } |
| bool isVisited() const { return Index != UndefinedIndex; } |
| IndexT getIndex() const { return Index; } |
| |
| void tryLink(IndexT NewLink) { |
| if (NewLink < LowLink) |
| LowLink = NewLink; |
| } |
| IndexT getLowLink() const { return LowLink; } |
| |
| void setOnStack(bool NewValue = true) { OnStack = NewValue; } |
| bool isOnStack() const { return OnStack; } |
| |
| void setDeleted() { Deleted = true; } |
| bool isDeleted() const { return Deleted; } |
| |
| void incrementLoopNestDepth(); |
| bool hasSelfEdge() const; |
| |
| CfgNode *getNode() { return BB; } |
| |
| private: |
| CfgNode *BB; |
| NodeList::const_iterator Succ; |
| IndexT Index; |
| IndexT LowLink; |
| bool OnStack; |
| bool Deleted = false; |
| }; |
| |
| using LoopNodeList = CfgVector<LoopNode>; |
| using LoopNodePtrList = CfgVector<LoopNode *>; |
| |
| /// Process the node as part as part of Tarjan's algorithm and return either a |
| /// node to recurse into or nullptr when the node has been fully processed. |
| LoopNode *processNode(LoopNode &Node); |
| |
| /// The function to analyze for loops. |
| Cfg *const Func; |
| /// A list of decorated nodes in the same order as Func->getNodes() which |
| /// means the node's index will also be valid in this list. |
| LoopNodeList AllNodes; |
| /// This is used as a replacement for the call stack. |
| LoopNodePtrList WorkStack; |
| /// Track which loop a node belongs to. |
| LoopNodePtrList LoopStack; |
| /// The index to assign to the next visited node. |
| IndexT NextIndex = FirstDefinedIndex; |
| /// The number of nodes which have been marked deleted. This is used to track |
| /// when the iteration should end. |
| LoopNodePtrList::size_type NumDeletedNodes = 0; |
| |
| /// All the Loops, in descending order of size |
| CfgVector<CfgUnorderedSet<SizeT>> Loops; |
| }; |
| void LoopAnalyzer::LoopNode::reset() { |
| if (Deleted) |
| return; |
| Succ = BB->getOutEdges().begin(); |
| Index = LowLink = UndefinedIndex; |
| OnStack = false; |
| } |
| |
| NodeList::const_iterator LoopAnalyzer::LoopNode::successorsEnd() const { |
| return BB->getOutEdges().end(); |
| } |
| |
| void LoopAnalyzer::LoopNode::incrementLoopNestDepth() { |
| BB->incrementLoopNestDepth(); |
| } |
| |
| bool LoopAnalyzer::LoopNode::hasSelfEdge() const { |
| for (CfgNode *Succ : BB->getOutEdges()) { |
| if (Succ == BB) |
| return true; |
| } |
| return false; |
| } |
| |
| LoopAnalyzer::LoopAnalyzer(Cfg *Fn) : Func(Fn) { |
| const NodeList &Nodes = Func->getNodes(); |
| |
| // Allocate memory ahead of time. This is why a vector is used instead of a |
| // stack which doesn't support reserving (or bulk erasure used below). |
| AllNodes.reserve(Nodes.size()); |
| WorkStack.reserve(Nodes.size()); |
| LoopStack.reserve(Nodes.size()); |
| |
| // Create the LoopNodes from the function's CFG |
| for (CfgNode *Node : Nodes) |
| AllNodes.emplace_back(Node); |
| computeLoopNestDepth(); |
| } |
| |
| void LoopAnalyzer::computeLoopNestDepth() { |
| assert(AllNodes.size() == Func->getNodes().size()); |
| assert(NextIndex == FirstDefinedIndex); |
| assert(NumDeletedNodes == 0); |
| |
| while (NumDeletedNodes < AllNodes.size()) { |
| // Prepare to run Tarjan's |
| for (LoopNode &Node : AllNodes) |
| Node.reset(); |
| |
| assert(WorkStack.empty()); |
| assert(LoopStack.empty()); |
| |
| for (LoopNode &Node : AllNodes) { |
| if (Node.isDeleted() || Node.isVisited()) |
| continue; |
| |
| WorkStack.push_back(&Node); |
| |
| while (!WorkStack.empty()) { |
| LoopNode &WorkNode = *WorkStack.back(); |
| if (LoopNode *Succ = processNode(WorkNode)) |
| WorkStack.push_back(Succ); |
| else |
| WorkStack.pop_back(); |
| } |
| } |
| } |
| } |
| |
| LoopAnalyzer::LoopNode * |
| LoopAnalyzer::processNode(LoopAnalyzer::LoopNode &Node) { |
| if (!Node.isVisited()) { |
| Node.visit(NextIndex++); |
| LoopStack.push_back(&Node); |
| Node.setOnStack(); |
| } else { |
| // Returning to a node after having recursed into Succ so continue |
| // iterating through successors after using the Succ.LowLink value that was |
| // computed in the recursion. |
| LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()]; |
| Node.tryLink(Succ.getLowLink()); |
| Node.nextSuccessor(); |
| } |
| |
| // Visit the successors and recurse into unvisited nodes. The recursion could |
| // cause the iteration to be suspended but it will resume as the stack is |
| // unwound. |
| auto SuccEnd = Node.successorsEnd(); |
| for (; Node.currentSuccessor() != SuccEnd; Node.nextSuccessor()) { |
| LoopNode &Succ = AllNodes[(*Node.currentSuccessor())->getIndex()]; |
| |
| if (Succ.isDeleted()) |
| continue; |
| |
| if (!Succ.isVisited()) |
| return &Succ; |
| else if (Succ.isOnStack()) |
| Node.tryLink(Succ.getIndex()); |
| } |
| |
| if (Node.getLowLink() != Node.getIndex()) |
| return nullptr; |
| |
| // Single node means no loop in the CFG |
| if (LoopStack.back() == &Node) { |
| LoopStack.back()->setOnStack(false); |
| if (Node.hasSelfEdge()) |
| LoopStack.back()->incrementLoopNestDepth(); |
| LoopStack.back()->setDeleted(); |
| ++NumDeletedNodes; |
| LoopStack.pop_back(); |
| return nullptr; |
| } |
| |
| // Reaching here means a loop has been found! It consists of the nodes on the |
| // top of the stack, down until the current node being processed, Node, is |
| // found. |
| for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) { |
| (*It)->setOnStack(false); |
| (*It)->incrementLoopNestDepth(); |
| // Remove the loop from the stack and delete the head node |
| if (*It == &Node) { |
| (*It)->setDeleted(); |
| ++NumDeletedNodes; |
| CfgUnorderedSet<SizeT> LoopNodes; |
| for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end(); |
| ++LoopIter) { |
| LoopNodes.insert((*LoopIter)->getNode()->getIndex()); |
| } |
| Loops.push_back(LoopNodes); |
| LoopStack.erase(It.base() - 1, LoopStack.end()); |
| break; |
| } |
| } |
| |
| return nullptr; |
| } |
| CfgVector<Loop> ComputeLoopInfo(Cfg *Func) { |
| auto LoopBodies = LoopAnalyzer(Func).getLoopBodies(); |
| |
| CfgVector<Loop> Loops; |
| Loops.reserve(LoopBodies.size()); |
| std::sort( |
| LoopBodies.begin(), LoopBodies.end(), |
| [](const CfgUnorderedSet<SizeT> &A, const CfgUnorderedSet<SizeT> &B) { |
| return A.size() > B.size(); |
| }); |
| for (auto &LoopBody : LoopBodies) { |
| CfgNode *Header = nullptr; |
| bool IsSimpleLoop = true; |
| for (auto NodeIndex : LoopBody) { |
| CfgNode *Cur = Func->getNodes()[NodeIndex]; |
| for (auto *Prev : Cur->getInEdges()) { |
| if (LoopBody.find(Prev->getIndex()) == |
| LoopBody.end()) { // coming from outside |
| if (Header == nullptr) { |
| Header = Cur; |
| } else { |
| Header = nullptr; |
| IsSimpleLoop = false; |
| break; |
| } |
| } |
| } |
| if (!IsSimpleLoop) { |
| break; |
| } |
| } |
| if (!IsSimpleLoop) |
| continue; // To next potential loop |
| |
| CfgNode *PreHeader = nullptr; |
| for (auto *Prev : Header->getInEdges()) { |
| if (LoopBody.find(Prev->getIndex()) == LoopBody.end()) { |
| if (PreHeader == nullptr) { |
| PreHeader = Prev; |
| } else { |
| PreHeader = nullptr; |
| break; |
| } |
| } |
| } |
| |
| Loops.emplace_back(Header, PreHeader, LoopBody); |
| } |
| return Loops; |
| } |
| |
| } // end of namespace Ice |