| // Copyright (c) 2018 Google LLC. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "source/opt/loop_fission.h" |
| |
| #include <set> |
| |
| #include "source/opt/register_pressure.h" |
| |
| // Implement loop fission with an optional parameter to split only |
| // if the register pressure in a given loop meets a certain criteria. This is |
| // controlled via the constructors of LoopFissionPass. |
| // |
| // 1 - Build a list of loops to be split, these are top level loops (loops |
| // without child loops themselves) which meet the register pressure criteria, as |
| // determined by the ShouldSplitLoop method of LoopFissionPass. |
| // |
| // 2 - For each loop in the list, group each instruction into a set of related |
| // instructions by traversing each instructions users and operands recursively. |
| // We stop if we encounter an instruction we have seen before or an instruction |
| // which we don't consider relevent (i.e OpLoopMerge). We then group these |
| // groups into two different sets, one for the first loop and one for the |
| // second. |
| // |
| // 3 - We then run CanPerformSplit to check that it would be legal to split a |
| // loop using those two sets. We check that we haven't altered the relative |
| // order load/stores appear in the binary and that we aren't breaking any |
| // dependency between load/stores by splitting them into two loops. We also |
| // check that none of the OpBranch instructions are dependent on a load as we |
| // leave control flow structure intact and move only instructions in the body so |
| // we want to avoid any loads with side affects or aliasing. |
| // |
| // 4 - We then split the loop by calling SplitLoop. This function clones the |
| // loop and attaches it to the preheader and connects the new loops merge block |
| // to the current loop header block. We then use the two sets built in step 2 to |
| // remove instructions from each loop. If an instruction appears in the first |
| // set it is removed from the second loop and vice versa. |
| // |
| // 5 - If the multiple split passes flag is set we check if each of the loops |
| // still meet the register pressure criteria. If they do then we add them to the |
| // list of loops to be split (created in step one) to allow for loops to be |
| // split multiple times. |
| // |
| |
| namespace spvtools { |
| namespace opt { |
| |
| class LoopFissionImpl { |
| public: |
| LoopFissionImpl(IRContext* context, Loop* loop) |
| : context_(context), loop_(loop), load_used_in_condition_(false) {} |
| |
| // Group each instruction in the loop into sets of instructions related by |
| // their usedef chains. An instruction which uses another will appear in the |
| // same set. Then merge those sets into just two sets. Returns false if there |
| // was one or less sets created. |
| bool GroupInstructionsByUseDef(); |
| |
| // Check if the sets built by GroupInstructionsByUseDef violate any data |
| // dependence rules. |
| bool CanPerformSplit(); |
| |
| // Split the loop and return a pointer to the new loop. |
| Loop* SplitLoop(); |
| |
| // Checks if |inst| is safe to move. We can only move instructions which don't |
| // have any side effects and OpLoads and OpStores. |
| bool MovableInstruction(const Instruction& inst) const; |
| |
| private: |
| // Traverse the def use chain of |inst| and add the users and uses of |inst| |
| // which are in the same loop to the |returned_set|. |
| void TraverseUseDef(Instruction* inst, std::set<Instruction*>* returned_set, |
| bool ignore_phi_users = false, bool report_loads = false); |
| |
| // We group the instructions in the block into two different groups, the |
| // instructions to be kept in the original loop and the ones to be cloned into |
| // the new loop. As the cloned loop is attached to the preheader it will be |
| // the first loop and the second loop will be the original. |
| std::set<Instruction*> cloned_loop_instructions_; |
| std::set<Instruction*> original_loop_instructions_; |
| |
| // We need a set of all the instructions to be seen so we can break any |
| // recursion and also so we can ignore certain instructions by preemptively |
| // adding them to this set. |
| std::set<Instruction*> seen_instructions_; |
| |
| // A map of instructions to their relative position in the function. |
| std::map<Instruction*, size_t> instruction_order_; |
| |
| IRContext* context_; |
| |
| Loop* loop_; |
| |
| // This is set to true by TraverseUseDef when traversing the instructions |
| // related to the loop condition and any if conditions should any of those |
| // instructions be a load. |
| bool load_used_in_condition_; |
| }; |
| |
| bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const { |
| return inst.opcode() == SpvOp::SpvOpLoad || |
| inst.opcode() == SpvOp::SpvOpStore || |
| inst.opcode() == SpvOp::SpvOpSelectionMerge || |
| inst.opcode() == SpvOp::SpvOpPhi || inst.IsOpcodeCodeMotionSafe(); |
| } |
| |
| void LoopFissionImpl::TraverseUseDef(Instruction* inst, |
| std::set<Instruction*>* returned_set, |
| bool ignore_phi_users, bool report_loads) { |
| assert(returned_set && "Set to be returned cannot be null."); |
| |
| analysis::DefUseManager* def_use = context_->get_def_use_mgr(); |
| std::set<Instruction*>& inst_set = *returned_set; |
| |
| // We create this functor to traverse the use def chain to build the |
| // grouping of related instructions. The lambda captures the std::function |
| // to allow it to recurse. |
| std::function<void(Instruction*)> traverser_functor; |
| traverser_functor = [this, def_use, &inst_set, &traverser_functor, |
| ignore_phi_users, report_loads](Instruction* user) { |
| // If we've seen the instruction before or it is not inside the loop end the |
| // traversal. |
| if (!user || seen_instructions_.count(user) != 0 || |
| !context_->get_instr_block(user) || |
| !loop_->IsInsideLoop(context_->get_instr_block(user))) { |
| return; |
| } |
| |
| // Don't include labels or loop merge instructions in the instruction sets. |
| // Including them would mean we group instructions related only by using the |
| // same labels (i.e phis). We already preempt the inclusion of |
| // OpSelectionMerge by adding related instructions to the seen_instructions_ |
| // set. |
| if (user->opcode() == SpvOp::SpvOpLoopMerge || |
| user->opcode() == SpvOp::SpvOpLabel) |
| return; |
| |
| // If the |report_loads| flag is set, set the class field |
| // load_used_in_condition_ to false. This is used to check that none of the |
| // condition checks in the loop rely on loads. |
| if (user->opcode() == SpvOp::SpvOpLoad && report_loads) { |
| load_used_in_condition_ = true; |
| } |
| |
| // Add the instruction to the set of instructions already seen, this breaks |
| // recursion and allows us to ignore certain instructions. |
| seen_instructions_.insert(user); |
| |
| inst_set.insert(user); |
| |
| // Wrapper functor to traverse the operands of each instruction. |
| auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) { |
| traverser_functor(def_use->GetDef(*id)); |
| }; |
| user->ForEachInOperand(traverse_operand); |
| |
| // For the first traversal we want to ignore the users of the phi. |
| if (ignore_phi_users && user->opcode() == SpvOp::SpvOpPhi) return; |
| |
| // Traverse each user with this lambda. |
| def_use->ForEachUser(user, traverser_functor); |
| |
| // Wrapper functor for the use traversal. |
| auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) { |
| traverser_functor(use); |
| }; |
| def_use->ForEachUse(user, traverse_use); |
| |
| }; |
| |
| // We start the traversal of the use def graph by invoking the above |
| // lambda with the |inst| parameter. |
| traverser_functor(inst); |
| } |
| |
| bool LoopFissionImpl::GroupInstructionsByUseDef() { |
| std::vector<std::set<Instruction*>> sets{}; |
| |
| // We want to ignore all the instructions stemming from the loop condition |
| // instruction. |
| BasicBlock* condition_block = loop_->FindConditionBlock(); |
| |
| if (!condition_block) return false; |
| Instruction* condition = &*condition_block->tail(); |
| |
| // We iterate over the blocks via iterating over all the blocks in the |
| // function, we do this so we are iterating in the same order which the blocks |
| // appear in the binary. |
| Function& function = *loop_->GetHeaderBlock()->GetParent(); |
| |
| // Create a temporary set to ignore certain groups of instructions within the |
| // loop. We don't want any instructions related to control flow to be removed |
| // from either loop only instructions within the control flow bodies. |
| std::set<Instruction*> instructions_to_ignore{}; |
| TraverseUseDef(condition, &instructions_to_ignore, true, true); |
| |
| // Traverse control flow instructions to ensure they are added to the |
| // seen_instructions_ set and will be ignored when it it called with actual |
| // sets. |
| for (BasicBlock& block : function) { |
| if (!loop_->IsInsideLoop(block.id())) continue; |
| |
| for (Instruction& inst : block) { |
| // Ignore all instructions related to control flow. |
| if (inst.opcode() == SpvOp::SpvOpSelectionMerge || inst.IsBranch()) { |
| TraverseUseDef(&inst, &instructions_to_ignore, true, true); |
| } |
| } |
| } |
| |
| // Traverse the instructions and generate the sets, automatically ignoring any |
| // instructions in instructions_to_ignore. |
| for (BasicBlock& block : function) { |
| if (!loop_->IsInsideLoop(block.id()) || |
| loop_->GetHeaderBlock()->id() == block.id()) |
| continue; |
| |
| for (Instruction& inst : block) { |
| // Record the order that each load/store is seen. |
| if (inst.opcode() == SpvOp::SpvOpLoad || |
| inst.opcode() == SpvOp::SpvOpStore) { |
| instruction_order_[&inst] = instruction_order_.size(); |
| } |
| |
| // Ignore instructions already seen in a traversal. |
| if (seen_instructions_.count(&inst) != 0) { |
| continue; |
| } |
| |
| // Build the set. |
| std::set<Instruction*> inst_set{}; |
| TraverseUseDef(&inst, &inst_set); |
| if (!inst_set.empty()) sets.push_back(std::move(inst_set)); |
| } |
| } |
| |
| // If we have one or zero sets return false to indicate that due to |
| // insufficient instructions we couldn't split the loop into two groups and |
| // thus the loop can't be split any further. |
| if (sets.size() < 2) { |
| return false; |
| } |
| |
| // Merge the loop sets into two different sets. In CanPerformSplit we will |
| // validate that we don't break the relative ordering of loads/stores by doing |
| // this. |
| for (size_t index = 0; index < sets.size() / 2; ++index) { |
| cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end()); |
| } |
| for (size_t index = sets.size() / 2; index < sets.size(); ++index) { |
| original_loop_instructions_.insert(sets[index].begin(), sets[index].end()); |
| } |
| |
| return true; |
| } |
| |
| bool LoopFissionImpl::CanPerformSplit() { |
| // Return false if any of the condition instructions in the loop depend on a |
| // load. |
| if (load_used_in_condition_) { |
| return false; |
| } |
| |
| // Build a list of all parent loops of this loop. Loop dependence analysis |
| // needs this structure. |
| std::vector<const Loop*> loops; |
| Loop* parent_loop = loop_; |
| while (parent_loop) { |
| loops.push_back(parent_loop); |
| parent_loop = parent_loop->GetParent(); |
| } |
| |
| LoopDependenceAnalysis analysis{context_, loops}; |
| |
| // A list of all the stores in the cloned loop. |
| std::vector<Instruction*> set_one_stores{}; |
| |
| // A list of all the loads in the cloned loop. |
| std::vector<Instruction*> set_one_loads{}; |
| |
| // Populate the above lists. |
| for (Instruction* inst : cloned_loop_instructions_) { |
| if (inst->opcode() == SpvOp::SpvOpStore) { |
| set_one_stores.push_back(inst); |
| } else if (inst->opcode() == SpvOp::SpvOpLoad) { |
| set_one_loads.push_back(inst); |
| } |
| |
| // If we find any instruction which we can't move (such as a barrier), |
| // return false. |
| if (!MovableInstruction(*inst)) return false; |
| } |
| |
| // We need to calculate the depth of the loop to create the loop dependency |
| // distance vectors. |
| const size_t loop_depth = loop_->GetDepth(); |
| |
| // Check the dependencies between loads in the cloned loop and stores in the |
| // original and vice versa. |
| for (Instruction* inst : original_loop_instructions_) { |
| // If we find any instruction which we can't move (such as a barrier), |
| // return false. |
| if (!MovableInstruction(*inst)) return false; |
| |
| // Look at the dependency between the loads in the original and stores in |
| // the cloned loops. |
| if (inst->opcode() == SpvOp::SpvOpLoad) { |
| for (Instruction* store : set_one_stores) { |
| DistanceVector vec{loop_depth}; |
| |
| // If the store actually should appear after the load, return false. |
| // This means the store has been placed in the wrong grouping. |
| if (instruction_order_[store] > instruction_order_[inst]) { |
| return false; |
| } |
| // If not independent check the distance vector. |
| if (!analysis.GetDependence(store, inst, &vec)) { |
| for (DistanceEntry& entry : vec.GetEntries()) { |
| // A distance greater than zero means that the store in the cloned |
| // loop has a dependency on the load in the original loop. |
| if (entry.distance > 0) return false; |
| } |
| } |
| } |
| } else if (inst->opcode() == SpvOp::SpvOpStore) { |
| for (Instruction* load : set_one_loads) { |
| DistanceVector vec{loop_depth}; |
| |
| // If the load actually should appear after the store, return false. |
| if (instruction_order_[load] > instruction_order_[inst]) { |
| return false; |
| } |
| |
| // If not independent check the distance vector. |
| if (!analysis.GetDependence(inst, load, &vec)) { |
| for (DistanceEntry& entry : vec.GetEntries()) { |
| // A distance less than zero means the load in the cloned loop is |
| // dependent on the store instruction in the original loop. |
| if (entry.distance < 0) return false; |
| } |
| } |
| } |
| } |
| } |
| return true; |
| } |
| |
| Loop* LoopFissionImpl::SplitLoop() { |
| // Clone the loop. |
| LoopUtils util{context_, loop_}; |
| LoopUtils::LoopCloningResult clone_results; |
| Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results); |
| |
| // Update the OpLoopMerge in the cloned loop. |
| cloned_loop->UpdateLoopMergeInst(); |
| |
| // Add the loop_ to the module. |
| // TODO(1841): Handle failure to create pre-header. |
| Function::iterator it = |
| util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id()); |
| util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(), |
| clone_results.cloned_bb_.end(), ++it); |
| loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock()); |
| |
| std::vector<Instruction*> instructions_to_kill{}; |
| |
| // Kill all the instructions which should appear in the cloned loop but not in |
| // the original loop. |
| for (uint32_t id : loop_->GetBlocks()) { |
| BasicBlock* block = context_->cfg()->block(id); |
| |
| for (Instruction& inst : *block) { |
| // If the instruction appears in the cloned loop instruction group, kill |
| // it. |
| if (cloned_loop_instructions_.count(&inst) == 1 && |
| original_loop_instructions_.count(&inst) == 0) { |
| instructions_to_kill.push_back(&inst); |
| if (inst.opcode() == SpvOp::SpvOpPhi) { |
| context_->ReplaceAllUsesWith( |
| inst.result_id(), clone_results.value_map_[inst.result_id()]); |
| } |
| } |
| } |
| } |
| |
| // Kill all instructions which should appear in the original loop and not in |
| // the cloned loop. |
| for (uint32_t id : cloned_loop->GetBlocks()) { |
| BasicBlock* block = context_->cfg()->block(id); |
| for (Instruction& inst : *block) { |
| Instruction* old_inst = clone_results.ptr_map_[&inst]; |
| // If the instruction belongs to the original loop instruction group, kill |
| // it. |
| if (cloned_loop_instructions_.count(old_inst) == 0 && |
| original_loop_instructions_.count(old_inst) == 1) { |
| instructions_to_kill.push_back(&inst); |
| } |
| } |
| } |
| |
| for (Instruction* i : instructions_to_kill) { |
| context_->KillInst(i); |
| } |
| |
| return cloned_loop; |
| } |
| |
| LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split, |
| bool split_multiple_times) |
| : split_multiple_times_(split_multiple_times) { |
| // Split if the number of registers in the loop exceeds |
| // |register_threshold_to_split|. |
| split_criteria_ = |
| [register_threshold_to_split]( |
| const RegisterLiveness::RegionRegisterLiveness& liveness) { |
| return liveness.used_registers_ > register_threshold_to_split; |
| }; |
| } |
| |
| LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) { |
| // Split by default. |
| split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) { |
| return true; |
| }; |
| } |
| |
| bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) { |
| LivenessAnalysis* analysis = c->GetLivenessAnalysis(); |
| |
| RegisterLiveness::RegionRegisterLiveness liveness{}; |
| |
| Function* function = loop.GetHeaderBlock()->GetParent(); |
| analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness); |
| |
| return split_criteria_(liveness); |
| } |
| |
| Pass::Status LoopFissionPass::Process() { |
| bool changed = false; |
| |
| for (Function& f : *context()->module()) { |
| // We collect all the inner most loops in the function and run the loop |
| // splitting util on each. The reason we do this is to allow us to iterate |
| // over each, as creating new loops will invalidate the the loop iterator. |
| std::vector<Loop*> inner_most_loops{}; |
| LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f); |
| for (Loop& loop : loop_descriptor) { |
| if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) { |
| inner_most_loops.push_back(&loop); |
| } |
| } |
| |
| // List of new loops which meet the criteria to be split again. |
| std::vector<Loop*> new_loops_to_split{}; |
| |
| while (!inner_most_loops.empty()) { |
| for (Loop* loop : inner_most_loops) { |
| LoopFissionImpl impl{context(), loop}; |
| |
| // Group the instructions in the loop into two different sets of related |
| // instructions. If we can't group the instructions into the two sets |
| // then we can't split the loop any further. |
| if (!impl.GroupInstructionsByUseDef()) { |
| continue; |
| } |
| |
| if (impl.CanPerformSplit()) { |
| Loop* second_loop = impl.SplitLoop(); |
| changed = true; |
| context()->InvalidateAnalysesExceptFor( |
| IRContext::kAnalysisLoopAnalysis); |
| |
| // If the newly created loop meets the criteria to be split, split it |
| // again. |
| if (ShouldSplitLoop(*second_loop, context())) |
| new_loops_to_split.push_back(second_loop); |
| |
| // If the original loop (now split) still meets the criteria to be |
| // split, split it again. |
| if (ShouldSplitLoop(*loop, context())) |
| new_loops_to_split.push_back(loop); |
| } |
| } |
| |
| // If the split multiple times flag has been set add the new loops which |
| // meet the splitting criteria into the list of loops to be split on the |
| // next iteration. |
| if (split_multiple_times_) { |
| inner_most_loops = std::move(new_loops_to_split); |
| } else { |
| break; |
| } |
| } |
| } |
| |
| return changed ? Pass::Status::SuccessWithChange |
| : Pass::Status::SuccessWithoutChange; |
| } |
| |
| } // namespace opt |
| } // namespace spvtools |