//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines a hazard recognizer for the SystemZ scheduler. |
| // |
| // This class is used by the SystemZ scheduling strategy to maintain |
| // the state during scheduling, and provide cost functions for |
| // scheduling candidates. This includes: |
| // |
| // * Decoder grouping. A decoder group can maximally hold 3 uops, and |
| // instructions that always begin a new group should be scheduled when |
| // the current decoder group is empty. |
| // * Processor resources usage. It is beneficial to balance the use of |
| // resources. |
| // |
| // A goal is to consider all instructions, also those outside of any |
| // scheduling region. Such instructions are "advanced" past and include |
| // single instructions before a scheduling region, branches etc. |
| // |
// A block that has only one predecessor continues scheduling with that
// predecessor's state (which may be updated by emitting branches).
| // |
| // ===---------------------------------------------------------------------===// |
| |
| #include "SystemZHazardRecognizer.h" |
| #include "llvm/ADT/Statistic.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "machine-scheduler" |
| |
| // This is the limit of processor resource usage at which the |
| // scheduler should try to look for other instructions (not using the |
| // critical resource). |
| static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden, |
| cl::desc("The OOO window for processor " |
| "resources during scheduling."), |
| cl::init(8)); |
| |
| unsigned SystemZHazardRecognizer:: |
| getNumDecoderSlots(SUnit *SU) const { |
| const MCSchedClassDesc *SC = getSchedClass(SU); |
| if (!SC->isValid()) |
| return 0; // IMPLICIT_DEF / KILL -- will not make impact in output. |
| |
| assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) && |
| "Only cracked instruction can have 2 uops."); |
| assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) && |
| "Expanded instructions always group alone."); |
| assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) && |
| "Expanded instructions fill the group(s)."); |
| |
| return SC->NumMicroOps; |
| } |
| |
| unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const { |
| unsigned Idx = CurrGroupSize; |
| if (GrpCount % 2) |
| Idx += 3; |
| |
| if (SU != nullptr && !fitsIntoCurrentGroup(SU)) { |
| if (Idx == 1 || Idx == 2) |
| Idx = 3; |
| else if (Idx == 4 || Idx == 5) |
| Idx = 0; |
| } |
| |
| return Idx; |
| } |
| |
| ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer:: |
| getHazardType(SUnit *m, int Stalls) { |
| return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard); |
| } |
| |
| void SystemZHazardRecognizer::Reset() { |
| CurrGroupSize = 0; |
| CurrGroupHas4RegOps = false; |
| clearProcResCounters(); |
| GrpCount = 0; |
| LastFPdOpCycleIdx = UINT_MAX; |
| LastEmittedMI = nullptr; |
| LLVM_DEBUG(CurGroupDbg = "";); |
| } |
| |
| bool |
| SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { |
| const MCSchedClassDesc *SC = getSchedClass(SU); |
| if (!SC->isValid()) |
| return true; |
| |
| // A cracked instruction only fits into schedule if the current |
| // group is empty. |
| if (SC->BeginGroup) |
| return (CurrGroupSize == 0); |
| |
| // An instruction with 4 register operands will not fit in last slot. |
| assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) && |
| "Current decoder group is already full!"); |
| if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) |
| return false; |
| |
| // Since a full group is handled immediately in EmitInstruction(), |
| // SU should fit into current group. NumSlots should be 1 or 0, |
| // since it is not a cracked or expanded instruction. |
| assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) && |
| "Expected normal instruction to fit in non-full group!"); |
| |
| return true; |
| } |
| |
| bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { |
| const MachineFunction &MF = *MI->getParent()->getParent(); |
| const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); |
| const MCInstrDesc &MID = MI->getDesc(); |
| unsigned Count = 0; |
| for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { |
| const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); |
| if (RC == nullptr) |
| continue; |
| if (OpIdx >= MID.getNumDefs() && |
| MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) |
| continue; |
| Count++; |
| } |
| return Count >= 4; |
| } |
| |
| void SystemZHazardRecognizer::nextGroup() { |
| if (CurrGroupSize == 0) |
| return; |
| |
| LLVM_DEBUG(dumpCurrGroup("Completed decode group")); |
| LLVM_DEBUG(CurGroupDbg = "";); |
| |
| int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1); |
| assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) && |
| "Current decoder group bad."); |
| |
| // Reset counter for next group. |
| CurrGroupSize = 0; |
| CurrGroupHas4RegOps = false; |
| |
| GrpCount += ((unsigned) NumGroups); |
| |
| // Decrease counters for execution units. |
| for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) |
| ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups) |
| ? (ProcResourceCounters[i] - NumGroups) |
| : 0); |
| |
| // Clear CriticalResourceIdx if it is now below the threshold. |
| if (CriticalResourceIdx != UINT_MAX && |
| (ProcResourceCounters[CriticalResourceIdx] <= |
| ProcResCostLim)) |
| CriticalResourceIdx = UINT_MAX; |
| |
| LLVM_DEBUG(dumpState();); |
| } |
| |
| #ifndef NDEBUG // Debug output |
| void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { |
| OS << "SU(" << SU->NodeNum << "):"; |
| OS << TII->getName(SU->getInstr()->getOpcode()); |
| |
| const MCSchedClassDesc *SC = getSchedClass(SU); |
| if (!SC->isValid()) |
| return; |
| |
| for (TargetSchedModel::ProcResIter |
| PI = SchedModel->getWriteProcResBegin(SC), |
| PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { |
| const MCProcResourceDesc &PRD = |
| *SchedModel->getProcResource(PI->ProcResourceIdx); |
| std::string FU(PRD.Name); |
| // trim e.g. Z13_FXaUnit -> FXa |
| FU = FU.substr(FU.find("_") + 1); |
| size_t Pos = FU.find("Unit"); |
| if (Pos != std::string::npos) |
| FU.resize(Pos); |
| if (FU == "LS") // LSUnit -> LSU |
| FU = "LSU"; |
| OS << "/" << FU; |
| |
| if (PI->Cycles > 1) |
| OS << "(" << PI->Cycles << "cyc)"; |
| } |
| |
| if (SC->NumMicroOps > 1) |
| OS << "/" << SC->NumMicroOps << "uops"; |
| if (SC->BeginGroup && SC->EndGroup) |
| OS << "/GroupsAlone"; |
| else if (SC->BeginGroup) |
| OS << "/BeginsGroup"; |
| else if (SC->EndGroup) |
| OS << "/EndsGroup"; |
| if (SU->isUnbuffered) |
| OS << "/Unbuffered"; |
| if (has4RegOps(SU->getInstr())) |
| OS << "/4RegOps"; |
| } |
| |
| void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { |
| dbgs() << "++ " << Msg; |
| dbgs() << ": "; |
| |
| if (CurGroupDbg.empty()) |
| dbgs() << " <empty>\n"; |
| else { |
| dbgs() << "{ " << CurGroupDbg << " }"; |
| dbgs() << " (" << CurrGroupSize << " decoder slot" |
| << (CurrGroupSize > 1 ? "s":"") |
| << (CurrGroupHas4RegOps ? ", 4RegOps" : "") |
| << ")\n"; |
| } |
| } |
| |
| void SystemZHazardRecognizer::dumpProcResourceCounters() const { |
| bool any = false; |
| |
| for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) |
| if (ProcResourceCounters[i] > 0) { |
| any = true; |
| break; |
| } |
| |
| if (!any) |
| return; |
| |
| dbgs() << "++ | Resource counters: "; |
| for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) |
| if (ProcResourceCounters[i] > 0) |
| dbgs() << SchedModel->getProcResource(i)->Name |
| << ":" << ProcResourceCounters[i] << " "; |
| dbgs() << "\n"; |
| |
| if (CriticalResourceIdx != UINT_MAX) |
| dbgs() << "++ | Critical resource: " |
| << SchedModel->getProcResource(CriticalResourceIdx)->Name |
| << "\n"; |
| } |
| |
| void SystemZHazardRecognizer::dumpState() const { |
| dumpCurrGroup("| Current decoder group"); |
| dbgs() << "++ | Current cycle index: " |
| << getCurrCycleIdx() << "\n"; |
| dumpProcResourceCounters(); |
| if (LastFPdOpCycleIdx != UINT_MAX) |
| dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n"; |
| } |
| |
| #endif //NDEBUG |
| |
| void SystemZHazardRecognizer::clearProcResCounters() { |
| ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0); |
| CriticalResourceIdx = UINT_MAX; |
| } |
| |
| static inline bool isBranchRetTrap(MachineInstr *MI) { |
| return (MI->isBranch() || MI->isReturn() || |
| MI->getOpcode() == SystemZ::CondTrap); |
| } |
| |
| // Update state with SU as the next scheduled unit. |
| void SystemZHazardRecognizer:: |
| EmitInstruction(SUnit *SU) { |
| const MCSchedClassDesc *SC = getSchedClass(SU); |
| LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs()); |
| dbgs() << "\n";); |
| LLVM_DEBUG(dumpCurrGroup("Decode group before emission");); |
| |
| // If scheduling an SU that must begin a new decoder group, move on |
| // to next group. |
| if (!fitsIntoCurrentGroup(SU)) |
| nextGroup(); |
| |
| LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg); |
| if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd);); |
| |
| LastEmittedMI = SU->getInstr(); |
| |
| // After returning from a call, we don't know much about the state. |
| if (SU->isCall) { |
| LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";); |
| Reset(); |
| LastEmittedMI = SU->getInstr(); |
| return; |
| } |
| |
| // Increase counter for execution unit(s). |
| for (TargetSchedModel::ProcResIter |
| PI = SchedModel->getWriteProcResBegin(SC), |
| PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { |
| // Don't handle FPd together with the other resources. |
| if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1) |
| continue; |
| int &CurrCounter = |
| ProcResourceCounters[PI->ProcResourceIdx]; |
| CurrCounter += PI->Cycles; |
| // Check if this is now the new critical resource. |
| if ((CurrCounter > ProcResCostLim) && |
| (CriticalResourceIdx == UINT_MAX || |
| (PI->ProcResourceIdx != CriticalResourceIdx && |
| CurrCounter > |
| ProcResourceCounters[CriticalResourceIdx]))) { |
| LLVM_DEBUG( |
| dbgs() << "++ New critical resource: " |
| << SchedModel->getProcResource(PI->ProcResourceIdx)->Name |
| << "\n";); |
| CriticalResourceIdx = PI->ProcResourceIdx; |
| } |
| } |
| |
| // Make note of an instruction that uses a blocking resource (FPd). |
| if (SU->isUnbuffered) { |
| LastFPdOpCycleIdx = getCurrCycleIdx(SU); |
| LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx |
| << "\n";); |
| } |
| |
| // Insert SU into current group by increasing number of slots used |
| // in current group. |
| CurrGroupSize += getNumDecoderSlots(SU); |
| CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); |
| unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3); |
| assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU)) |
| && "SU does not fit into decoder group!"); |
| |
| // Check if current group is now full/ended. If so, move on to next |
| // group to be ready to evaluate more candidates. |
| if (CurrGroupSize >= GroupLim || SC->EndGroup) |
| nextGroup(); |
| } |
| |
| int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { |
| const MCSchedClassDesc *SC = getSchedClass(SU); |
| if (!SC->isValid()) |
| return 0; |
| |
| // If SU begins new group, it can either break a current group early |
| // or fit naturally if current group is empty (negative cost). |
| if (SC->BeginGroup) { |
| if (CurrGroupSize) |
| return 3 - CurrGroupSize; |
| return -1; |
| } |
| |
| // Similarly, a group-ending SU may either fit well (last in group), or |
| // end the group prematurely. |
| if (SC->EndGroup) { |
| unsigned resultingGroupSize = |
| (CurrGroupSize + getNumDecoderSlots(SU)); |
| if (resultingGroupSize < 3) |
| return (3 - resultingGroupSize); |
| return -1; |
| } |
| |
| // An instruction with 4 register operands will not fit in last slot. |
| if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) |
| return 1; |
| |
| // Most instructions can be placed in any decoder slot. |
| return 0; |
| } |
| |
| bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const { |
| assert (SU->isUnbuffered); |
| // If this is the first FPd op, it should be scheduled high. |
| if (LastFPdOpCycleIdx == UINT_MAX) |
| return true; |
| // If this is not the first PFd op, it should go into the other side |
| // of the processor to use the other FPd unit there. This should |
| // generally happen if two FPd ops are placed with 2 other |
| // instructions between them (modulo 6). |
| unsigned SUCycleIdx = getCurrCycleIdx(SU); |
| if (LastFPdOpCycleIdx > SUCycleIdx) |
| return ((LastFPdOpCycleIdx - SUCycleIdx) == 3); |
| return ((SUCycleIdx - LastFPdOpCycleIdx) == 3); |
| } |
| |
| int SystemZHazardRecognizer:: |
| resourcesCost(SUnit *SU) { |
| int Cost = 0; |
| |
| const MCSchedClassDesc *SC = getSchedClass(SU); |
| if (!SC->isValid()) |
| return 0; |
| |
| // For a FPd op, either return min or max value as indicated by the |
| // distance to any prior FPd op. |
| if (SU->isUnbuffered) |
| Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX); |
| // For other instructions, give a cost to the use of the critical resource. |
| else if (CriticalResourceIdx != UINT_MAX) { |
| for (TargetSchedModel::ProcResIter |
| PI = SchedModel->getWriteProcResBegin(SC), |
| PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) |
| if (PI->ProcResourceIdx == CriticalResourceIdx) |
| Cost = PI->Cycles; |
| } |
| |
| return Cost; |
| } |
| |
| void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI, |
| bool TakenBranch) { |
| // Make a temporary SUnit. |
| SUnit SU(MI, 0); |
| |
| // Set interesting flags. |
| SU.isCall = MI->isCall(); |
| |
| const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI); |
| for (const MCWriteProcResEntry &PRE : |
| make_range(SchedModel->getWriteProcResBegin(SC), |
| SchedModel->getWriteProcResEnd(SC))) { |
| switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) { |
| case 0: |
| SU.hasReservedResource = true; |
| break; |
| case 1: |
| SU.isUnbuffered = true; |
| break; |
| default: |
| break; |
| } |
| } |
| |
| unsigned GroupSizeBeforeEmit = CurrGroupSize; |
| EmitInstruction(&SU); |
| |
| if (!TakenBranch && isBranchRetTrap(MI)) { |
| // NT Branch on second slot ends group. |
| if (GroupSizeBeforeEmit == 1) |
| nextGroup(); |
| } |
| |
| if (TakenBranch && CurrGroupSize > 0) |
| nextGroup(); |
| |
| assert ((!MI->isTerminator() || isBranchRetTrap(MI)) && |
| "Scheduler: unhandled terminator!"); |
| } |
| |
| void SystemZHazardRecognizer:: |
| copyState(SystemZHazardRecognizer *Incoming) { |
| // Current decoder group |
| CurrGroupSize = Incoming->CurrGroupSize; |
| LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;); |
| |
| // Processor resources |
| ProcResourceCounters = Incoming->ProcResourceCounters; |
| CriticalResourceIdx = Incoming->CriticalResourceIdx; |
| |
| // FPd |
| LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx; |
| GrpCount = Incoming->GrpCount; |
| } |