| //===-- Latency.cpp ---------------------------------------------*- C++ -*-===// | 
 | // | 
 | //                     The LLVM Compiler Infrastructure | 
 | // | 
 | // This file is distributed under the University of Illinois Open Source | 
 | // License. See LICENSE.TXT for details. | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 |  | 
 | #include "Latency.h" | 
 |  | 
 | #include "Assembler.h" | 
 | #include "BenchmarkRunner.h" | 
 | #include "MCInstrDescView.h" | 
 | #include "PerfHelper.h" | 
 | #include "llvm/ADT/STLExtras.h" | 
 | #include "llvm/MC/MCInst.h" | 
 | #include "llvm/MC/MCInstBuilder.h" | 
 | #include "llvm/Support/FormatVariadic.h" | 
 |  | 
 | namespace exegesis { | 
 |  | 
 | static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) { | 
 |   return OpInfo.OperandType == llvm::MCOI::OPERAND_UNKNOWN; | 
 | } | 
 |  | 
 | // FIXME: Handle memory, see PR36905. | 
 | static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) { | 
 |   return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY; | 
 | } | 
 |  | 
 | LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; | 
 |  | 
 | llvm::Error LatencyBenchmarkRunner::isInfeasible( | 
 |     const llvm::MCInstrDesc &MCInstrDesc) const { | 
 |   if (llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand)) | 
 |     return llvm::make_error<BenchmarkFailure>( | 
 |         "Infeasible : has unknown operands"); | 
 |   if (llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand)) | 
 |     return llvm::make_error<BenchmarkFailure>( | 
 |         "Infeasible : has memory operands"); | 
 |   return llvm::Error::success(); | 
 | } | 
 |  | 
 | llvm::Expected<SnippetPrototype> | 
 | LatencyBenchmarkRunner::generateTwoInstructionPrototype( | 
 |     const Instruction &Instr) const { | 
 |   std::vector<unsigned> Opcodes; | 
 |   Opcodes.resize(State.getInstrInfo().getNumOpcodes()); | 
 |   std::iota(Opcodes.begin(), Opcodes.end(), 0U); | 
 |   std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator()); | 
 |   for (const unsigned OtherOpcode : Opcodes) { | 
 |     if (OtherOpcode == Instr.Description->Opcode) | 
 |       continue; | 
 |     const auto &OtherInstrDesc = State.getInstrInfo().get(OtherOpcode); | 
 |     if (auto E = isInfeasible(OtherInstrDesc)) { | 
 |       llvm::consumeError(std::move(E)); | 
 |       continue; | 
 |     } | 
 |     const Instruction OtherInstr(OtherInstrDesc, RATC); | 
 |     const AliasingConfigurations Forward(Instr, OtherInstr); | 
 |     const AliasingConfigurations Back(OtherInstr, Instr); | 
 |     if (Forward.empty() || Back.empty()) | 
 |       continue; | 
 |     InstructionInstance ThisII(Instr); | 
 |     InstructionInstance OtherII(OtherInstr); | 
 |     if (!Forward.hasImplicitAliasing()) | 
 |       setRandomAliasing(Forward, ThisII, OtherII); | 
 |     if (!Back.hasImplicitAliasing()) | 
 |       setRandomAliasing(Back, OtherII, ThisII); | 
 |     SnippetPrototype Prototype; | 
 |     Prototype.Explanation = | 
 |         llvm::formatv("creating cycle through {0}.", | 
 |                       State.getInstrInfo().getName(OtherOpcode)); | 
 |     Prototype.Snippet.push_back(std::move(ThisII)); | 
 |     Prototype.Snippet.push_back(std::move(OtherII)); | 
 |     return std::move(Prototype); | 
 |   } | 
 |   return llvm::make_error<BenchmarkFailure>( | 
 |       "Infeasible : Didn't find any scheme to make the instruction serial"); | 
 | } | 
 |  | 
 | llvm::Expected<SnippetPrototype> | 
 | LatencyBenchmarkRunner::generatePrototype(unsigned Opcode) const { | 
 |   const auto &InstrDesc = State.getInstrInfo().get(Opcode); | 
 |   if (auto E = isInfeasible(InstrDesc)) | 
 |     return std::move(E); | 
 |   const Instruction Instr(InstrDesc, RATC); | 
 |   if (auto SelfAliasingPrototype = generateSelfAliasingPrototype(Instr)) | 
 |     return SelfAliasingPrototype; | 
 |   else | 
 |     llvm::consumeError(SelfAliasingPrototype.takeError()); | 
 |   // No self aliasing, trying to create a dependency through another opcode. | 
 |   return generateTwoInstructionPrototype(Instr); | 
 | } | 
 |  | 
 | const char *LatencyBenchmarkRunner::getCounterName() const { | 
 |   if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo()) | 
 |     llvm::report_fatal_error("sched model is missing extra processor info!"); | 
 |   const char *CounterName = State.getSubtargetInfo() | 
 |                                 .getSchedModel() | 
 |                                 .getExtraProcessorInfo() | 
 |                                 .PfmCounters.CycleCounter; | 
 |   if (!CounterName) | 
 |     llvm::report_fatal_error("sched model does not define a cycle counter"); | 
 |   return CounterName; | 
 | } | 
 |  | 
 | std::vector<BenchmarkMeasure> | 
 | LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, | 
 |                                         const unsigned NumRepetitions) const { | 
 |   // Cycle measurements include some overhead from the kernel. Repeat the | 
 |   // measure several times and take the minimum value. | 
 |   constexpr const int NumMeasurements = 30; | 
 |   int64_t MinLatency = std::numeric_limits<int64_t>::max(); | 
 |   const char *CounterName = getCounterName(); | 
 |   if (!CounterName) | 
 |     llvm::report_fatal_error("could not determine cycle counter name"); | 
 |   const pfm::PerfEvent CyclesPerfEvent(CounterName); | 
 |   if (!CyclesPerfEvent.valid()) | 
 |     llvm::report_fatal_error("invalid perf event"); | 
 |   for (size_t I = 0; I < NumMeasurements; ++I) { | 
 |     pfm::Counter Counter(CyclesPerfEvent); | 
 |     Counter.start(); | 
 |     Function(); | 
 |     Counter.stop(); | 
 |     const int64_t Value = Counter.read(); | 
 |     if (Value < MinLatency) | 
 |       MinLatency = Value; | 
 |   } | 
 |   return {{"latency", static_cast<double>(MinLatency) / NumRepetitions, ""}}; | 
 | } | 
 |  | 
 | } // namespace exegesis |