|  | //===-- Target.cpp ----------------------------------------------*- C++ -*-===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | #include "../Target.h" | 
|  |  | 
|  | #include "../Latency.h" | 
|  | #include "../Uops.h" | 
|  | #include "MCTargetDesc/X86BaseInfo.h" | 
|  | #include "MCTargetDesc/X86MCTargetDesc.h" | 
|  | #include "X86.h" | 
|  | #include "X86RegisterInfo.h" | 
|  | #include "X86Subtarget.h" | 
|  | #include "llvm/MC/MCInstBuilder.h" | 
|  |  | 
|  | namespace exegesis { | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | // Common code for X86 Uops and Latency runners. | 
|  | template <typename Impl> class X86BenchmarkRunner : public Impl { | 
|  | using Impl::Impl; | 
|  |  | 
|  | llvm::Expected<SnippetPrototype> | 
|  | generatePrototype(unsigned Opcode) const override { | 
|  | // Test whether we can generate a snippet for this instruction. | 
|  | const auto &InstrInfo = this->State.getInstrInfo(); | 
|  | const auto OpcodeName = InstrInfo.getName(Opcode); | 
|  | if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") || | 
|  | OpcodeName.startswith("ADJCALLSTACK")) { | 
|  | return llvm::make_error<BenchmarkFailure>( | 
|  | "Unsupported opcode: Push/Pop/AdjCallStack"); | 
|  | } | 
|  |  | 
|  | // Handle X87. | 
|  | const auto &InstrDesc = InstrInfo.get(Opcode); | 
|  | const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask; | 
|  | const Instruction Instr(InstrDesc, this->RATC); | 
|  | switch (FPInstClass) { | 
|  | case llvm::X86II::NotFP: | 
|  | break; | 
|  | case llvm::X86II::ZeroArgFP: | 
|  | return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP"); | 
|  | case llvm::X86II::OneArgFP: | 
|  | return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP"); | 
|  | case llvm::X86II::OneArgFPRW: | 
|  | case llvm::X86II::TwoArgFP: { | 
|  | // These are instructions like | 
|  | //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) | 
|  | //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP) | 
|  | // They are intrinsically serial and do not modify the state of the stack. | 
|  | // We generate the same code for latency and uops. | 
|  | return this->generateSelfAliasingPrototype(Instr); | 
|  | } | 
|  | case llvm::X86II::CompareFP: | 
|  | return Impl::handleCompareFP(Instr); | 
|  | case llvm::X86II::CondMovFP: | 
|  | return Impl::handleCondMovFP(Instr); | 
|  | case llvm::X86II::SpecialFP: | 
|  | return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP"); | 
|  | default: | 
|  | llvm_unreachable("Unknown FP Type!"); | 
|  | } | 
|  |  | 
|  | // Fallback to generic implementation. | 
|  | return Impl::Base::generatePrototype(Opcode); | 
|  | } | 
|  | }; | 
|  |  | 
|  | class X86LatencyImpl : public LatencyBenchmarkRunner { | 
|  | protected: | 
|  | using Base = LatencyBenchmarkRunner; | 
|  | using Base::Base; | 
|  | llvm::Expected<SnippetPrototype> | 
|  | handleCompareFP(const Instruction &Instr) const { | 
|  | return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP"); | 
|  | } | 
|  | llvm::Expected<SnippetPrototype> | 
|  | handleCondMovFP(const Instruction &Instr) const { | 
|  | return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP"); | 
|  | } | 
|  | }; | 
|  |  | 
|  | class X86UopsImpl : public UopsBenchmarkRunner { | 
|  | protected: | 
|  | using Base = UopsBenchmarkRunner; | 
|  | using Base::Base; | 
|  | // We can compute uops for any FP instruction that does not grow or shrink the | 
|  | // stack (either do not touch the stack or push as much as they pop). | 
|  | llvm::Expected<SnippetPrototype> | 
|  | handleCompareFP(const Instruction &Instr) const { | 
|  | return generateUnconstrainedPrototype( | 
|  | Instr, "instruction does not grow/shrink the FP stack"); | 
|  | } | 
|  | llvm::Expected<SnippetPrototype> | 
|  | handleCondMovFP(const Instruction &Instr) const { | 
|  | return generateUnconstrainedPrototype( | 
|  | Instr, "instruction does not grow/shrink the FP stack"); | 
|  | } | 
|  | }; | 
|  |  | 
|  | class ExegesisX86Target : public ExegesisTarget { | 
|  | void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override { | 
|  | // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F. | 
|  | PM.add(llvm::createX86FloatingPointStackifierPass()); | 
|  | } | 
|  |  | 
|  | std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI, | 
|  | unsigned Reg) const override { | 
|  | // GPR. | 
|  | if (llvm::X86::GR8RegClass.contains(Reg)) | 
|  | return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)}; | 
|  | if (llvm::X86::GR16RegClass.contains(Reg)) | 
|  | return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)}; | 
|  | if (llvm::X86::GR32RegClass.contains(Reg)) | 
|  | return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)}; | 
|  | if (llvm::X86::GR64RegClass.contains(Reg)) | 
|  | return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)}; | 
|  | // MMX. | 
|  | if (llvm::X86::VR64RegClass.contains(Reg)) | 
|  | return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm); | 
|  | // {X,Y,Z}MM. | 
|  | if (llvm::X86::VR128XRegClass.contains(Reg)) { | 
|  | if (STI.getFeatureBits()[llvm::X86::FeatureAVX512]) | 
|  | return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm); | 
|  | if (STI.getFeatureBits()[llvm::X86::FeatureAVX]) | 
|  | return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm); | 
|  | return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm); | 
|  | } | 
|  | if (llvm::X86::VR256XRegClass.contains(Reg)) { | 
|  | if (STI.getFeatureBits()[llvm::X86::FeatureAVX512]) | 
|  | return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm); | 
|  | return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm); | 
|  | } | 
|  | if (llvm::X86::VR512RegClass.contains(Reg)) | 
|  | return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm); | 
|  | // X87. | 
|  | if (llvm::X86::RFP32RegClass.contains(Reg) || | 
|  | llvm::X86::RFP64RegClass.contains(Reg) || | 
|  | llvm::X86::RFP80RegClass.contains(Reg)) | 
|  | return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m); | 
|  | if (Reg == llvm::X86::EFLAGS) { | 
|  | // Set all flags to 0 but the bits that are "reserved and set to 1". | 
|  | constexpr const uint32_t kImmValue = 0x00007002u; | 
|  | std::vector<llvm::MCInst> Result; | 
|  | Result.push_back(allocateStackSpace(8)); | 
|  | Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue)); | 
|  | Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops. | 
|  | return Result; | 
|  | } | 
|  | return {}; | 
|  | } | 
|  |  | 
|  | std::unique_ptr<BenchmarkRunner> | 
|  | createLatencyBenchmarkRunner(const LLVMState &State) const override { | 
|  | return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State); | 
|  | } | 
|  |  | 
|  | std::unique_ptr<BenchmarkRunner> | 
|  | createUopsBenchmarkRunner(const LLVMState &State) const override { | 
|  | return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State); | 
|  | } | 
|  |  | 
|  | bool matchesArch(llvm::Triple::ArchType Arch) const override { | 
|  | return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86; | 
|  | } | 
|  |  | 
|  | private: | 
|  | // setRegToConstant() specialized for a vector register of size | 
|  | // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector | 
|  | // register load. | 
|  | static std::vector<llvm::MCInst> | 
|  | setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes, | 
|  | const unsigned RMOpcode) { | 
|  | // There is no instruction to directly set XMM, go through memory. | 
|  | // Since vector values can be interpreted as integers of various sizes (8 | 
|  | // to 64 bits) as well as floats and double, so we chose an immediate | 
|  | // value that has set bits for all byte values and is a normal float/ | 
|  | // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when | 
|  | // interpreted as a float. | 
|  | constexpr const uint32_t kImmValue = 0x40404040u; | 
|  | std::vector<llvm::MCInst> Result; | 
|  | Result.push_back(allocateStackSpace(RegSizeBytes)); | 
|  | constexpr const unsigned kMov32NumBytes = 4; | 
|  | for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) { | 
|  | Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue)); | 
|  | } | 
|  | Result.push_back(loadToReg(Reg, RMOpcode)); | 
|  | Result.push_back(releaseStackSpace(RegSizeBytes)); | 
|  | return Result; | 
|  | } | 
|  |  | 
|  | // Allocates scratch memory on the stack. | 
|  | static llvm::MCInst allocateStackSpace(unsigned Bytes) { | 
|  | return llvm::MCInstBuilder(llvm::X86::SUB64ri8) | 
|  | .addReg(llvm::X86::RSP) | 
|  | .addReg(llvm::X86::RSP) | 
|  | .addImm(Bytes); | 
|  | } | 
|  |  | 
|  | // Fills scratch memory at offset `OffsetBytes` with value `Imm`. | 
|  | static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes, | 
|  | uint64_t Imm) { | 
|  | return llvm::MCInstBuilder(MovOpcode) | 
|  | // Address = ESP | 
|  | .addReg(llvm::X86::RSP) // BaseReg | 
|  | .addImm(1)              // ScaleAmt | 
|  | .addReg(0)              // IndexReg | 
|  | .addImm(OffsetBytes)    // Disp | 
|  | .addReg(0)              // Segment | 
|  | // Immediate. | 
|  | .addImm(Imm); | 
|  | } | 
|  |  | 
|  | // Loads scratch memory into register `Reg` using opcode `RMOpcode`. | 
|  | static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) { | 
|  | return llvm::MCInstBuilder(RMOpcode) | 
|  | .addReg(Reg) | 
|  | // Address = ESP | 
|  | .addReg(llvm::X86::RSP) // BaseReg | 
|  | .addImm(1)              // ScaleAmt | 
|  | .addReg(0)              // IndexReg | 
|  | .addImm(0)              // Disp | 
|  | .addReg(0);             // Segment | 
|  | } | 
|  |  | 
|  | // Releases scratch memory. | 
|  | static llvm::MCInst releaseStackSpace(unsigned Bytes) { | 
|  | return llvm::MCInstBuilder(llvm::X86::ADD64ri8) | 
|  | .addReg(llvm::X86::RSP) | 
|  | .addReg(llvm::X86::RSP) | 
|  | .addImm(Bytes); | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // namespace | 
|  |  | 
|  | static ExegesisTarget *getTheExegesisX86Target() { | 
|  | static ExegesisX86Target Target; | 
|  | return &Target; | 
|  | } | 
|  |  | 
|  | void InitializeX86ExegesisTarget() { | 
|  | ExegesisTarget::registerTarget(getTheExegesisX86Target()); | 
|  | } | 
|  |  | 
|  | } // namespace exegesis |