| //===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| //===----------------------------------------------------------------------===// |
| // Instruction scheduling annotations for in-order and out-of-order CPUs. |
| // These annotations are independent of the itinerary classes defined below. |
| // Here we define the subtarget independent read/write per-operand resources. |
| // The subtarget schedule definitions will then map these to the subtarget's |
| // resource usages. |
| // For example: |
| // The instruction cycle timings table might contain an entry for an operation |
| // like the following: |
| // Rd <- ADD Rn, Rm, <shift> Rs |
| // Uops | Latency from register | Uops - resource requirements - latency |
| // 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3 |
| // | | uopc Rd, Rn, T0 - P01 - 1 |
| // This is telling us that the result will be available in destination register |
| // Rd after a minimum of four cycles after the result in Rm and Rs is available |
| // (three cycles for the first micro-op plus one for the second) and one cycle |
| // after the result in Rn is available. The micro-ops can execute on resource |
| // P01. |
| // To model this, we need to express that we need to dispatch two micro-ops, |
| // that the resource P01 is needed and that the latency to Rn is different than |
| // the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by |
| // three (from four cycles down to one). |
| // We will do this by assigning (abstract) resources to register defs/uses. |
| // ARMSchedule.td: |
| // def WriteALUsr : SchedWrite; |
| // def ReadAdvanceALUsr : SchedRead; |
| // |
| // ARMInstrInfo.td: |
| // def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault, |
| // ReadDefault]> { ...} |
| // ReadAdvance read resources allow us to define "pipeline by-passes" or |
| // shorter latencies to certain registers as needed in the example above. |
| // The "ReadDefault" can be omitted. |
| // Next, the subtarget td file assigns resources to the abstract resources |
| // defined here. |
| // ARMScheduleSubtarget.td: |
| // // Resources. |
| // def P01 : ProcResource<3>; // ALU unit (3 of them). |
| // ... |
| // // Resource usages. |
| // def : WriteRes<WriteALUsr, [P01, P01]> { |
| // Latency = 4; // Latency of 4. |
| // NumMicroOps = 2; // Dispatch 2 micro-ops. |
| // // The two instances of resource P01 are occupied for one cycle. It is one |
| // // cycle because these resources happen to be pipelined. |
| // ResourceCycles = [1, 1]; |
| // } |
| // def : ReadAdvance<ReadAdvanceALUsr, 3>; |
| |
| //===----------------------------------------------------------------------===// |
| // Sched definitions for integer pipeline instructions |
| // |
| // Basic ALU operation. |
| // WriteALU is the abstract write (def) resource and ReadALU the abstract read |
| // (use) resource. Neither carries a latency or functional unit here; those are |
| // assigned by the subtarget schedule definitions. |
| def WriteALU : SchedWrite; |
| def ReadALU : SchedRead; |
| |
| // Basic ALU with shifts. |
| // Three abstract write resources, distinguished by how the shift amount is |
| // supplied and whether flags are set, plus one read resource for operands that |
| // are read later than the others. |
| def WriteALUsi : SchedWrite; // Shift by immediate. |
| def WriteALUsr : SchedWrite; // Shift by register. |
| def WriteALUSsr : SchedWrite; // Shift by register (flag setting). |
| def ReadALUsr : SchedRead; // Some operands are read later. |
| |
| // Compares. |
| // Abstract write resources for compare instructions. The si/sr suffixes |
| // presumably follow the shifted-ALU naming above (shift by immediate / shift by |
| // register) -- confirm against the instruction definitions that use them. |
| def WriteCMP : SchedWrite; |
| def WriteCMPsi : SchedWrite; |
| def WriteCMPsr : SchedWrite; |
| |
| // Multiplies. |
| // One abstract write resource per result width. A 64-bit result is described |
| // by two writes, one for the low and one for the high destination register. |
| // ReadMUL is the read resource declared alongside them. |
| def WriteMUL16 : SchedWrite; // 16-bit multiply. |
| def WriteMUL32 : SchedWrite; // 32-bit multiply. |
| def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg. |
| def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg. |
| def ReadMUL : SchedRead; |
| |
| // Multiply-accumulates. |
| // Mirrors the multiply resources: one abstract write per width, with the |
| // 64-bit form split into low-register and high-register writes, and ReadMAC |
| // as the read resource declared alongside them. |
| def WriteMAC16 : SchedWrite; // 16-bit mac. |
| def WriteMAC32 : SchedWrite; // 32-bit mac. |
| def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg. |
| def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg. |
| def ReadMAC : SchedRead; |
| |
| // Divisions. |
| // Single abstract write resource; no width variants are declared here. |
| def WriteDIV : SchedWrite; |
| |
| // Loads/Stores. |
| // Abstract write resources for memory operations. WritePreLd is presumably |
| // for preload (prefetch) instructions -- confirm at its uses. |
| def WriteLd : SchedWrite; |
| def WritePreLd : SchedWrite; |
| def WriteST : SchedWrite; |
| |
| // Branches. |
| // Abstract write resources for branch instructions. Judging by the names, |
| // WriteBrL is for branch-and-link and WriteBrTbl for table branches -- confirm |
| // against the instruction definitions that reference them. |
| def WriteBr : SchedWrite; |
| def WriteBrL : SchedWrite; |
| def WriteBrTbl : SchedWrite; |
| |
| // Noop. |
| // Abstract write resource for no-op instructions. |
| def WriteNoop : SchedWrite; |
| |
| //===----------------------------------------------------------------------===// |
| // Sched definitions for floating-point and neon instructions |
| // |
| // Floating point conversions |
| // WriteFPCVT is the abstract write resource for conversions; WriteFPMOV covers |
| // moves between the FP and general-purpose register files in either direction. |
| def WriteFPCVT : SchedWrite; |
| def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa |
| |
| // ALU operations (32/64-bit) |
| // One abstract write resource per operand width. |
| def WriteFPALU32 : SchedWrite; |
| def WriteFPALU64 : SchedWrite; |
| |
| // Multiplication |
| // One abstract write resource per operand width, plus the two FP read |
| // resources. ReadFPMAC (the accumulator read) is declared here even though the |
| // multiply-accumulate writes are declared in the next group. |
| def WriteFPMUL32 : SchedWrite; |
| def WriteFPMUL64 : SchedWrite; |
| def ReadFPMUL : SchedRead; // multiplier read |
| def ReadFPMAC : SchedRead; // accumulator read |
| |
| // Multiply-accumulate |
| // One abstract write resource per operand width. The matching read resources |
| // (ReadFPMUL, ReadFPMAC) are declared with the multiplication group. |
| def WriteFPMAC32 : SchedWrite; |
| def WriteFPMAC64 : SchedWrite; |
| |
| // Division |
| // One abstract write resource per operand width. |
| def WriteFPDIV32 : SchedWrite; |
| def WriteFPDIV64 : SchedWrite; |
| |
| // Square-root |
| // One abstract write resource per operand width. |
| def WriteFPSQRT32 : SchedWrite; |
| def WriteFPSQRT64 : SchedWrite; |
| |
| // Vector load and stores |
| // Four abstract write resources for vector loads (WriteVLD1..4) and four for |
| // vector stores (WriteVST1..4). The numeric suffix presumably matches the |
| // VLDn/VSTn instruction family -- confirm at the uses. |
| def WriteVLD1 : SchedWrite; |
| def WriteVLD2 : SchedWrite; |
| def WriteVLD3 : SchedWrite; |
| def WriteVLD4 : SchedWrite; |
| def WriteVST1 : SchedWrite; |
| def WriteVST2 : SchedWrite; |
| def WriteVST3 : SchedWrite; |
| def WriteVST4 : SchedWrite; |
| |
| |
| // Define TII and STI for use in SchedVariant Predicates. |
| // The code fragment obtains the instruction info and subtarget info from |
| // SchedModel and casts them to the ARM-specific types ARMBaseInstrInfo and |
| // ARMSubtarget. Each variable is followed by a (void) cast, presumably so that |
| // predicates which do not use it do not trigger unused-variable warnings. |
| def : PredicateProlog<[{ |
| const ARMBaseInstrInfo *TII = |
| static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); |
| (void)TII; |
| const ARMSubtarget *STI = |
| static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo()); |
| (void)STI; |
| }]>; |
| |
| // Scheduling predicate whose condition is TII->isPredicated(*MI). TII is the |
| // variable introduced by the PredicateProlog; MI is not defined in this file |
| // and is expected to be supplied by the context that evaluates the predicate. |
| def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>; |
| |
| //===----------------------------------------------------------------------===// |
| // Instruction Itinerary classes used for ARM |
| // |
| // Each def below only declares an InstrItinClass record; no stages, latencies |
| // or bypasses are attached to the classes in this file. |
| // |
| // IIC_i* classes. Judging by the names these cover integer ALU, bit, unary, |
| // extend, compare/test, move, conditional-move, multiply/multiply-accumulate |
| // and divide operations, with i/r/si/sr suffixes for the operand form -- |
| // confirm against the instruction definitions that reference them. |
| def IIC_iALUx : InstrItinClass; |
| def IIC_iALUi : InstrItinClass; |
| def IIC_iALUr : InstrItinClass; |
| def IIC_iALUsi : InstrItinClass; |
| def IIC_iALUsir : InstrItinClass; |
| def IIC_iALUsr : InstrItinClass; |
| def IIC_iBITi : InstrItinClass; |
| def IIC_iBITr : InstrItinClass; |
| def IIC_iBITsi : InstrItinClass; |
| def IIC_iBITsr : InstrItinClass; |
| def IIC_iUNAr : InstrItinClass; |
| def IIC_iUNAsi : InstrItinClass; |
| def IIC_iEXTr : InstrItinClass; |
| def IIC_iEXTAr : InstrItinClass; |
| def IIC_iEXTAsr : InstrItinClass; |
| def IIC_iCMPi : InstrItinClass; |
| def IIC_iCMPr : InstrItinClass; |
| def IIC_iCMPsi : InstrItinClass; |
| def IIC_iCMPsr : InstrItinClass; |
| def IIC_iTSTi : InstrItinClass; |
| def IIC_iTSTr : InstrItinClass; |
| def IIC_iTSTsi : InstrItinClass; |
| def IIC_iTSTsr : InstrItinClass; |
| def IIC_iMOVi : InstrItinClass; |
| def IIC_iMOVr : InstrItinClass; |
| def IIC_iMOVsi : InstrItinClass; |
| def IIC_iMOVsr : InstrItinClass; |
| def IIC_iMOVix2 : InstrItinClass; |
| def IIC_iMOVix2addpc : InstrItinClass; |
| def IIC_iMOVix2ld : InstrItinClass; |
| def IIC_iMVNi : InstrItinClass; |
| def IIC_iMVNr : InstrItinClass; |
| def IIC_iMVNsi : InstrItinClass; |
| def IIC_iMVNsr : InstrItinClass; |
| def IIC_iCMOVi : InstrItinClass; |
| def IIC_iCMOVr : InstrItinClass; |
| def IIC_iCMOVsi : InstrItinClass; |
| def IIC_iCMOVsr : InstrItinClass; |
| def IIC_iCMOVix2 : InstrItinClass; |
| def IIC_iMUL16 : InstrItinClass; |
| def IIC_iMAC16 : InstrItinClass; |
| def IIC_iMUL32 : InstrItinClass; |
| def IIC_iMAC32 : InstrItinClass; |
| def IIC_iMUL64 : InstrItinClass; |
| def IIC_iMAC64 : InstrItinClass; |
| def IIC_iDIV : InstrItinClass; |
| // Itinerary classes for integer loads, pops, stores and preload. Judging by |
| // the names, _bh_ marks byte/halfword, _d_ doubleword and _m multiple-register |
| // forms, and a trailing u marks base-register update -- confirm against the |
| // instruction definitions that reference them. |
| def IIC_iLoad_i : InstrItinClass; |
| def IIC_iLoad_r : InstrItinClass; |
| def IIC_iLoad_si : InstrItinClass; |
| def IIC_iLoad_iu : InstrItinClass; |
| def IIC_iLoad_ru : InstrItinClass; |
| def IIC_iLoad_siu : InstrItinClass; |
| def IIC_iLoad_bh_i : InstrItinClass; |
| def IIC_iLoad_bh_r : InstrItinClass; |
| def IIC_iLoad_bh_si : InstrItinClass; |
| def IIC_iLoad_bh_iu : InstrItinClass; |
| def IIC_iLoad_bh_ru : InstrItinClass; |
| def IIC_iLoad_bh_siu : InstrItinClass; |
| def IIC_iLoad_d_i : InstrItinClass; |
| def IIC_iLoad_d_r : InstrItinClass; |
| def IIC_iLoad_d_ru : InstrItinClass; |
| def IIC_iLoad_m : InstrItinClass; |
| def IIC_iLoad_mu : InstrItinClass; |
| def IIC_iLoad_mBr : InstrItinClass; |
| def IIC_iPop : InstrItinClass; |
| def IIC_iPop_Br : InstrItinClass; |
| def IIC_iLoadiALU : InstrItinClass; |
| def IIC_iStore_i : InstrItinClass; |
| def IIC_iStore_r : InstrItinClass; |
| def IIC_iStore_si : InstrItinClass; |
| def IIC_iStore_iu : InstrItinClass; |
| def IIC_iStore_ru : InstrItinClass; |
| def IIC_iStore_siu : InstrItinClass; |
| def IIC_iStore_bh_i : InstrItinClass; |
| def IIC_iStore_bh_r : InstrItinClass; |
| def IIC_iStore_bh_si : InstrItinClass; |
| def IIC_iStore_bh_iu : InstrItinClass; |
| def IIC_iStore_bh_ru : InstrItinClass; |
| def IIC_iStore_bh_siu : InstrItinClass; |
| def IIC_iStore_d_i : InstrItinClass; |
| def IIC_iStore_d_r : InstrItinClass; |
| def IIC_iStore_d_ru : InstrItinClass; |
| def IIC_iStore_m : InstrItinClass; |
| def IIC_iStore_mu : InstrItinClass; |
| def IIC_Preload : InstrItinClass; |
| // Itinerary class for branches (the only branch class declared here). |
| def IIC_Br : InstrItinClass; |
| // IIC_fp* itinerary classes. Judging by the names these cover scalar |
| // floating-point status, unary, compare, convert, move, ALU, multiply, |
| // multiply-accumulate, divide, square-root and load/store operations, with |
| // 16/32/64 giving the operand width -- confirm against the instruction |
| // definitions that reference them. |
| def IIC_fpSTAT : InstrItinClass; |
| def IIC_fpUNA16 : InstrItinClass; |
| def IIC_fpUNA32 : InstrItinClass; |
| def IIC_fpUNA64 : InstrItinClass; |
| def IIC_fpCMP16 : InstrItinClass; |
| def IIC_fpCMP32 : InstrItinClass; |
| def IIC_fpCMP64 : InstrItinClass; |
| def IIC_fpCVTSD : InstrItinClass; |
| def IIC_fpCVTDS : InstrItinClass; |
| def IIC_fpCVTSH : InstrItinClass; |
| def IIC_fpCVTHS : InstrItinClass; |
| def IIC_fpCVTIH : InstrItinClass; |
| def IIC_fpCVTIS : InstrItinClass; |
| def IIC_fpCVTID : InstrItinClass; |
| def IIC_fpCVTHI : InstrItinClass; |
| def IIC_fpCVTSI : InstrItinClass; |
| def IIC_fpCVTDI : InstrItinClass; |
| def IIC_fpMOVIS : InstrItinClass; |
| def IIC_fpMOVID : InstrItinClass; |
| def IIC_fpMOVSI : InstrItinClass; |
| def IIC_fpMOVDI : InstrItinClass; |
| def IIC_fpALU16 : InstrItinClass; |
| def IIC_fpALU32 : InstrItinClass; |
| def IIC_fpALU64 : InstrItinClass; |
| def IIC_fpMUL16 : InstrItinClass; |
| def IIC_fpMUL32 : InstrItinClass; |
| def IIC_fpMUL64 : InstrItinClass; |
| def IIC_fpMAC16 : InstrItinClass; |
| def IIC_fpMAC32 : InstrItinClass; |
| def IIC_fpMAC64 : InstrItinClass; |
| def IIC_fpFMAC16 : InstrItinClass; |
| def IIC_fpFMAC32 : InstrItinClass; |
| def IIC_fpFMAC64 : InstrItinClass; |
| def IIC_fpDIV16 : InstrItinClass; |
| def IIC_fpDIV32 : InstrItinClass; |
| def IIC_fpDIV64 : InstrItinClass; |
| def IIC_fpSQRT16 : InstrItinClass; |
| def IIC_fpSQRT32 : InstrItinClass; |
| def IIC_fpSQRT64 : InstrItinClass; |
| def IIC_fpLoad16 : InstrItinClass; |
| def IIC_fpLoad32 : InstrItinClass; |
| def IIC_fpLoad64 : InstrItinClass; |
| def IIC_fpLoad_m : InstrItinClass; |
| def IIC_fpLoad_mu : InstrItinClass; |
| def IIC_fpStore16 : InstrItinClass; |
| def IIC_fpStore32 : InstrItinClass; |
| def IIC_fpStore64 : InstrItinClass; |
| def IIC_fpStore_m : InstrItinClass; |
| def IIC_fpStore_mu : InstrItinClass; |
| // IIC_VLD* / IIC_VST* itinerary classes for vector loads and stores. Judging |
| // by the names, xN marks multi-register forms, ln single-lane forms, dup |
| // load-and-duplicate forms and a trailing u base-register update -- confirm |
| // against the instruction definitions that reference them. |
| def IIC_VLD1 : InstrItinClass; |
| def IIC_VLD1x2 : InstrItinClass; |
| def IIC_VLD1x3 : InstrItinClass; |
| def IIC_VLD1x4 : InstrItinClass; |
| def IIC_VLD1u : InstrItinClass; |
| def IIC_VLD1x2u : InstrItinClass; |
| def IIC_VLD1x3u : InstrItinClass; |
| def IIC_VLD1x4u : InstrItinClass; |
| def IIC_VLD1ln : InstrItinClass; |
| def IIC_VLD1lnu : InstrItinClass; |
| def IIC_VLD1dup : InstrItinClass; |
| def IIC_VLD1dupu : InstrItinClass; |
| def IIC_VLD2 : InstrItinClass; |
| def IIC_VLD2x2 : InstrItinClass; |
| def IIC_VLD2u : InstrItinClass; |
| def IIC_VLD2x2u : InstrItinClass; |
| def IIC_VLD2ln : InstrItinClass; |
| def IIC_VLD2lnu : InstrItinClass; |
| def IIC_VLD2dup : InstrItinClass; |
| def IIC_VLD2dupu : InstrItinClass; |
| def IIC_VLD3 : InstrItinClass; |
| def IIC_VLD3ln : InstrItinClass; |
| def IIC_VLD3u : InstrItinClass; |
| def IIC_VLD3lnu : InstrItinClass; |
| def IIC_VLD3dup : InstrItinClass; |
| def IIC_VLD3dupu : InstrItinClass; |
| def IIC_VLD4 : InstrItinClass; |
| def IIC_VLD4ln : InstrItinClass; |
| def IIC_VLD4u : InstrItinClass; |
| def IIC_VLD4lnu : InstrItinClass; |
| def IIC_VLD4dup : InstrItinClass; |
| def IIC_VLD4dupu : InstrItinClass; |
| def IIC_VST1 : InstrItinClass; |
| def IIC_VST1x2 : InstrItinClass; |
| def IIC_VST1x3 : InstrItinClass; |
| def IIC_VST1x4 : InstrItinClass; |
| def IIC_VST1u : InstrItinClass; |
| def IIC_VST1x2u : InstrItinClass; |
| def IIC_VST1x3u : InstrItinClass; |
| def IIC_VST1x4u : InstrItinClass; |
| def IIC_VST1ln : InstrItinClass; |
| def IIC_VST1lnu : InstrItinClass; |
| def IIC_VST2 : InstrItinClass; |
| def IIC_VST2x2 : InstrItinClass; |
| def IIC_VST2u : InstrItinClass; |
| def IIC_VST2x2u : InstrItinClass; |
| def IIC_VST2ln : InstrItinClass; |
| def IIC_VST2lnu : InstrItinClass; |
| def IIC_VST3 : InstrItinClass; |
| def IIC_VST3u : InstrItinClass; |
| def IIC_VST3ln : InstrItinClass; |
| def IIC_VST3lnu : InstrItinClass; |
| def IIC_VST4 : InstrItinClass; |
| def IIC_VST4u : InstrItinClass; |
| def IIC_VST4ln : InstrItinClass; |
| def IIC_VST4lnu : InstrItinClass; |
| // Remaining IIC_V* itinerary classes for non-memory vector operations. |
| // Judging by the names, a trailing D or Q distinguishes the 64-bit (D) and |
| // 128-bit (Q) register forms, and an i before the width marks integer |
| // variants -- confirm against the instruction definitions that reference |
| // them. |
| def IIC_VUNAD : InstrItinClass; |
| def IIC_VUNAQ : InstrItinClass; |
| def IIC_VBIND : InstrItinClass; |
| def IIC_VBINQ : InstrItinClass; |
| def IIC_VPBIND : InstrItinClass; |
| def IIC_VFMULD : InstrItinClass; |
| def IIC_VFMULQ : InstrItinClass; |
| def IIC_VMOV : InstrItinClass; |
| def IIC_VMOVImm : InstrItinClass; |
| def IIC_VMOVD : InstrItinClass; |
| def IIC_VMOVQ : InstrItinClass; |
| def IIC_VMOVIS : InstrItinClass; |
| def IIC_VMOVID : InstrItinClass; |
| def IIC_VMOVISL : InstrItinClass; |
| def IIC_VMOVSI : InstrItinClass; |
| def IIC_VMOVDI : InstrItinClass; |
| def IIC_VMOVN : InstrItinClass; |
| def IIC_VPERMD : InstrItinClass; |
| def IIC_VPERMQ : InstrItinClass; |
| def IIC_VPERMQ3 : InstrItinClass; |
| def IIC_VMACD : InstrItinClass; |
| def IIC_VMACQ : InstrItinClass; |
| def IIC_VFMACD : InstrItinClass; |
| def IIC_VFMACQ : InstrItinClass; |
| def IIC_VRECSD : InstrItinClass; |
| def IIC_VRECSQ : InstrItinClass; |
| def IIC_VCNTiD : InstrItinClass; |
| def IIC_VCNTiQ : InstrItinClass; |
| def IIC_VUNAiD : InstrItinClass; |
| def IIC_VUNAiQ : InstrItinClass; |
| def IIC_VQUNAiD : InstrItinClass; |
| def IIC_VQUNAiQ : InstrItinClass; |
| def IIC_VBINiD : InstrItinClass; |
| def IIC_VBINiQ : InstrItinClass; |
| def IIC_VSUBiD : InstrItinClass; |
| def IIC_VSUBiQ : InstrItinClass; |
| def IIC_VBINi4D : InstrItinClass; |
| def IIC_VBINi4Q : InstrItinClass; |
| def IIC_VSUBi4D : InstrItinClass; |
| def IIC_VSUBi4Q : InstrItinClass; |
| def IIC_VABAD : InstrItinClass; |
| def IIC_VABAQ : InstrItinClass; |
| def IIC_VSHLiD : InstrItinClass; |
| def IIC_VSHLiQ : InstrItinClass; |
| def IIC_VSHLi4D : InstrItinClass; |
| def IIC_VSHLi4Q : InstrItinClass; |
| def IIC_VPALiD : InstrItinClass; |
| def IIC_VPALiQ : InstrItinClass; |
| def IIC_VMULi16D : InstrItinClass; |
| def IIC_VMULi32D : InstrItinClass; |
| def IIC_VMULi16Q : InstrItinClass; |
| def IIC_VMULi32Q : InstrItinClass; |
| def IIC_VMACi16D : InstrItinClass; |
| def IIC_VMACi32D : InstrItinClass; |
| def IIC_VMACi16Q : InstrItinClass; |
| def IIC_VMACi32Q : InstrItinClass; |
| def IIC_VEXTD : InstrItinClass; |
| def IIC_VEXTQ : InstrItinClass; |
| def IIC_VTB1 : InstrItinClass; |
| def IIC_VTB2 : InstrItinClass; |
| def IIC_VTB3 : InstrItinClass; |
| def IIC_VTB4 : InstrItinClass; |
| def IIC_VTBX1 : InstrItinClass; |
| def IIC_VTBX2 : InstrItinClass; |
| def IIC_VTBX3 : InstrItinClass; |
| def IIC_VTBX4 : InstrItinClass; |
| def IIC_VDOTPROD : InstrItinClass; |
| |
| //===----------------------------------------------------------------------===// |
| // Processor instruction itineraries. |
| |
| include "ARMScheduleV6.td" |
| include "ARMScheduleA8.td" |
| include "ARMScheduleA9.td" |
| include "ARMScheduleSwift.td" |
| include "ARMScheduleR52.td" |
| include "ARMScheduleA57.td" |
| include "ARMScheduleM4.td" |