|  | //===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This file defines a machine function pass that sets appropriate FP rounding | 
|  | // modes for all relevant instructions. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #define DEBUG_TYPE "ptx-fp-rounding-mode" | 
|  |  | 
|  | #include "PTX.h" | 
|  | #include "PTXTargetMachine.h" | 
|  | #include "llvm/ADT/DenseMap.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/ErrorHandling.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  |  | 
|  | // NOTE: PTXFPRoundingModePass should be executed just before emission. | 
|  |  | 
|  | namespace llvm { | 
|  | /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to | 
|  | /// all FP instructions. Essentially, this pass just looks for all FP | 
|  | /// instructions that have a rounding mode set to RndDefault, and sets an | 
|  | /// appropriate rounding mode based on the target device. | 
|  | /// | 
|  | class PTXFPRoundingModePass : public MachineFunctionPass { | 
|  | private: | 
|  | static char ID; | 
|  |  | 
|  | typedef std::pair<unsigned, unsigned> RndModeDesc; | 
|  |  | 
|  | PTXTargetMachine& TargetMachine; | 
|  | DenseMap<unsigned, RndModeDesc> Instrs; | 
|  |  | 
|  | public: | 
|  | PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) | 
|  | : MachineFunctionPass(ID), | 
|  | TargetMachine(TM) { | 
|  | initializeMap(); | 
|  | } | 
|  |  | 
|  | virtual bool runOnMachineFunction(MachineFunction &MF); | 
|  |  | 
|  | virtual const char *getPassName() const { | 
|  | return "PTX FP Rounding Mode Pass"; | 
|  | } | 
|  |  | 
|  | private: | 
|  |  | 
|  | void initializeMap(); | 
|  | void processInstruction(MachineInstr &MI); | 
|  | }; // class PTXFPRoundingModePass | 
|  | } // namespace llvm | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | char PTXFPRoundingModePass::ID = 0; | 
|  |  | 
|  | bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { | 
|  | // Look at each basic block | 
|  | for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; | 
|  | ++bbi) { | 
|  | MachineBasicBlock &MBB = *bbi; | 
|  | // Look at each instruction | 
|  | for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); | 
|  | ii != ie; ++ii) { | 
|  | MachineInstr &MI = *ii; | 
|  | processInstruction(MI); | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | void PTXFPRoundingModePass::initializeMap() { | 
|  | using namespace PTXRoundingMode; | 
|  | const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); | 
|  |  | 
|  | // Build a map of default rounding mode for all instructions that need a | 
|  | // rounding mode. | 
|  | Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  |  | 
|  | Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); | 
|  | Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); | 
|  | Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); | 
|  | Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); | 
|  |  | 
|  | unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; | 
|  | Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); | 
|  | Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); | 
|  | Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); | 
|  | Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); | 
|  |  | 
|  | unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone; | 
|  | Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); | 
|  | Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); | 
|  | Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); | 
|  | Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); | 
|  | Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); | 
|  | Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); | 
|  |  | 
|  | Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  |  | 
|  | Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); | 
|  | Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); | 
|  | Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); | 
|  | Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); | 
|  | Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); | 
|  | Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); | 
|  | Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); | 
|  | Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); | 
|  |  | 
|  | Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  | Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); | 
|  |  | 
|  | Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); | 
|  | } | 
|  |  | 
|  | void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { | 
|  | // Is this an instruction that needs a rounding mode? | 
|  | if (Instrs.count(MI.getOpcode())) { | 
|  | const RndModeDesc &Desc = Instrs[MI.getOpcode()]; | 
|  | // Get the rounding mode operand | 
|  | MachineOperand &Op = MI.getOperand(Desc.first); | 
|  | // Update the rounding mode if needed | 
|  | if (Op.getImm() == PTXRoundingMode::RndDefault) { | 
|  | Op.setImm(Desc.second); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, | 
|  | CodeGenOpt::Level OptLevel) { | 
|  | return new PTXFPRoundingModePass(TM, OptLevel); | 
|  | } | 
|  |  |