| //===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the PTXSelectionDAGInfo class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #define DEBUG_TYPE "ptx-selectiondag-info" |
| #include "PTXTargetMachine.h" |
| #include "llvm/DerivedTypes.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| using namespace llvm; |
| |
| PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) |
| : TargetSelectionDAGInfo(TM), |
| Subtarget(&TM.getSubtarget<PTXSubtarget>()) { |
| } |
| |
| PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { |
| } |
| |
| SDValue |
| PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, |
| SDValue Chain, |
| SDValue Dst, SDValue Src, |
| SDValue Size, unsigned Align, |
| bool isVolatile, bool AlwaysInline, |
| MachinePointerInfo DstPtrInfo, |
| MachinePointerInfo SrcPtrInfo) const { |
| // Do repeated 4-byte loads and stores. To be improved. |
| // This requires 4-byte alignment. |
| if ((Align & 3) != 0) |
| return SDValue(); |
| // This requires the copy size to be a constant, preferably |
| // within a subtarget-specific limit. |
| ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); |
| if (!ConstantSize) |
| return SDValue(); |
| uint64_t SizeVal = ConstantSize->getZExtValue(); |
| // Always inline memcpys. In PTX, we do not have a C library that provides |
| // a memcpy function. |
| //if (!AlwaysInline) |
| // return SDValue(); |
| |
| unsigned BytesLeft = SizeVal & 3; |
| unsigned NumMemOps = SizeVal >> 2; |
| unsigned EmittedNumMemOps = 0; |
| EVT VT = MVT::i32; |
| unsigned VTSize = 4; |
| unsigned i = 0; |
| const unsigned MAX_LOADS_IN_LDM = 6; |
| SDValue TFOps[MAX_LOADS_IN_LDM]; |
| SDValue Loads[MAX_LOADS_IN_LDM]; |
| uint64_t SrcOff = 0, DstOff = 0; |
| EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; |
| |
| // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the |
| // same number of stores. The loads and stores will get combined into |
| // ldm/stm later on. |
| while (EmittedNumMemOps < NumMemOps) { |
| for (i = 0; |
| i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { |
| Loads[i] = DAG.getLoad(VT, dl, Chain, |
| DAG.getNode(ISD::ADD, dl, PointerType, Src, |
| DAG.getConstant(SrcOff, PointerType)), |
| SrcPtrInfo.getWithOffset(SrcOff), isVolatile, |
| false, 0); |
| TFOps[i] = Loads[i].getValue(1); |
| SrcOff += VTSize; |
| } |
| Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); |
| |
| for (i = 0; |
| i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { |
| TFOps[i] = DAG.getStore(Chain, dl, Loads[i], |
| DAG.getNode(ISD::ADD, dl, PointerType, Dst, |
| DAG.getConstant(DstOff, PointerType)), |
| DstPtrInfo.getWithOffset(DstOff), |
| isVolatile, false, 0); |
| DstOff += VTSize; |
| } |
| Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); |
| |
| EmittedNumMemOps += i; |
| } |
| |
| if (BytesLeft == 0) |
| return Chain; |
| |
| // Issue loads / stores for the trailing (1 - 3) bytes. |
| unsigned BytesLeftSave = BytesLeft; |
| i = 0; |
| while (BytesLeft) { |
| if (BytesLeft >= 2) { |
| VT = MVT::i16; |
| VTSize = 2; |
| } else { |
| VT = MVT::i8; |
| VTSize = 1; |
| } |
| |
| Loads[i] = DAG.getLoad(VT, dl, Chain, |
| DAG.getNode(ISD::ADD, dl, PointerType, Src, |
| DAG.getConstant(SrcOff, PointerType)), |
| SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); |
| TFOps[i] = Loads[i].getValue(1); |
| ++i; |
| SrcOff += VTSize; |
| BytesLeft -= VTSize; |
| } |
| Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); |
| |
| i = 0; |
| BytesLeft = BytesLeftSave; |
| while (BytesLeft) { |
| if (BytesLeft >= 2) { |
| VT = MVT::i16; |
| VTSize = 2; |
| } else { |
| VT = MVT::i8; |
| VTSize = 1; |
| } |
| |
| TFOps[i] = DAG.getStore(Chain, dl, Loads[i], |
| DAG.getNode(ISD::ADD, dl, PointerType, Dst, |
| DAG.getConstant(DstOff, PointerType)), |
| DstPtrInfo.getWithOffset(DstOff), false, false, 0); |
| ++i; |
| DstOff += VTSize; |
| BytesLeft -= VTSize; |
| } |
| return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); |
| } |
| |
| SDValue PTXSelectionDAGInfo:: |
| EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, |
| SDValue Chain, SDValue Dst, |
| SDValue Src, SDValue Size, |
| unsigned Align, bool isVolatile, |
| MachinePointerInfo DstPtrInfo) const { |
| llvm_unreachable("memset lowering not implemented for PTX yet"); |
| } |
| |