| // |
| //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the SPUTargetLowering class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "SPURegisterNames.h" |
| #include "SPUISelLowering.h" |
| #include "SPUTargetMachine.h" |
| #include "SPUFrameInfo.h" |
| #include "SPUMachineFunction.h" |
| #include "llvm/Constants.h" |
| #include "llvm/Function.h" |
| #include "llvm/Intrinsics.h" |
| #include "llvm/CallingConv.h" |
| #include "llvm/CodeGen/CallingConvLower.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineInstrBuilder.h" |
| #include "llvm/CodeGen/MachineRegisterInfo.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include "llvm/ADT/VectorExtras.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <map> |
| |
| using namespace llvm; |
| |
| // Used in getTargetNodeName() below |
| namespace { |
| std::map<unsigned, const char *> node_names; |
| |
| //! EVT mapping to useful data for Cell SPU |
| struct valtype_map_s { |
| EVT valtype; |
| int prefslot_byte; |
| }; |
| |
| const valtype_map_s valtype_map[] = { |
| { MVT::i1, 3 }, |
| { MVT::i8, 3 }, |
| { MVT::i16, 2 }, |
| { MVT::i32, 0 }, |
| { MVT::f32, 0 }, |
| { MVT::i64, 0 }, |
| { MVT::f64, 0 }, |
| { MVT::i128, 0 } |
| }; |
| |
| const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); |
| |
| const valtype_map_s *getValueTypeMapEntry(EVT VT) { |
| const valtype_map_s *retval = 0; |
| |
| for (size_t i = 0; i < n_valtype_map; ++i) { |
| if (valtype_map[i].valtype == VT) { |
| retval = valtype_map + i; |
| break; |
| } |
| } |
| |
| #ifndef NDEBUG |
| if (retval == 0) { |
| report_fatal_error("getValueTypeMapEntry returns NULL for " + |
| Twine(VT.getEVTString())); |
| } |
| #endif |
| |
| return retval; |
| } |
| |
| //! Expand a library call into an actual call DAG node |
| /*! |
| \note |
| This code is taken from SelectionDAGLegalize, since it is not exposed as |
| part of the LLVM SelectionDAG API. |
| */ |
| |
| SDValue |
| ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, |
| bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { |
| // The input chain to this libcall is the entry node of the function. |
| // Legalizing the call will automatically add the previous call to the |
| // dependence. |
| SDValue InChain = DAG.getEntryNode(); |
| |
| TargetLowering::ArgListTy Args; |
| TargetLowering::ArgListEntry Entry; |
| for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
| EVT ArgVT = Op.getOperand(i).getValueType(); |
| const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); |
| Entry.Node = Op.getOperand(i); |
| Entry.Ty = ArgTy; |
| Entry.isSExt = isSigned; |
| Entry.isZExt = !isSigned; |
| Args.push_back(Entry); |
| } |
| SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), |
| TLI.getPointerTy()); |
| |
| // Splice the libcall in wherever FindInputOutputChains tells us to. |
| const Type *RetTy = |
| Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); |
| std::pair<SDValue, SDValue> CallInfo = |
| TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, |
| 0, TLI.getLibcallCallingConv(LC), false, |
| /*isReturnValueUsed=*/true, |
| Callee, Args, DAG, Op.getDebugLoc()); |
| |
| return CallInfo.first; |
| } |
| } |
| |
| SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) |
| : TargetLowering(TM, new TargetLoweringObjectFileELF()), |
| SPUTM(TM) { |
| // Fold away setcc operations if possible. |
| setPow2DivIsCheap(); |
| |
| // Use _setjmp/_longjmp instead of setjmp/longjmp. |
| setUseUnderscoreSetJmp(true); |
| setUseUnderscoreLongJmp(true); |
| |
| // Set RTLIB libcall names as used by SPU: |
| setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); |
| |
| // Set up the SPU's register classes: |
| addRegisterClass(MVT::i8, SPU::R8CRegisterClass); |
| addRegisterClass(MVT::i16, SPU::R16CRegisterClass); |
| addRegisterClass(MVT::i32, SPU::R32CRegisterClass); |
| addRegisterClass(MVT::i64, SPU::R64CRegisterClass); |
| addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); |
| addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); |
| addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); |
| |
| // SPU has no sign or zero extended loads for i1, i8, i16: |
| setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); |
| setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); |
| setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); |
| |
| setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); |
| setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); |
| |
| setTruncStoreAction(MVT::i128, MVT::i64, Expand); |
| setTruncStoreAction(MVT::i128, MVT::i32, Expand); |
| setTruncStoreAction(MVT::i128, MVT::i16, Expand); |
| setTruncStoreAction(MVT::i128, MVT::i8, Expand); |
| |
| setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| |
| // SPU constant load actions are custom lowered: |
| setOperationAction(ISD::ConstantFP, MVT::f32, Legal); |
| setOperationAction(ISD::ConstantFP, MVT::f64, Custom); |
| |
| // SPU's loads and stores have to be custom lowered: |
| for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; |
| ++sctype) { |
| MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| setLoadExtAction(ISD::EXTLOAD, VT, Custom); |
| setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); |
| setLoadExtAction(ISD::SEXTLOAD, VT, Custom); |
| |
| for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { |
| MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; |
| setTruncStoreAction(VT, StoreVT, Expand); |
| } |
| } |
| |
| for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; |
| ++sctype) { |
| MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; |
| |
| setOperationAction(ISD::LOAD, VT, Custom); |
| setOperationAction(ISD::STORE, VT, Custom); |
| |
| for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { |
| MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; |
| setTruncStoreAction(VT, StoreVT, Expand); |
| } |
| } |
| |
| // Expand the jumptable branches |
| setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
| setOperationAction(ISD::BR_CC, MVT::Other, Expand); |
| |
| // Custom lower SELECT_CC for most cases, but expand by default |
| setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); |
| setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
| setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); |
| |
| // SPU has no intrinsics for these particular operations: |
| setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); |
| |
| // SPU has no division/remainder instructions |
| setOperationAction(ISD::SREM, MVT::i8, Expand); |
| setOperationAction(ISD::UREM, MVT::i8, Expand); |
| setOperationAction(ISD::SDIV, MVT::i8, Expand); |
| setOperationAction(ISD::UDIV, MVT::i8, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i8, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i8, Expand); |
| setOperationAction(ISD::SREM, MVT::i16, Expand); |
| setOperationAction(ISD::UREM, MVT::i16, Expand); |
| setOperationAction(ISD::SDIV, MVT::i16, Expand); |
| setOperationAction(ISD::UDIV, MVT::i16, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i16, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i16, Expand); |
| setOperationAction(ISD::SREM, MVT::i32, Expand); |
| setOperationAction(ISD::UREM, MVT::i32, Expand); |
| setOperationAction(ISD::SDIV, MVT::i32, Expand); |
| setOperationAction(ISD::UDIV, MVT::i32, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
| setOperationAction(ISD::SREM, MVT::i64, Expand); |
| setOperationAction(ISD::UREM, MVT::i64, Expand); |
| setOperationAction(ISD::SDIV, MVT::i64, Expand); |
| setOperationAction(ISD::UDIV, MVT::i64, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
| setOperationAction(ISD::SREM, MVT::i128, Expand); |
| setOperationAction(ISD::UREM, MVT::i128, Expand); |
| setOperationAction(ISD::SDIV, MVT::i128, Expand); |
| setOperationAction(ISD::UDIV, MVT::i128, Expand); |
| setOperationAction(ISD::SDIVREM, MVT::i128, Expand); |
| setOperationAction(ISD::UDIVREM, MVT::i128, Expand); |
| |
| // We don't support sin/cos/sqrt/fmod |
| setOperationAction(ISD::FSIN , MVT::f64, Expand); |
| setOperationAction(ISD::FCOS , MVT::f64, Expand); |
| setOperationAction(ISD::FREM , MVT::f64, Expand); |
| setOperationAction(ISD::FSIN , MVT::f32, Expand); |
| setOperationAction(ISD::FCOS , MVT::f32, Expand); |
| setOperationAction(ISD::FREM , MVT::f32, Expand); |
| |
| // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt |
| // for f32!) |
| setOperationAction(ISD::FSQRT, MVT::f64, Expand); |
| setOperationAction(ISD::FSQRT, MVT::f32, Expand); |
| |
| setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
| setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); |
| |
| // SPU can do rotate right and left, so legalize it... but customize for i8 |
| // because instructions don't exist. |
| |
| // FIXME: Change from "expand" to appropriate type once ROTR is supported in |
| // .td files. |
| setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); |
| setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); |
| setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); |
| |
| setOperationAction(ISD::ROTL, MVT::i32, Legal); |
| setOperationAction(ISD::ROTL, MVT::i16, Legal); |
| setOperationAction(ISD::ROTL, MVT::i8, Custom); |
| |
| // SPU has no native version of shift left/right for i8 |
| setOperationAction(ISD::SHL, MVT::i8, Custom); |
| setOperationAction(ISD::SRL, MVT::i8, Custom); |
| setOperationAction(ISD::SRA, MVT::i8, Custom); |
| |
| // Make these operations legal and handle them during instruction selection: |
| setOperationAction(ISD::SHL, MVT::i64, Legal); |
| setOperationAction(ISD::SRL, MVT::i64, Legal); |
| setOperationAction(ISD::SRA, MVT::i64, Legal); |
| |
| // Custom lower i8, i32 and i64 multiplications |
| setOperationAction(ISD::MUL, MVT::i8, Custom); |
| setOperationAction(ISD::MUL, MVT::i32, Legal); |
| setOperationAction(ISD::MUL, MVT::i64, Legal); |
| |
| // Expand double-width multiplication |
| // FIXME: It would probably be reasonable to support some of these operations |
| setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); |
| setOperationAction(ISD::MULHU, MVT::i8, Expand); |
| setOperationAction(ISD::MULHS, MVT::i8, Expand); |
| setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); |
| setOperationAction(ISD::MULHU, MVT::i16, Expand); |
| setOperationAction(ISD::MULHS, MVT::i16, Expand); |
| setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); |
| setOperationAction(ISD::MULHU, MVT::i32, Expand); |
| setOperationAction(ISD::MULHS, MVT::i32, Expand); |
| setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
| setOperationAction(ISD::MULHU, MVT::i64, Expand); |
| setOperationAction(ISD::MULHS, MVT::i64, Expand); |
| |
| // Need to custom handle (some) common i8, i64 math ops |
| setOperationAction(ISD::ADD, MVT::i8, Custom); |
| setOperationAction(ISD::ADD, MVT::i64, Legal); |
| setOperationAction(ISD::SUB, MVT::i8, Custom); |
| setOperationAction(ISD::SUB, MVT::i64, Legal); |
| |
| // SPU does not have BSWAP. It does have i32 support CTLZ. |
| // CTPOP has to be custom lowered. |
| setOperationAction(ISD::BSWAP, MVT::i32, Expand); |
| setOperationAction(ISD::BSWAP, MVT::i64, Expand); |
| |
| setOperationAction(ISD::CTPOP, MVT::i8, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i16, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i32, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i64, Custom); |
| setOperationAction(ISD::CTPOP, MVT::i128, Expand); |
| |
| setOperationAction(ISD::CTTZ , MVT::i8, Expand); |
| setOperationAction(ISD::CTTZ , MVT::i16, Expand); |
| setOperationAction(ISD::CTTZ , MVT::i32, Expand); |
| setOperationAction(ISD::CTTZ , MVT::i64, Expand); |
| setOperationAction(ISD::CTTZ , MVT::i128, Expand); |
| |
| setOperationAction(ISD::CTLZ , MVT::i8, Promote); |
| setOperationAction(ISD::CTLZ , MVT::i16, Promote); |
| setOperationAction(ISD::CTLZ , MVT::i32, Legal); |
| setOperationAction(ISD::CTLZ , MVT::i64, Expand); |
| setOperationAction(ISD::CTLZ , MVT::i128, Expand); |
| |
| // SPU has a version of select that implements (a&~c)|(b&c), just like |
| // select ought to work: |
| setOperationAction(ISD::SELECT, MVT::i8, Legal); |
| setOperationAction(ISD::SELECT, MVT::i16, Legal); |
| setOperationAction(ISD::SELECT, MVT::i32, Legal); |
| setOperationAction(ISD::SELECT, MVT::i64, Legal); |
| |
| setOperationAction(ISD::SETCC, MVT::i8, Legal); |
| setOperationAction(ISD::SETCC, MVT::i16, Legal); |
| setOperationAction(ISD::SETCC, MVT::i32, Legal); |
| setOperationAction(ISD::SETCC, MVT::i64, Legal); |
| setOperationAction(ISD::SETCC, MVT::f64, Custom); |
| |
| // Custom lower i128 -> i64 truncates |
| setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); |
| |
| // Custom lower i32/i64 -> i128 sign extend |
| setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); |
| |
| setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); |
| // SPU has a legal FP -> signed INT instruction for f32, but for f64, need |
| // to expand to a libcall, hence the custom lowering: |
| setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); |
| setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); |
| setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); |
| |
| // FDIV on SPU requires custom lowering |
| setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall |
| |
| // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: |
| setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
| |
| setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal); |
| setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal); |
| setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal); |
| setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal); |
| |
| // We cannot sextinreg(i1). Expand to shifts. |
| setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
| |
| // We want to legalize GlobalAddress and ConstantPool nodes into the |
| // appropriate instructions to materialize the address. |
| for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; |
| ++sctype) { |
| MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; |
| |
| setOperationAction(ISD::GlobalAddress, VT, Custom); |
| setOperationAction(ISD::ConstantPool, VT, Custom); |
| setOperationAction(ISD::JumpTable, VT, Custom); |
| } |
| |
| // VASTART needs to be custom lowered to use the VarArgsFrameIndex |
| setOperationAction(ISD::VASTART , MVT::Other, Custom); |
| |
| // Use the default implementation. |
| setOperationAction(ISD::VAARG , MVT::Other, Expand); |
| setOperationAction(ISD::VACOPY , MVT::Other, Expand); |
| setOperationAction(ISD::VAEND , MVT::Other, Expand); |
| setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); |
| setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); |
| setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); |
| |
| // Cell SPU has instructions for converting between i64 and fp. |
| setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| |
| // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT |
| setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); |
| |
| // BUILD_PAIR can't be handled natively, and should be expanded to shl/or |
| setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
| |
| // First set operation action for all vector types to expand. Then we |
| // will selectively turn on ones that can be effectively codegen'd. |
| addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); |
| addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); |
| addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); |
| addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); |
| addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); |
| addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); |
| |
| // "Odd size" vector classes that we're willing to support: |
| addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); |
| addRegisterClass(MVT::v2f32, SPU::VECREGRegisterClass); |
| |
| for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; |
| i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { |
| MVT::SimpleValueType VT = (MVT::SimpleValueType)i; |
| |
| // add/sub are legal for all supported vector VT's. |
| setOperationAction(ISD::ADD, VT, Legal); |
| setOperationAction(ISD::SUB, VT, Legal); |
| // mul has to be custom lowered. |
| setOperationAction(ISD::MUL, VT, Legal); |
| |
| setOperationAction(ISD::AND, VT, Legal); |
| setOperationAction(ISD::OR, VT, Legal); |
| setOperationAction(ISD::XOR, VT, Legal); |
| setOperationAction(ISD::LOAD, VT, Legal); |
| setOperationAction(ISD::SELECT, VT, Legal); |
| setOperationAction(ISD::STORE, VT, Legal); |
| |
| // These operations need to be expanded: |
| setOperationAction(ISD::SDIV, VT, Expand); |
| setOperationAction(ISD::SREM, VT, Expand); |
| setOperationAction(ISD::UDIV, VT, Expand); |
| setOperationAction(ISD::UREM, VT, Expand); |
| |
| // Custom lower build_vector, constant pool spills, insert and |
| // extract vector elements: |
| setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| setOperationAction(ISD::ConstantPool, VT, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| } |
| |
| setOperationAction(ISD::AND, MVT::v16i8, Custom); |
| setOperationAction(ISD::OR, MVT::v16i8, Custom); |
| setOperationAction(ISD::XOR, MVT::v16i8, Custom); |
| setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); |
| |
| setOperationAction(ISD::FDIV, MVT::v4f32, Legal); |
| |
| setOperationAction(ISD::STORE, MVT::v2i32, Custom); |
| setOperationAction(ISD::STORE, MVT::v2f32, Custom); |
| |
| setShiftAmountType(MVT::i32); |
| setBooleanContents(ZeroOrNegativeOneBooleanContent); |
| |
| setStackPointerRegisterToSaveRestore(SPU::R1); |
| |
| // We have target-specific dag combine patterns for the following nodes: |
| setTargetDAGCombine(ISD::ADD); |
| setTargetDAGCombine(ISD::ZERO_EXTEND); |
| setTargetDAGCombine(ISD::SIGN_EXTEND); |
| setTargetDAGCombine(ISD::ANY_EXTEND); |
| |
| computeRegisterProperties(); |
| |
| // Set pre-RA register scheduler default to BURR, which produces slightly |
| // better code than the default (could also be TDRR, but TargetLowering.h |
| // needs a mod to support that model): |
| setSchedulingPreference(Sched::RegPressure); |
| } |
| |
| const char * |
| SPUTargetLowering::getTargetNodeName(unsigned Opcode) const |
| { |
| if (node_names.empty()) { |
| node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; |
| node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; |
| node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; |
| node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; |
| node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; |
| node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; |
| node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; |
| node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; |
| node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; |
| node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; |
| node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; |
| node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; |
| node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; |
| node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; |
| node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; |
| node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; |
| node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; |
| node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; |
| node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = |
| "SPUISD::ROTBYTES_LEFT_BITS"; |
| node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; |
| node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; |
| node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; |
| node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; |
| node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; |
| node_names[(unsigned) SPUISD::HALF2VEC] = "SPUISD::HALF2VEC"; |
| node_names[(unsigned) SPUISD::VEC2HALF] = "SPUISD::VEC2HALF"; |
| } |
| |
| std::map<unsigned, const char *>::iterator i = node_names.find(Opcode); |
| |
| return ((i != node_names.end()) ? i->second : 0); |
| } |
| |
| /// getFunctionAlignment - Return the Log2 alignment of this function. |
| unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { |
| return 3; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Return the Cell SPU's SETCC result type |
| //===----------------------------------------------------------------------===// |
| |
| MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const { |
| // i16 and i32 are valid SETCC result types |
| return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? |
| VT.getSimpleVT().SimpleTy : |
| MVT::i32); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Calling convention code: |
| //===----------------------------------------------------------------------===// |
| |
| #include "SPUGenCallingConv.inc" |
| |
| //===----------------------------------------------------------------------===// |
| // LowerOperation implementation |
| //===----------------------------------------------------------------------===// |
| |
| /// Custom lower loads for CellSPU |
| /*! |
| All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements |
| within a 16-byte block, we have to rotate to extract the requested element. |
| |
| For extending loads, we also want to ensure that the following sequence is |
| emitted, e.g. for MVT::f32 extending load to MVT::f64: |
| |
| \verbatim |
| %1 v16i8,ch = load |
| %2 v16i8,ch = rotate %1 |
| %3 v4f8, ch = bitconvert %2 |
| %4 f32 = vec2perfslot %3 |
| %5 f64 = fp_extend %4 |
| \endverbatim |
| */ |
| static SDValue |
| LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { |
| LoadSDNode *LN = cast<LoadSDNode>(Op); |
| SDValue the_chain = LN->getChain(); |
| EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); |
| EVT InVT = LN->getMemoryVT(); |
| EVT OutVT = Op.getValueType(); |
| ISD::LoadExtType ExtType = LN->getExtensionType(); |
| unsigned alignment = LN->getAlignment(); |
| const valtype_map_s *vtm = getValueTypeMapEntry(InVT); |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| switch (LN->getAddressingMode()) { |
| case ISD::UNINDEXED: { |
| SDValue result; |
| SDValue basePtr = LN->getBasePtr(); |
| SDValue rotate; |
| |
| if (alignment == 16) { |
| ConstantSDNode *CN; |
| |
| // Special cases for a known aligned load to simplify the base pointer |
| // and the rotation amount: |
| if (basePtr.getOpcode() == ISD::ADD |
| && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) { |
| // Known offset into basePtr |
| int64_t offset = CN->getSExtValue(); |
| int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte); |
| |
| if (rotamt < 0) |
| rotamt += 16; |
| |
| rotate = DAG.getConstant(rotamt, MVT::i16); |
| |
| // Simplify the base pointer for this case: |
| basePtr = basePtr.getOperand(0); |
| if ((offset & ~0xf) > 0) { |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| basePtr, |
| DAG.getConstant((offset & ~0xf), PtrVT)); |
| } |
| } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) |
| || (basePtr.getOpcode() == SPUISD::IndirectAddr |
| && basePtr.getOperand(0).getOpcode() == SPUISD::Hi |
| && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { |
| // Plain aligned a-form address: rotate into preferred slot |
| // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) |
| int64_t rotamt = -vtm->prefslot_byte; |
| if (rotamt < 0) |
| rotamt += 16; |
| rotate = DAG.getConstant(rotamt, MVT::i16); |
| } else { |
| // Offset the rotate amount by the basePtr and the preferred slot |
| // byte offset |
| int64_t rotamt = -vtm->prefslot_byte; |
| if (rotamt < 0) |
| rotamt += 16; |
| rotate = DAG.getNode(ISD::ADD, dl, PtrVT, |
| basePtr, |
| DAG.getConstant(rotamt, PtrVT)); |
| } |
| } else { |
| // Unaligned load: must be more pessimistic about addressing modes: |
| if (basePtr.getOpcode() == ISD::ADD) { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
| unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); |
| SDValue Flag; |
| |
| SDValue Op0 = basePtr.getOperand(0); |
| SDValue Op1 = basePtr.getOperand(1); |
| |
| if (isa<ConstantSDNode>(Op1)) { |
| // Convert the (add <ptr>, <const>) to an indirect address contained |
| // in a register. Note that this is done because we need to avoid |
| // creating a 0(reg) d-form address due to the SPU's block loads. |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); |
| the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); |
| basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); |
| } else { |
| // Convert the (add <arg1>, <arg2>) to an indirect address, which |
| // will likely be lowered as a reg(reg) x-form address. |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); |
| } |
| } else { |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| basePtr, |
| DAG.getConstant(0, PtrVT)); |
| } |
| |
| // Offset the rotate amount by the basePtr and the preferred slot |
| // byte offset |
| rotate = DAG.getNode(ISD::ADD, dl, PtrVT, |
| basePtr, |
| DAG.getConstant(-vtm->prefslot_byte, PtrVT)); |
| } |
| |
| // Re-emit as a v16i8 vector load |
| result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, |
| LN->getSrcValue(), LN->getSrcValueOffset(), |
| LN->isVolatile(), LN->isNonTemporal(), 16); |
| |
| // Update the chain |
| the_chain = result.getValue(1); |
| |
| // Rotate into the preferred slot: |
| result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8, |
| result.getValue(0), rotate); |
| |
| // Convert the loaded v16i8 vector to the appropriate vector type |
| // specified by the operand: |
| EVT vecVT = EVT::getVectorVT(*DAG.getContext(), |
| InVT, (128 / InVT.getSizeInBits())); |
| result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, |
| DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result)); |
| |
| // Handle extending loads by extending the scalar result: |
| if (ExtType == ISD::SEXTLOAD) { |
| result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); |
| } else if (ExtType == ISD::ZEXTLOAD) { |
| result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); |
| } else if (ExtType == ISD::EXTLOAD) { |
| unsigned NewOpc = ISD::ANY_EXTEND; |
| |
| if (OutVT.isFloatingPoint()) |
| NewOpc = ISD::FP_EXTEND; |
| |
| result = DAG.getNode(NewOpc, dl, OutVT, result); |
| } |
| |
| SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); |
| SDValue retops[2] = { |
| result, |
| the_chain |
| }; |
| |
| result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, |
| retops, sizeof(retops) / sizeof(retops[0])); |
| return result; |
| } |
| case ISD::PRE_INC: |
| case ISD::PRE_DEC: |
| case ISD::POST_INC: |
| case ISD::POST_DEC: |
| case ISD::LAST_INDEXED_MODE: |
| { |
| report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " |
| "than UNINDEXED\n" + |
| Twine((unsigned)LN->getAddressingMode())); |
| /*NOTREACHED*/ |
| } |
| } |
| |
| return SDValue(); |
| } |
| |
| /// Custom lower stores for CellSPU |
| /*! |
| All CellSPU stores are aligned to 16-byte boundaries, so for elements |
| within a 16-byte block, we have to generate a shuffle to insert the |
| requested element into its place, then store the resulting block. |
| */ |
| static SDValue |
| LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { |
| StoreSDNode *SN = cast<StoreSDNode>(Op); |
| SDValue Value = SN->getValue(); |
| EVT VT = Value.getValueType(); |
| EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); |
| EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); |
| DebugLoc dl = Op.getDebugLoc(); |
| unsigned alignment = SN->getAlignment(); |
| const bool isVec = VT.isVector(); |
| EVT eltTy = isVec ? VT.getVectorElementType(): VT; |
| |
| switch (SN->getAddressingMode()) { |
| case ISD::UNINDEXED: { |
| // The vector type we really want to load from the 16-byte chunk. |
| EVT vecVT = EVT::getVectorVT(*DAG.getContext(), |
| eltTy, (128 / eltTy.getSizeInBits())); |
| |
| SDValue alignLoadVec; |
| SDValue basePtr = SN->getBasePtr(); |
| SDValue the_chain = SN->getChain(); |
| SDValue insertEltOffs; |
| |
| if (alignment == 16) { |
| ConstantSDNode *CN; |
| // Special cases for a known aligned load to simplify the base pointer |
| // and insertion byte: |
| if (basePtr.getOpcode() == ISD::ADD |
| && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { |
| // Known offset into basePtr |
| int64_t offset = CN->getSExtValue(); |
| |
| // Simplify the base pointer for this case: |
| basePtr = basePtr.getOperand(0); |
| insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| basePtr, |
| DAG.getConstant((offset & 0xf), PtrVT)); |
| |
| if ((offset & ~0xf) > 0) { |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| basePtr, |
| DAG.getConstant((offset & ~0xf), PtrVT)); |
| } |
| } else { |
| // Otherwise, assume it's at byte 0 of basePtr |
| insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| basePtr, |
| DAG.getConstant(0, PtrVT)); |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| basePtr, |
| DAG.getConstant(0, PtrVT)); |
| } |
| } else { |
| // Unaligned store: must be more pessimistic about addressing modes: |
| if (basePtr.getOpcode() == ISD::ADD) { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
| unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); |
| SDValue Flag; |
| |
| SDValue Op0 = basePtr.getOperand(0); |
| SDValue Op1 = basePtr.getOperand(1); |
| |
| if (isa<ConstantSDNode>(Op1)) { |
| // Convert the (add <ptr>, <const>) to an indirect address contained |
| // in a register. Note that this is done because we need to avoid |
| // creating a 0(reg) d-form address due to the SPU's block loads. |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); |
| the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); |
| basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); |
| } else { |
| // Convert the (add <arg1>, <arg2>) to an indirect address, which |
| // will likely be lowered as a reg(reg) x-form address. |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); |
| } |
| } else { |
| basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| basePtr, |
| DAG.getConstant(0, PtrVT)); |
| } |
| |
| // Insertion point is solely determined by basePtr's contents |
| insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, |
| basePtr, |
| DAG.getConstant(0, PtrVT)); |
| } |
| |
| // Load the memory to which to store. |
| alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr, |
| SN->getSrcValue(), SN->getSrcValueOffset(), |
| SN->isVolatile(), SN->isNonTemporal(), 16); |
| |
| // Update the chain |
| the_chain = alignLoadVec.getValue(1); |
| |
| LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec); |
| SDValue theValue = SN->getValue(); |
| SDValue result; |
| |
| if (StVT != VT |
| && (theValue.getOpcode() == ISD::AssertZext |
| || theValue.getOpcode() == ISD::AssertSext)) { |
| // Drill down and get the value for zero- and sign-extended |
| // quantities |
| theValue = theValue.getOperand(0); |
| } |
| |
| // If the base pointer is already a D-form address, then just create |
| // a new D-form address with a slot offset and the original base pointer. |
| // Otherwise generate a D-form address with the slot offset relative |
| // to the stack pointer, which is always aligned. |
| #if !defined(NDEBUG) |
| if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { |
| errs() << "CellSPU LowerSTORE: basePtr = "; |
| basePtr.getNode()->dump(&DAG); |
| errs() << "\n"; |
| } |
| #endif |
| |
| SDValue insertEltOp; |
| SDValue vectorizeOp; |
| if (isVec) |
| { |
| // FIXME: this works only if the vector is 64bit! |
| insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v2i64, insertEltOffs); |
| vectorizeOp = DAG.getNode(SPUISD::HALF2VEC, dl, vecVT, theValue); |
| } |
| else |
| { |
| insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); |
| vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); |
| } |
| result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, |
| vectorizeOp, alignLoadVec, |
| DAG.getNode(ISD::BIT_CONVERT, dl, |
| MVT::v4i32, insertEltOp)); |
| |
| result = DAG.getStore(the_chain, dl, result, basePtr, |
| LN->getSrcValue(), LN->getSrcValueOffset(), |
| LN->isVolatile(), LN->isNonTemporal(), |
| LN->getAlignment()); |
| |
| #if 0 && !defined(NDEBUG) |
| if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { |
| const SDValue ¤tRoot = DAG.getRoot(); |
| |
| DAG.setRoot(result); |
| errs() << "------- CellSPU:LowerStore result:\n"; |
| DAG.dump(); |
| errs() << "-------\n"; |
| DAG.setRoot(currentRoot); |
| } |
| #endif |
| |
| return result; |
| /*UNREACHED*/ |
| } |
| case ISD::PRE_INC: |
| case ISD::PRE_DEC: |
| case ISD::POST_INC: |
| case ISD::POST_DEC: |
| case ISD::LAST_INDEXED_MODE: |
| { |
| report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " |
| "than UNINDEXED\n" + |
| Twine((unsigned)SN->getAddressingMode())); |
| /*NOTREACHED*/ |
| } |
| } |
| |
| return SDValue(); |
| } |
| |
| //! Generate the address of a constant pool entry. |
| //! |
| //! \param Op  The ConstantPool node to lower; its value type is used as the |
| //!            pointer type of every node created here. |
| //! \param DAG The selection DAG in which the new nodes are created. |
| //! \param ST  Subtarget, consulted for small- vs. large-memory addressing. |
| //! \return An AFormAddr node in small-memory mode, or an IndirectAddr node |
| //!         combining the Hi and Lo parts of the address in large-memory mode. |
| //! |
| //! Only the static relocation model is handled. Since the relocation model is |
| //! selected by the user, any other model is reported with |
| //! report_fatal_error(), as LowerGlobalAddress() does, instead of being |
| //! treated as unreachable code. |
| static SDValue |
| LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { |
|   const TargetMachine &TM = DAG.getTarget(); |
| |
|   if (TM.getRelocationModel() != Reloc::Static) |
|     report_fatal_error("LowerConstantPool: Relocation model other than static" |
|                        " not supported."); |
| |
|   EVT PtrVT = Op.getValueType(); |
|   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); |
|   const Constant *C = CP->getConstVal(); |
|   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); |
|   SDValue Zero = DAG.getConstant(0, PtrVT); |
|   // FIXME there is no actual debug info here |
|   DebugLoc dl = Op.getDebugLoc(); |
| |
|   if (!ST->usingLargeMem()) { |
|     // Just return the SDValue with the constant pool address in it. |
|     return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); |
|   } |
| |
|   // Large memory: build the address from its high and low parts. |
|   SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); |
|   SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); |
|   return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); |
| } |
| |
| //! Alternate entry point for generating the address of a constant pool entry. |
| //! |
| //! Fetches the subtarget from \p TM and forwards to the file-local |
| //! ::LowerConstantPool(). |
| SDValue |
| SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { |
|   const SPUSubtarget *Subtarget = TM.getSubtargetImpl(); |
|   return ::LowerConstantPool(Op, DAG, Subtarget); |
| } |
| |
| //! Generate the address of a jump table. |
| //! |
| //! \param Op  The JumpTable node to lower; its value type is used as the |
| //!            pointer type of every node created here. |
| //! \param DAG The selection DAG in which the new nodes are created. |
| //! \param ST  Subtarget, consulted for small- vs. large-memory addressing. |
| //! \return An AFormAddr node in small-memory mode, or an IndirectAddr node |
| //!         combining the Hi and Lo parts of the address in large-memory mode. |
| //! |
| //! Only the static relocation model is handled. Since the relocation model is |
| //! selected by the user, any other model is reported with |
| //! report_fatal_error(), as LowerGlobalAddress() does, instead of being |
| //! treated as unreachable code. |
| static SDValue |
| LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { |
|   const TargetMachine &TM = DAG.getTarget(); |
| |
|   if (TM.getRelocationModel() != Reloc::Static) |
|     report_fatal_error("LowerJumpTable: Relocation model other than static" |
|                        " not supported."); |
| |
|   EVT PtrVT = Op.getValueType(); |
|   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); |
|   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); |
|   SDValue Zero = DAG.getConstant(0, PtrVT); |
|   // FIXME there is no actual debug info here |
|   DebugLoc dl = Op.getDebugLoc(); |
| |
|   if (!ST->usingLargeMem()) |
|     return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); |
| |
|   // Large memory: build the address from its high and low parts. |
|   SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); |
|   SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); |
|   return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); |
| } |
| |
| //! Generate the address of a global value. |
| //! |
| //! \param Op  The GlobalAddress node to lower; its value type is used as the |
| //!            pointer type of every node created here. |
| //! \param DAG The selection DAG in which the new nodes are created. |
| //! \param ST  Subtarget, consulted for small- vs. large-memory addressing. |
| //! \return An AFormAddr node in small-memory mode, or an IndirectAddr node |
| //!         combining the Hi and Lo parts of the address in large-memory mode. |
| //! |
| //! Only the static relocation model is handled; any other model is a fatal |
| //! error. |
| static SDValue |
| LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { |
|   const TargetMachine &TM = DAG.getTarget(); |
| |
|   // The two literal pieces are concatenated by the compiler, so the second |
|   // one must begin with a space to keep "static" and "not" apart. |
|   if (TM.getRelocationModel() != Reloc::Static) |
|     report_fatal_error("LowerGlobalAddress: Relocation model other than static" |
|                        " not supported."); |
| |
|   EVT PtrVT = Op.getValueType(); |
|   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); |
|   const GlobalValue *GV = GSDN->getGlobal(); |
|   SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), |
|                                           PtrVT, GSDN->getOffset()); |
|   SDValue Zero = DAG.getConstant(0, PtrVT); |
|   // FIXME there is no actual debug info here |
|   DebugLoc dl = Op.getDebugLoc(); |
| |
|   if (!ST->usingLargeMem()) |
|     return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); |
| |
|   // Large memory: build the address from its high and low parts. |
|   SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); |
|   SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); |
|   return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); |
| } |
| |
| //! Custom lower double precision floating point constants. |
| //! |
| //! An f64 constant is rewritten as its 64-bit integer bit pattern, placed in |
| //! both elements of a v2i64 BUILD_VECTOR, bit-converted to v2f64 and then |
| //! wrapped in a VEC2PREFSLOT node of the original type. Any other value type |
| //! yields an empty SDValue. |
| static SDValue |
| LowerConstantFP(SDValue Op, SelectionDAG &DAG) { |
|   EVT VT = Op.getValueType(); |
| |
|   // Nothing to do for anything but f64. |
|   if (VT != MVT::f64) |
|     return SDValue(); |
| |
|   // FIXME there is no actual debug info here |
|   DebugLoc dl = Op.getDebugLoc(); |
| |
|   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode()); |
|   assert((FP != 0) && |
|          "LowerConstantFP: Node is not ConstantFPSDNode"); |
| |
|   // Reinterpret the double as raw bits and build an integer splat from it. |
|   const double AsDouble = FP->getValueAPF().convertToDouble(); |
|   uint64_t RawBits = DoubleToBits(AsDouble); |
|   SDValue BitsCN = DAG.getConstant(RawBits, MVT::i64); |
|   SDValue IntSplat = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, |
|                                  BitsCN, BitsCN); |
|   SDValue FPSplat = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, IntSplat); |
|   return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, FPSplat); |
| } |
| |
| //! Lower the incoming (formal) arguments of a function. |
| //! |
| //! Each entry of \p Ins is assigned a location by the CCC_SPU calling |
| //! convention. Register-assigned arguments are copied out of a fresh virtual |
| //! register that is marked live-in; all other arguments are loaded from a |
| //! fixed stack object. One value per argument is appended to \p InVals. |
| //! |
| //! For vararg functions, every argument register that was not consumed by a |
| //! named argument is additionally stored to its own fixed stack slot. |
| //! |
| //! \return The chain to be used by the code that follows. |
| SDValue |
| SPUTargetLowering::LowerFormalArguments(SDValue Chain, |
|                                         CallingConv::ID CallConv, bool isVarArg, |
|                                         const SmallVectorImpl<ISD::InputArg> |
|                                           &Ins, |
|                                         DebugLoc dl, SelectionDAG &DAG, |
|                                         SmallVectorImpl<SDValue> &InVals) |
|   const { |
| |
|   MachineFunction &MF = DAG.getMachineFunction(); |
|   MachineFrameInfo *MFI = MF.getFrameInfo(); |
|   MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
|   SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); |
| |
|   unsigned ArgOffset = SPUFrameInfo::minStackSize(); |
|   unsigned ArgRegIdx = 0; |
|   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); |
| |
|   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); |
| |
|   SmallVector<CCValAssign, 16> ArgLocs; |
|   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, |
|                  *DAG.getContext()); |
|   // FIXME: allow for other calling conventions |
|   CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); |
| |
|   // Add DAG nodes to load the arguments or copy them out of registers. |
|   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { |
|     EVT ObjectVT = Ins[ArgNo].VT; |
|     unsigned ObjSize = ObjectVT.getSizeInBits()/8; |
|     SDValue ArgVal; |
|     CCValAssign &VA = ArgLocs[ArgNo]; |
| |
|     if (VA.isRegLoc()) { |
|       const TargetRegisterClass *ArgRegClass; |
| |
|       // Pick the register class that matches the argument's value type. |
|       switch (ObjectVT.getSimpleVT().SimpleTy) { |
|       default: |
|         report_fatal_error("LowerFormalArguments Unhandled argument type: " + |
|                            Twine(ObjectVT.getEVTString())); |
|       case MVT::i8: |
|         ArgRegClass = &SPU::R8CRegClass; |
|         break; |
|       case MVT::i16: |
|         ArgRegClass = &SPU::R16CRegClass; |
|         break; |
|       case MVT::i32: |
|         ArgRegClass = &SPU::R32CRegClass; |
|         break; |
|       case MVT::i64: |
|         ArgRegClass = &SPU::R64CRegClass; |
|         break; |
|       case MVT::i128: |
|         ArgRegClass = &SPU::GPRCRegClass; |
|         break; |
|       case MVT::f32: |
|         ArgRegClass = &SPU::R32FPRegClass; |
|         break; |
|       case MVT::f64: |
|         ArgRegClass = &SPU::R64FPRegClass; |
|         break; |
|       case MVT::v2f64: |
|       case MVT::v4f32: |
|       case MVT::v2i64: |
|       case MVT::v4i32: |
|       case MVT::v8i16: |
|       case MVT::v16i8: |
|       case MVT::v2i32: |
|       case MVT::v2f32: |
|         ArgRegClass = &SPU::VECREGRegClass; |
|         break; |
|       } |
| |
|       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); |
|       RegInfo.addLiveIn(VA.getLocReg(), VReg); |
|       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); |
|       ++ArgRegIdx; |
|     } else { |
|       // We need to load the argument to a virtual register if we determined |
|       // above that we ran out of physical registers of the appropriate type |
|       // or we're forced to do vararg |
|       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); |
|       SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
|       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); |
|       ArgOffset += StackSlotSize; |
|     } |
| |
|     InVals.push_back(ArgVal); |
|     // Update the chain |
|     Chain = ArgVal.getOperand(0); |
|   } |
| |
|   // vararg handling: |
|   if (isVarArg) { |
|     // FIXME: we should be able to query the argument registers from |
|     // tablegen generated code. |
|     static const unsigned ArgRegs[] = { |
|       SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9, |
|       SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, |
|       SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, |
|       SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, |
|       SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, |
|       SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, |
|       SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, |
|       SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, |
|       SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, |
|       SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, |
|       SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 |
|     }; |
|     // Number of entries in ArgRegs (R3..R79), derived from the table itself |
|     // so that the two cannot drift apart. |
|     const unsigned NumArgRegs = sizeof(ArgRegs) / sizeof(ArgRegs[0]); |
| |
|     // At most one store per argument register is collected here. |
|     SmallVector<SDValue, NumArgRegs> MemOps; |
| |
|     // Create the frame slot |
|     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { |
|       // NOTE(review): the vararg frame index is overwritten on every |
|       // iteration, so it ends up naming the last slot created here - confirm |
|       // that this is what the va_start lowering expects. |
|       FuncInfo->setVarArgsFrameIndex( |
|         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); |
|       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); |
|       unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); |
|       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); |
|       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, |
|                                    false, false, 0); |
|       Chain = Store.getOperand(0); |
|       MemOps.push_back(Store); |
| |
|       // Increment address by stack slot size for the next stored argument |
|       ArgOffset += StackSlotSize; |
|     } |
|     if (!MemOps.empty()) |
|       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
|                           &MemOps[0], MemOps.size()); |
|   } |
| |
|   return Chain; |
| } |
| |
| /// isLSAAddress - Return the immediate to use if the specified |
| /// value is representable as a LSA address. |
| /// |
| /// The operand must be a constant whose low two bits are clear and whose |
| /// value survives being sign-extended from its low 18 bits. On success the |
| /// result is an i32 constant node holding the address shifted right by two; |
| /// otherwise a null pointer is returned. |
| static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { |
|   ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op); |
|   if (CN == 0) |
|     return 0; |
| |
|   int Addr = CN->getZExtValue(); |
| |
|   // Low 2 bits are implicitly zero. |
|   if ((Addr & 3) != 0) |
|     return 0; |
| |
|   // Top 14 bits have to be sext of immediate. |
|   if ((Addr << 14 >> 14) != Addr) |
|     return 0; |
| |
|   SDValue Imm = DAG.getConstant((int)CN->getZExtValue() >> 2, MVT::i32); |
|   return Imm.getNode(); |
| } |
| |
| //! Lower an outgoing call. |
| //! |
| //! The outgoing operands are assigned locations by the CCC_SPU calling |
| //! convention. Operands that received a register are copied into it, with the |
| //! copies glued to the call node; operands that received a memory location |
| //! are stored at successive stack slots addressed off R1. The call is |
| //! bracketed by CALLSEQ_START/CALLSEQ_END and its results, if any, are copied |
| //! out of R3 (and R4 for a pair of i32 results) into \p InVals. |
| //! |
| //! \param isTailCall Always cleared: tail calls are not supported. |
| //! \return The chain following the call and the result copies. |
| SDValue |
| SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, |
|                              CallingConv::ID CallConv, bool isVarArg, |
|                              bool &isTailCall, |
|                              const SmallVectorImpl<ISD::OutputArg> &Outs, |
|                              const SmallVectorImpl<SDValue> &OutVals, |
|                              const SmallVectorImpl<ISD::InputArg> &Ins, |
|                              DebugLoc dl, SelectionDAG &DAG, |
|                              SmallVectorImpl<SDValue> &InVals) const { |
|   // CellSPU target does not yet support tail call optimization. |
|   isTailCall = false; |
| |
|   const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); |
|   unsigned NumOps = Outs.size(); |
|   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); |
| |
|   SmallVector<CCValAssign, 16> ArgLocs; |
|   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, |
|                  *DAG.getContext()); |
|   // FIXME: allow for other calling conventions |
|   CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); |
| |
|   // Handy pointer type |
|   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); |
| |
|   // Set up a copy of the stack pointer for use loading and storing any |
|   // arguments that may not fit in the registers available for argument |
|   // passing. |
|   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); |
| |
|   // Figure out which arguments are going to go in registers, and which in |
|   // memory. |
|   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR] |
|   unsigned ArgRegIdx = 0; |
| |
|   // Keep track of registers passing arguments |
|   std::vector<std::pair<unsigned, SDValue> > RegsToPass; |
|   // And the arguments passed on the stack |
|   SmallVector<SDValue, 8> MemOpChains; |
| |
|   for (; ArgRegIdx != NumOps; ++ArgRegIdx) { |
|     SDValue Arg = OutVals[ArgRegIdx]; |
|     CCValAssign &VA = ArgLocs[ArgRegIdx]; |
| |
|     // PtrOff will be used to store the current argument to the stack if a |
|     // register cannot be found for it. |
|     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); |
|     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); |
| |
|     switch (Arg.getValueType().getSimpleVT().SimpleTy) { |
|     default: llvm_unreachable("Unexpected ValueType for argument!"); |
|     case MVT::i8: |
|     case MVT::i16: |
|     case MVT::i32: |
|     case MVT::i64: |
|     case MVT::i128: |
|     case MVT::f32: |
|     case MVT::f64: |
|     case MVT::v2i64: |
|     case MVT::v2f64: |
|     case MVT::v4f32: |
|     case MVT::v4i32: |
|     case MVT::v8i16: |
|     case MVT::v16i8: |
|       // Ask the calling-convention result where this operand lives. The |
|       // previous test compared the loop index against ArgLocs.size(), which |
|       // the loop bound keeps from ever being equal, so the stack branch was |
|       // never taken and getLocReg() was used even for memory locations. |
|       if (VA.isRegLoc()) { |
|         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); |
|       } else { |
|         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, |
|                                            false, false, 0)); |
|         ArgOffset += StackSlotSize; |
|       } |
|       break; |
|     } |
|   } |
| |
|   // Accumulate how many bytes are to be pushed on the stack, including the |
|   // linkage area, and parameter passing area. According to the SPU ABI, |
|   // we minimally need space for [LR] and [SP]. |
|   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize(); |
| |
|   // Insert a call sequence start |
|   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, |
|                                                             true)); |
| |
|   if (!MemOpChains.empty()) { |
|     // The argument stores were built on the chain that preceded |
|     // CALLSEQ_START. Join them with the CALLSEQ_START chain so that neither |
|     // the stores nor the call sequence start is dropped from the DAG. |
|     MemOpChains.push_back(Chain); |
|     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
|                         &MemOpChains[0], MemOpChains.size()); |
|   } |
| |
|   // Build a sequence of copy-to-reg nodes chained together with token chain |
|   // and flag operands which copy the outgoing args into the appropriate regs. |
|   SDValue InFlag; |
|   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
|     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
|                              RegsToPass[i].second, InFlag); |
|     InFlag = Chain.getValue(1); |
|   } |
| |
|   SmallVector<SDValue, 8> Ops; |
|   unsigned CallOpc = SPUISD::CALL; |
| |
|   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every |
|   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol |
|   // node so that legalize doesn't hack it. |
|   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { |
|     const GlobalValue *GV = G->getGlobal(); |
|     EVT CalleeVT = Callee.getValueType(); |
|     SDValue Zero = DAG.getConstant(0, PtrVT); |
|     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); |
| |
|     if (!ST->usingLargeMem()) { |
|       // Turn calls to targets that are defined (i.e., have bodies) into BRSL |
|       // style calls, otherwise, external symbols are BRASL calls. This assumes |
|       // that declared/defined symbols are in the same compilation unit and can |
|       // be reached through PC-relative jumps. |
|       // |
|       // NOTE: |
|       // This may be an unsafe assumption for JIT and really large compilation |
|       // units. |
|       if (GV->isDeclaration()) { |
|         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); |
|       } else { |
|         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); |
|       } |
|     } else { |
|       // "Large memory" mode: Turn all calls into indirect calls with a X-form |
|       // address pairs: |
|       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); |
|     } |
|   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { |
|     EVT CalleeVT = Callee.getValueType(); |
|     SDValue Zero = DAG.getConstant(0, PtrVT); |
|     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), |
|                                                  Callee.getValueType()); |
| |
|     if (!ST->usingLargeMem()) { |
|       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); |
|     } else { |
|       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); |
|     } |
|   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { |
|     // If this is an absolute destination address that appears to be a legal |
|     // local store address, use the munged value. |
|     Callee = SDValue(Dest, 0); |
|   } |
| |
|   Ops.push_back(Chain); |
|   Ops.push_back(Callee); |
| |
|   // Add argument registers to the end of the list so that they are known live |
|   // into the call. |
|   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
|     Ops.push_back(DAG.getRegister(RegsToPass[i].first, |
|                                   RegsToPass[i].second.getValueType())); |
| |
|   if (InFlag.getNode()) |
|     Ops.push_back(InFlag); |
|   // Returns a chain and a flag for retval copy to use. |
|   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), |
|                       &Ops[0], Ops.size()); |
|   InFlag = Chain.getValue(1); |
| |
|   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), |
|                              DAG.getIntPtrConstant(0, true), InFlag); |
|   if (!Ins.empty()) |
|     InFlag = Chain.getValue(1); |
| |
|   // If the function returns void, just return the chain. |
|   if (Ins.empty()) |
|     return Chain; |
| |
|   // If the call has results, copy the values out of the ret val registers. |
|   switch (Ins[0].VT.getSimpleVT().SimpleTy) { |
|   default: llvm_unreachable("Unexpected ret value!"); |
|   case MVT::Other: break; |
|   case MVT::i32: |
|     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) { |
|       // Two i32 results: R4 is read first, then R3, each copy glued to the |
|       // previous one. |
|       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, |
|                                  MVT::i32, InFlag).getValue(1); |
|       InVals.push_back(Chain.getValue(0)); |
|       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, |
|                                  Chain.getValue(2)).getValue(1); |
|       InVals.push_back(Chain.getValue(0)); |
|     } else { |
|       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, |
|                                  InFlag).getValue(1); |
|       InVals.push_back(Chain.getValue(0)); |
|     } |
|     break; |
|   case MVT::i8: |
|   case MVT::i16: |
|   case MVT::i64: |
|   case MVT::i128: |
|   case MVT::f32: |
|   case MVT::f64: |
|   case MVT::v2f64: |
|   case MVT::v2i64: |
|   case MVT::v4f32: |
|   case MVT::v4i32: |
|   case MVT::v8i16: |
|   case MVT::v16i8: |
|     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, |
|                                InFlag).getValue(1); |
|     InVals.push_back(Chain.getValue(0)); |
|     break; |
|   } |
| |
|   return Chain; |
| } |
| |
| //! Lower a function return. |
| //! |
| //! The returned values are assigned registers by RetCC_SPU and copied into |
| //! them one after another, each copy glued to the previous one. The result is |
| //! a RET_FLAG node that carries the final glue value when at least one value |
| //! was copied. |
| SDValue |
| SPUTargetLowering::LowerReturn(SDValue Chain, |
|                                CallingConv::ID CallConv, bool isVarArg, |
|                                const SmallVectorImpl<ISD::OutputArg> &Outs, |
|                                const SmallVectorImpl<SDValue> &OutVals, |
|                                DebugLoc dl, SelectionDAG &DAG) const { |
| |
|   SmallVector<CCValAssign, 16> RVLocs; |
|   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), |
|                  RVLocs, *DAG.getContext()); |
|   CCInfo.AnalyzeReturn(Outs, RetCC_SPU); |
| |
|   const unsigned NumRetVals = RVLocs.size(); |
|   MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); |
| |
|   // If this is the first return lowered for this function, add the regs to the |
|   // liveout set for the function. |
|   if (MRI.liveout_empty()) { |
|     for (unsigned Idx = 0; Idx != NumRetVals; ++Idx) |
|       MRI.addLiveOut(RVLocs[Idx].getLocReg()); |
|   } |
| |
|   // Copy the result values into the output registers. |
|   SDValue Glue; |
|   for (unsigned Idx = 0; Idx != NumRetVals; ++Idx) { |
|     CCValAssign &Loc = RVLocs[Idx]; |
|     assert(Loc.isRegLoc() && "Can only return in registers!"); |
|     Chain = DAG.getCopyToReg(Chain, dl, Loc.getLocReg(), |
|                              OutVals[Idx], Glue); |
|     Glue = Chain.getValue(1); |
|   } |
| |
|   // No value was copied: emit the return without a glue operand. |
|   if (!Glue.getNode()) |
|     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); |
| |
|   return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Glue); |
| } |
| |
| |
| //===----------------------------------------------------------------------===// |
| // Vector related lowering: |
| //===----------------------------------------------------------------------===// |
| |
| /// getVecImm - If every non-undef operand of \p N is the same value and that |
| /// value is a constant, return the constant node; otherwise return null. |
| static ConstantSDNode * |
| getVecImm(SDNode *N) { |
|   SDValue Splat(0, 0); |
| |
|   // Check to see if this buildvec has a single non-undef value in its elements. |
|   const unsigned NumElts = N->getNumOperands(); |
|   for (unsigned Idx = 0; Idx != NumElts; ++Idx) { |
|     SDValue Elt = N->getOperand(Idx); |
| |
|     if (Elt.getOpcode() == ISD::UNDEF) |
|       continue; |
| |
|     if (Splat.getNode() == 0) { |
|       // First defined element: remember it. |
|       Splat = Elt; |
|       continue; |
|     } |
| |
|     if (Splat != Elt) |
|       return 0; |
|   } |
| |
|   // Every operand was undef (or there were none). |
|   if (Splat.getNode() == 0) |
|     return 0; |
| |
|   // Null when the common value is not a constant. |
|   return dyn_cast<ConstantSDNode>(Splat); |
| } |
| |
| /// get_vec_u18imm - Test if this vector is a vector filled with the same value |
| /// and the value fits into an unsigned 18-bit constant, and if so, return the |
| /// constant. |
| /// |
| /// For MVT::i64 the upper and lower 32-bit halves of the value must be equal; |
| /// the range test is then applied to one half. An empty SDValue is returned |
| /// when the vector does not qualify. |
| SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, |
|                             EVT ValueType) { |
|   ConstantSDNode *CN = getVecImm(N); |
|   if (CN == 0) |
|     return SDValue(); |
| |
|   uint64_t Imm = CN->getZExtValue(); |
| |
|   if (ValueType == MVT::i64) { |
|     const uint32_t HiHalf = uint32_t(Imm >> 32); |
|     const uint32_t LoHalf = uint32_t(Imm); |
|     if (HiHalf != LoHalf) |
|       return SDValue(); |
|     Imm >>= 32; |
|   } |
| |
|   if (Imm > 0x3ffff) |
|     return SDValue(); |
| |
|   return DAG.getTargetConstant(Imm, ValueType); |
| } |
| |
| /// get_vec_i16imm - Test if this vector is a vector filled with the same value |
| /// and the value fits into a signed 16-bit constant, and if so, return the |
| /// constant. |
| /// |
| /// For MVT::i64 the upper and lower 32-bit halves of the value must be equal; |
| /// the range test is then applied to the sign-extended value shifted right by |
| /// 32. An empty SDValue is returned when the vector does not qualify. |
| SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, |
|                             EVT ValueType) { |
|   ConstantSDNode *CN = getVecImm(N); |
|   if (CN == 0) |
|     return SDValue(); |
| |
|   int64_t Imm = CN->getSExtValue(); |
| |
|   if (ValueType == MVT::i64) { |
|     const uint64_t Raw = CN->getZExtValue(); |
|     if (uint32_t(Raw >> 32) != uint32_t(Raw)) |
|       return SDValue(); |
|     Imm = Imm >> 32; |
|   } |
| |
|   // Same range as -(1 << 15) .. (1 << 15) - 1. |
|   if (!isInt<16>(Imm)) |
|     return SDValue(); |
| |
|   return DAG.getTargetConstant(Imm, ValueType); |
| } |
| |
| /// get_vec_i10imm - Test if this vector is a vector filled with the same value |
| /// and the value fits into a signed 10-bit constant, and if so, return the |
| /// constant. |
| /// |
| /// For MVT::i64 the upper and lower 32-bit halves of the value must be equal; |
| /// the range test is then applied to the sign-extended value shifted right by |
| /// 32. An empty SDValue is returned when the vector does not qualify. |
| SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, |
|                             EVT ValueType) { |
|   ConstantSDNode *CN = getVecImm(N); |
|   if (CN == 0) |
|     return SDValue(); |
| |
|   int64_t Imm = CN->getSExtValue(); |
| |
|   if (ValueType == MVT::i64) { |
|     const uint64_t Raw = CN->getZExtValue(); |
|     if (uint32_t(Raw >> 32) != uint32_t(Raw)) |
|       return SDValue(); |
|     Imm = Imm >> 32; |
|   } |
| |
|   if (!isInt<10>(Imm)) |
|     return SDValue(); |
| |
|   return DAG.getTargetConstant(Imm, ValueType); |
| } |
| |
| /// get_vec_i8imm - Test if this vector is a vector filled with the same value |
| /// and the value fits into a signed 8-bit constant, and if so, return the |
| /// constant. |
| /// |
| /// @note: The incoming vector is v16i8 because that's the only way we can load |
| /// constant vectors. Thus, we test to see if the upper and lower bytes are the |
| /// same value. |
| /// |
| /// NOTE(review): for MVT::i16 the upper byte is obtained with a sign-extending |
| /// shift of a short, so halfwords whose bytes are 0x80 or above never compare |
| /// equal and are rejected - confirm whether that is intended. |
| SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, |
|                            EVT ValueType) { |
|   ConstantSDNode *CN = getVecImm(N); |
|   if (CN == 0) |
|     return SDValue(); |
| |
|   int Value = (int) CN->getZExtValue(); |
| |
|   if (ValueType == MVT::i16) { |
|     const short Half = (short) Value; |
|     // Value was truncated from uint64_t; accept only a halfword whose two |
|     // bytes agree. |
|     if (Value <= 0xffff && (Half >> 8) == (Half & 0xff)) |
|       return DAG.getTargetConstant(Value & 0xff, ValueType); |
|     return SDValue(); |
|   } |
| |
|   if (ValueType == MVT::i8 && (Value & 0xff) == Value) |
|     return DAG.getTargetConstant(Value, ValueType); |
| |
|   return SDValue(); |
| } |
| |
| /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value |
| /// and that value has no bits set outside bits 16..31, and if so, return the |
| /// value shifted right by 16 as a target constant. |
| /// |
| /// Only MVT::i32 and MVT::i64 are accepted; anything else, or a value that |
| /// fails the test, yields an empty SDValue. |
| SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, |
|                              EVT ValueType) { |
|   ConstantSDNode *CN = getVecImm(N); |
|   if (CN == 0) |
|     return SDValue(); |
| |
|   uint64_t Value = CN->getZExtValue(); |
|   bool UpperHalfwordOnly = false; |
| |
|   if (ValueType == MVT::i32) { |
|     // Only the low 32 bits take part in the comparison. |
|     UpperHalfwordOnly = ((unsigned) Value & 0xffff0000) == (unsigned) Value; |
|   } else if (ValueType == MVT::i64) { |
|     UpperHalfwordOnly = (Value & 0xffff0000) == Value; |
|   } |
| |
|   if (!UpperHalfwordOnly) |
|     return SDValue(); |
| |
|   return DAG.getTargetConstant(Value >> 16, ValueType); |
| } |
| |
| /// get_v4i32_imm - Catch-all for general 32-bit constant vectors. |
| /// |
| /// Returns the splatted constant, truncated to unsigned, as an i32 target |
| /// constant, or an empty SDValue when the vector is not a constant splat. |
| SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { |
|   ConstantSDNode *CN = getVecImm(N); |
|   if (CN == 0) |
|     return SDValue(); |
| |
|   const unsigned Imm = (unsigned) CN->getZExtValue(); |
|   return DAG.getTargetConstant(Imm, MVT::i32); |
| } |
| |
| /// get_v2i64_imm - Catch-all for general 64-bit constant vectors. |
| /// |
| /// Returns the splatted constant as an i64 target constant, or an empty |
| /// SDValue when the vector is not a constant splat. |
| /// |
| /// NOTE(review): the value is cast to unsigned before the i64 constant is |
| /// built, which discards its upper 32 bits - confirm that this is intended. |
| SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { |
|   ConstantSDNode *CN = getVecImm(N); |
|   if (CN == 0) |
|     return SDValue(); |
| |
|   const unsigned Imm = (unsigned) CN->getZExtValue(); |
|   return DAG.getTargetConstant(Imm, MVT::i64); |
| } |
| |
| //! Lower a BUILD_VECTOR instruction creatively: |
| //! |
| //! Only constant splats are handled, and only when the splat is no wider than |
| //! the element size (clamped to at least 16 bits). Such a vector is rebuilt |
| //! from integer constants in a form chosen per vector type. In every other |
| //! case an empty SDValue is returned. |
| static SDValue |
| LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { |
|   EVT VT = Op.getValueType(); |
|   EVT EltVT = VT.getVectorElementType(); |
|   DebugLoc dl = Op.getDebugLoc(); |
|   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); |
|   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); |
| |
|   // Never look for a splat narrower than a halfword. |
|   const unsigned EltBits = EltVT.getSizeInBits(); |
|   unsigned minSplatBits = (EltBits < 16) ? 16 : EltBits; |
| |
|   APInt APSplatBits, APSplatUndef; |
|   unsigned SplatBitSize; |
|   bool HasAnyUndefs; |
| |
|   // Wasn't a constant vector |
|   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, |
|                             HasAnyUndefs, minSplatBits)) |
|     return SDValue(); |
| |
|   // Splat exceeded min |
|   if (minSplatBits < SplatBitSize) |
|     return SDValue(); |
| |
|   uint64_t SplatBits = APSplatBits.getZExtValue(); |
| |
|   switch (VT.getSimpleVT().SimpleTy) { |
|   default: |
|     report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + |
|                        Twine(VT.getEVTString())); |
|     /*NOTREACHED*/ |
|   case MVT::v4f32: { |
|     assert(SplatBitSize == 32 |
|            && "LowerBUILD_VECTOR: Unexpected floating point vector element."); |
|     // NOTE: pretend the constant is an integer. LLVM won't load FP constants |
|     SDValue Word = DAG.getConstant(uint32_t(SplatBits), MVT::i32); |
|     SDValue IntVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
|                                  Word, Word, Word, Word); |
|     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, IntVec); |
|   } |
|   case MVT::v2f64: { |
|     assert(SplatBitSize == 64 |
|            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); |
|     // NOTE: pretend the constant is an integer. LLVM won't load FP constants |
|     SDValue DWord = DAG.getConstant(uint64_t(SplatBits), MVT::i64); |
|     SDValue IntVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, |
|                                  DWord, DWord); |
|     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, IntVec); |
|   } |
|   case MVT::v16i8: { |
|     // 8-bit constants have to be expanded to 16-bits. The splat value is |
|     // used as the halfword as-is and the vector is built as v8i16. |
|     unsigned short HalfWord = SplatBits; |
|     SmallVector<SDValue, 8> Halves; |
| |
|     Halves.assign(8, DAG.getConstant(HalfWord, MVT::i16)); |
|     SDValue HalfVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, |
|                                   &Halves[0], Halves.size()); |
|     return DAG.getNode(ISD::BIT_CONVERT, dl, VT, HalfVec); |
|   } |
|   case MVT::v8i16: { |
|     unsigned short HalfWord = SplatBits; |
|     SmallVector<SDValue, 8> Halves; |
| |
|     Halves.assign(8, DAG.getConstant(HalfWord, EltVT)); |
|     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Halves[0], Halves.size()); |
|   } |
|   case MVT::v4i32: { |
|     SDValue Word = DAG.getConstant(unsigned(SplatBits), EltVT); |
|     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Word, Word, Word, Word); |
|   } |
|   case MVT::v2f32: |
|   case MVT::v2i32: |
|     // Nothing is built for the two-element 32-bit vector types. |
|     return SDValue(); |
|   case MVT::v2i64: |
|     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); |
|   } |
| |
|   return SDValue(); |
| } |
| |
| /*! |
|  Build the DAG for a v2i64 splat of the 64-bit constant \a SplatVal. |
| |
|  - If both 32-bit halves are equal, the result is a v4i32 splat of that half, |
|    bit-converted to \a OpVT. |
|  - If both halves are "special" (0, 0xffffffff or 0x80000000), the result is |
|    a plain v2i64 BUILD_VECTOR of the constant. |
|  - Otherwise the result is a SHUFB that combines v4i32 splats of the |
|    non-special halves under a constant shuffle mask; the special halves are |
|    produced by the mask bytes 0x80, 0xc0 and 0xe0. |
|  */ |
| SDValue |
| SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, |
|                      DebugLoc dl) { |
|   const uint32_t upper = uint32_t(SplatVal >> 32); |
|   const uint32_t lower = uint32_t(SplatVal); |
| |
|   if (upper == lower) { |
|     // Magic constant that can be matched by IL, ILA, et. al. |
|     SDValue Word = DAG.getTargetConstant(upper, MVT::i32); |
|     SDValue WordVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
|                                   Word, Word, Word, Word); |
|     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, WordVec); |
|   } |
| |
|   // NOTE: This code creates common-case shuffle masks that can be easily |
|   // detected as common expressions. It is not attempting to create highly |
|   // specialized masks to replace any and all 0's, 0xff's and 0x80's. |
| |
|   // Detect if the upper or lower half is a special shuffle mask pattern: |
|   const bool upper_special = |
|     (upper == 0 || upper == 0xffffffff || upper == 0x80000000); |
|   const bool lower_special = |
|     (lower == 0 || lower == 0xffffffff || lower == 0x80000000); |
| |
|   // Both upper and lower are special, lower to a constant pool load: |
|   if (lower_special && upper_special) { |
|     SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); |
|     return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, |
|                        SplatValCN, SplatValCN); |
|   } |
| |
|   SDValue LO32; |
|   SDValue HI32; |
| |
|   // Create lower vector if not a special pattern |
|   if (!lower_special) { |
|     SDValue LO32C = DAG.getConstant(lower, MVT::i32); |
|     SDValue LOVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
|                                 LO32C, LO32C, LO32C, LO32C); |
|     LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, LOVec); |
|   } |
| |
|   // Create upper vector if not a special pattern |
|   if (!upper_special) { |
|     SDValue HI32C = DAG.getConstant(upper, MVT::i32); |
|     SDValue HIVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
|                                 HI32C, HI32C, HI32C, HI32C); |
|     HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, HIVec); |
|   } |
| |
|   // If either upper or lower are special, then the two input operands are |
|   // the same (basically, one of them is a "don't care") |
|   if (lower_special) |
|     LO32 = HI32; |
|   if (upper_special) |
|     HI32 = LO32; |
| |
|   // Build the four 32-bit words of the shuffle mask, one byte at a time. |
|   // Even words describe the upper half of each i64, odd words the lower half. |
|   SmallVector<SDValue, 16> ShufBytes; |
|   for (int word = 0; word < 4; ++word) { |
|     const bool isUpperWord = (word & 1) == 0; |
|     const bool wordIsSpecial = isUpperWord ? upper_special : lower_special; |
|     const uint32_t half = isUpperWord ? upper : lower; |
|     uint64_t mask = 0; |
| |
|     for (int byte = 0; byte < 4; ++byte) { |
|       mask <<= 8; |
| |
|       if (!wordIsSpecial) { |
|         // Ordinary byte: odd words select with 16 added to the index. |
|         mask |= word * 4 + byte + ((word & 1) * 16); |
|         continue; |
|       } |
| |
|       if (half == 0) |
|         mask |= 0x80; |
|       else if (half == 0xffffffff) |
|         mask |= 0xc0; |
|       else if (half == 0x80000000) |
|         mask |= (byte == 0 ? 0xe0 : 0x80); |
|     } |
| |
|     ShufBytes.push_back(DAG.getConstant(mask, MVT::i32)); |
|   } |
| |
|   SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
|                                  &ShufBytes[0], ShufBytes.size()); |
|   return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, ShufMask); |
| } |
| |
| /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on |
| /// which the Cell can operate. The code inspects V3 to ascertain whether the |
| /// permutation vector, V3, is monotonically increasing with one "exception" |
| /// element, e.g., (0, 1, _, 3). If this is the case, then generate a |
| /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. |
| /// In either case, the net result is going to eventually invoke SHUFB to |
| /// permute/shuffle the bytes from V1 and V2. |
| /// \note |
| /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate |
| /// control word for byte/halfword/word insertion. This takes care of a single |
| /// element move from V2 into V1. |
| /// \note |
| /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions. |
| static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { |
| const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); |
| SDValue V1 = Op.getOperand(0); |
| SDValue V2 = Op.getOperand(1); |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| if (V2.getOpcode() == ISD::UNDEF) V2 = V1; |
| |
| // If we have a single element being moved from V1 to V2, this can be handled |
| // using the C*[DX] compute mask instructions, but the vector elements have |
| // to be monotonically increasing with one exception element. |
| EVT VecVT = V1.getValueType(); |
| EVT EltVT = VecVT.getVectorElementType(); |
| unsigned EltsFromV2 = 0; |
| unsigned V2Elt = 0; |
| unsigned V2EltIdx0 = 0; |
| unsigned CurrElt = 0; |
| unsigned MaxElts = VecVT.getVectorNumElements(); |
| unsigned PrevElt = 0; |
| unsigned V0Elt = 0; |
| bool monotonic = true; |
| bool rotate = true; |
| EVT maskVT; // which of the c?d instructions to use |
| |
| if (EltVT == MVT::i8) { |
| V2EltIdx0 = 16; |
| maskVT = MVT::v16i8; |
| } else if (EltVT == MVT::i16) { |
| V2EltIdx0 = 8; |
| maskVT = MVT::v8i16; |
| } else if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32 ) { |
| V2EltIdx0 = 2; |
| maskVT = MVT::v4i32; |
| } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { |
| V2EltIdx0 = 4; |
| maskVT = MVT::v4i32; |
| } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { |
| V2EltIdx0 = 2; |
| maskVT = MVT::v2i64; |
| } else |
| llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); |
| |
| for (unsigned i = 0; i != MaxElts; ++i) { |
| if (SVN->getMaskElt(i) < 0) |
| continue; |
| |
| unsigned SrcElt = SVN->getMaskElt(i); |
| |
| if (monotonic) { |
| if (SrcElt >= V2EltIdx0) { |
| if (1 >= (++EltsFromV2)) { |
| V2Elt = (V2EltIdx0 - SrcElt) << 2; |
| } |
| } else if (CurrElt != SrcElt) { |
| monotonic = false; |
| } |
| |
| ++CurrElt; |
| } |
| |
| if (rotate) { |
| if (PrevElt > 0 && SrcElt < MaxElts) { |
| if ((PrevElt == SrcElt - 1) |
| || (PrevElt == MaxElts - 1 && SrcElt == 0)) { |
| PrevElt = SrcElt; |
| if (SrcElt == 0) |
| V0Elt = i; |
| } else { |
| rotate = false; |
| } |
| } else if (i == 0) { |
| // First time through, need to keep track of previous element |
| PrevElt = SrcElt; |
| } else { |
| // This isn't a rotation, takes elements from vector 2 |
| rotate = false; |
| } |
| } |
| } |
| |
| if (EltsFromV2 == 1 && monotonic) { |
| // Compute mask and shuffle |
| EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); |
| |
| // As SHUFFLE_MASK becomes a c?d instruction, feed it an address |
| // R1 ($sp) is used here only as it is guaranteed to have last bits zero |
| SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| DAG.getRegister(SPU::R1, PtrVT), |
| DAG.getConstant(V2Elt, MVT::i32)); |
| SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, |
| maskVT, Pointer); |
| |
| // Use shuffle mask in SHUFB synthetic instruction: |
| return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, |
| ShufMaskOp); |
| } else if (rotate) { |
| int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; |
| |
| return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), |
| V1, DAG.getConstant(rotamt, MVT::i16)); |
| } else { |
| // Convert the SHUFFLE_VECTOR mask's input element units to the |
| // actual bytes. |
| unsigned BytesPerElement = EltVT.getSizeInBits()/8; |
| |
| SmallVector<SDValue, 16> ResultMask; |
| for (unsigned i = 0, e = MaxElts; i != e; ++i) { |
| unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); |
| |
| for (unsigned j = 0; j < BytesPerElement; ++j) |
| ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); |
| } |
| // For half vectors padd the mask with zeros for the second half. |
| // This is needed because mask is assumed to be full vector elsewhere in |
| // the SPU backend. |
| if(VecVT == MVT::v2i32 || VecVT == MVT::v2f32) |
| for( unsigned i = 0; i < 2; ++i ) |
| { |
| for (unsigned j = 0; j < BytesPerElement; ++j) |
| ResultMask.push_back(DAG.getConstant(0,MVT::i8)); |
| } |
| |
| SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, |
| &ResultMask[0], ResultMask.size()); |
| return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); |
| } |
| } |
| |
| static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { |
| SDValue Op0 = Op.getOperand(0); // Op0 = the scalar |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| if (Op0.getNode()->getOpcode() == ISD::Constant) { |
| // For a constant, build the appropriate constant vector, which will |
| // eventually simplify to a vector register load. |
| |
| ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode()); |
| SmallVector<SDValue, 16> ConstVecValues; |
| EVT VT; |
| size_t n_copies; |
| |
| // Create a constant vector: |
| switch (Op.getValueType().getSimpleVT().SimpleTy) { |
| default: llvm_unreachable("Unexpected constant value type in " |
| "LowerSCALAR_TO_VECTOR"); |
| case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; |
| case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; |
| case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; |
| case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; |
| case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; |
| case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; |
| case MVT::v2i32: n_copies = 2; VT = MVT::i32; break; |
| } |
| |
| SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); |
| for (size_t j = 0; j < n_copies; ++j) |
| ConstVecValues.push_back(CValue); |
| |
| return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), |
| &ConstVecValues[0], ConstVecValues.size()); |
| } else { |
| // Otherwise, copy the value from one register to another: |
| switch (Op0.getValueType().getSimpleVT().SimpleTy) { |
| default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); |
| case MVT::i8: |
| case MVT::i16: |
| case MVT::i32: |
| case MVT::i64: |
| case MVT::f32: |
| case MVT::f64: |
| return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); |
| } |
| } |
| |
| return SDValue(); |
| } |
| |
| static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { |
| EVT VT = Op.getValueType(); |
| SDValue N = Op.getOperand(0); |
| SDValue Elt = Op.getOperand(1); |
| DebugLoc dl = Op.getDebugLoc(); |
| SDValue retval; |
| |
| if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { |
| // Constant argument: |
| int EltNo = (int) C->getZExtValue(); |
| |
| // sanity checks: |
| if (VT == MVT::i8 && EltNo >= 16) |
| llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); |
| else if (VT == MVT::i16 && EltNo >= 8) |
| llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); |
| else if (VT == MVT::i32 && EltNo >= 4) |
| llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); |
| else if (VT == MVT::i64 && EltNo >= 2) |
| llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); |
| |
| if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { |
| // i32 and i64: Element 0 is the preferred slot |
| return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); |
| } |
| |
| // Need to generate shuffle mask and extract: |
| int prefslot_begin = -1, prefslot_end = -1; |
| int elt_byte = EltNo * VT.getSizeInBits() / 8; |
| |
| switch (VT.getSimpleVT().SimpleTy) { |
| default: |
| assert(false && "Invalid value type!"); |
| case MVT::i8: { |
| prefslot_begin = prefslot_end = 3; |
| break; |
| } |
| case MVT::i16: { |
| prefslot_begin = 2; prefslot_end = 3; |
| break; |
| } |
| case MVT::i32: |
| case MVT::f32: { |
| prefslot_begin = 0; prefslot_end = 3; |
| break; |
| } |
| case MVT::i64: |
| case MVT::f64: { |
| prefslot_begin = 0; prefslot_end = 7; |
| break; |
| } |
| } |
| |
| assert(prefslot_begin != -1 && prefslot_end != -1 && |
| "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); |
| |
| unsigned int ShufBytes[16] = { |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| }; |
| for (int i = 0; i < 16; ++i) { |
| // zero fill uppper part of preferred slot, don't care about the |
| // other slots: |
| unsigned int mask_val; |
| if (i <= prefslot_end) { |
| mask_val = |
| ((i < prefslot_begin) |
| ? 0x80 |
| : elt_byte + (i - prefslot_begin)); |
| |
| ShufBytes[i] = mask_val; |
| } else |
| ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; |
| } |
| |
| SDValue ShufMask[4]; |
| for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { |
| unsigned bidx = i * 4; |
| unsigned int bits = ((ShufBytes[bidx] << 24) | |
| (ShufBytes[bidx+1] << 16) | |
| (ShufBytes[bidx+2] << 8) | |
| ShufBytes[bidx+3]); |
| ShufMask[i] = DAG.getConstant(bits, MVT::i32); |
| } |
| |
| SDValue ShufMaskVec = |
| DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
| &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); |
| |
| retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, |
| DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), |
| N, N, ShufMaskVec)); |
| } else { |
| // Variable index: Rotate the requested element into slot 0, then replicate |
| // slot 0 across the vector |
| EVT VecVT = N.getValueType(); |
| if (!VecVT.isSimple() || !VecVT.isVector()) { |
| report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" |
| "vector type!"); |
| } |
| |
| // Make life easier by making sure the index is zero-extended to i32 |
| if (Elt.getValueType() != MVT::i32) |
| Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); |
| |
| // Scale the index to a bit/byte shift quantity |
| APInt scaleFactor = |
| APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); |
| unsigned scaleShift = scaleFactor.logBase2(); |
| SDValue vecShift; |
| |
| if (scaleShift > 0) { |
| // Scale the shift factor: |
| Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, |
| DAG.getConstant(scaleShift, MVT::i32)); |
| } |
| |
| vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt); |
| |
| // Replicate the bytes starting at byte 0 across the entire vector (for |
| // consistency with the notion of a unified register set) |
| SDValue replicate; |
| |
| switch (VT.getSimpleVT().SimpleTy) { |
| default: |
| report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" |
| "type"); |
| /*NOTREACHED*/ |
| case MVT::i8: { |
| SDValue factor = DAG.getConstant(0x00000000, MVT::i32); |
| replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
| factor, factor, factor, factor); |
| break; |
| } |
| case MVT::i16: { |
| SDValue factor = DAG.getConstant(0x00010001, MVT::i32); |
| replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
| factor, factor, factor, factor); |
| break; |
| } |
| case MVT::i32: |
| case MVT::f32: { |
| SDValue factor = DAG.getConstant(0x00010203, MVT::i32); |
| replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
| factor, factor, factor, factor); |
| break; |
| } |
| case MVT::i64: |
| case MVT::f64: { |
| SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); |
| SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); |
| replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
| loFactor, hiFactor, loFactor, hiFactor); |
| break; |
| } |
| } |
| |
| retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, |
| DAG.getNode(SPUISD::SHUFB, dl, VecVT, |
| vecShift, vecShift, replicate)); |
| } |
| |
| return retval; |
| } |
| |
| static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { |
| SDValue VecOp = Op.getOperand(0); |
| SDValue ValOp = Op.getOperand(1); |
| SDValue IdxOp = Op.getOperand(2); |
| DebugLoc dl = Op.getDebugLoc(); |
| EVT VT = Op.getValueType(); |
| |
| // use 0 when the lane to insert to is 'undef' |
| int64_t Idx=0; |
| if (IdxOp.getOpcode() != ISD::UNDEF) { |
| ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); |
| assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); |
| Idx = (CN->getSExtValue()); |
| } |
| |
| EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); |
| // Use $sp ($1) because it's always 16-byte aligned and it's available: |
| SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, |
| DAG.getRegister(SPU::R1, PtrVT), |
| DAG.getConstant(Idx, PtrVT)); |
| // widen the mask when dealing with half vectors |
| EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), |
| 128/ VT.getVectorElementType().getSizeInBits()); |
| SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); |
| |
| SDValue result = |
| DAG.getNode(SPUISD::SHUFB, dl, VT, |
| DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), |
| VecOp, |
| DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask)); |
| |
| return result; |
| } |
| |
| static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, |
| const TargetLowering &TLI) |
| { |
| SDValue N0 = Op.getOperand(0); // Everything has at least one operand |
| DebugLoc dl = Op.getDebugLoc(); |
| EVT ShiftVT = TLI.getShiftAmountTy(); |
| |
| assert(Op.getValueType() == MVT::i8); |
| switch (Opc) { |
| default: |
| llvm_unreachable("Unhandled i8 math operator"); |
| /*NOTREACHED*/ |
| break; |
| case ISD::ADD: { |
| // 8-bit addition: Promote the arguments up to 16-bits and truncate |
| // the result: |
| SDValue N1 = Op.getOperand(1); |
| N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); |
| N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); |
| return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, |
| DAG.getNode(Opc, dl, MVT::i16, N0, N1)); |
| |
| } |
| |
| case ISD::SUB: { |
| // 8-bit subtraction: Promote the arguments up to 16-bits and truncate |
| // the result: |
| SDValue N1 = Op.getOperand(1); |
| N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); |
| N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); |
| return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, |
| DAG.getNode(Opc, dl, MVT::i16, N0, N1)); |
| } |
| case ISD::ROTR: |
| case ISD::ROTL: { |
| SDValue N1 = Op.getOperand(1); |
| EVT N1VT = N1.getValueType(); |
| |
| N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); |
| if (!N1VT.bitsEq(ShiftVT)) { |
| unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) |
| ? ISD::ZERO_EXTEND |
| : ISD::TRUNCATE; |
| N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); |
| } |
| |
| // Replicate lower 8-bits into upper 8: |
| SDValue ExpandArg = |
| DAG.getNode(ISD::OR, dl, MVT::i16, N0, |
| DAG.getNode(ISD::SHL, dl, MVT::i16, |
| N0, DAG.getConstant(8, MVT::i32))); |
| |
| // Truncate back down to i8 |
| return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, |
| DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); |
| } |
| case ISD::SRL: |
| case ISD::SHL: { |
| SDValue N1 = Op.getOperand(1); |
| EVT N1VT = N1.getValueType(); |
| |
| N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); |
| if (!N1VT.bitsEq(ShiftVT)) { |
| unsigned N1Opc = ISD::ZERO_EXTEND; |
| |
| if (N1.getValueType().bitsGT(ShiftVT)) |
| N1Opc = ISD::TRUNCATE; |
| |
| N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); |
| } |
| |
| return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, |
| DAG.getNode(Opc, dl, MVT::i16, N0, N1)); |
| } |
| case ISD::SRA: { |
| SDValue N1 = Op.getOperand(1); |
| EVT N1VT = N1.getValueType(); |
| |
| N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); |
| if (!N1VT.bitsEq(ShiftVT)) { |
| unsigned N1Opc = ISD::SIGN_EXTEND; |
| |
| if (N1VT.bitsGT(ShiftVT)) |
| N1Opc = ISD::TRUNCATE; |
| N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); |
| } |
| |
| return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, |
| DAG.getNode(Opc, dl, MVT::i16, N0, N1)); |
| } |
| case ISD::MUL: { |
| SDValue N1 = Op.getOperand(1); |
| |
| N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); |
| N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); |
| return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, |
| DAG.getNode(Opc, dl, MVT::i16, N0, N1)); |
| break; |
| } |
| } |
| |
| return SDValue(); |
| } |
| |
| //! Lower byte immediate operations for v16i8 vectors: |
| static SDValue |
| LowerByteImmed(SDValue Op, SelectionDAG &DAG) { |
| SDValue ConstVec; |
| SDValue Arg; |
| EVT VT = Op.getValueType(); |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| ConstVec = Op.getOperand(0); |
| Arg = Op.getOperand(1); |
| if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { |
| if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { |
| ConstVec = ConstVec.getOperand(0); |
| } else { |
| ConstVec = Op.getOperand(1); |
| Arg = Op.getOperand(0); |
| if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { |
| ConstVec = ConstVec.getOperand(0); |
| } |
| } |
| } |
| |
| if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { |
| BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode()); |
| assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); |
| |
| APInt APSplatBits, APSplatUndef; |
| unsigned SplatBitSize; |
| bool HasAnyUndefs; |
| unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); |
| |
| if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, |
| HasAnyUndefs, minSplatBits) |
| && minSplatBits <= SplatBitSize) { |
| uint64_t SplatBits = APSplatBits.getZExtValue(); |
| SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); |
| |
| SmallVector<SDValue, 16> tcVec; |
| tcVec.assign(16, tc); |
| return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, |
| DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); |
| } |
| } |
| |
| // These operations (AND, OR, XOR) are legal, they just couldn't be custom |
| // lowered. Return the operation, rather than a null SDValue. |
| return Op; |
| } |
| |
| //! Custom lowering for CTPOP (count population) |
| /*! |
| Custom lowering code that counts the number ones in the input |
| operand. SPU has such an instruction, but it counts the number of |
| ones per byte, which then have to be accumulated. |
| */ |
| static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { |
| EVT VT = Op.getValueType(); |
| EVT vecVT = EVT::getVectorVT(*DAG.getContext(), |
| VT, (128 / VT.getSizeInBits())); |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| switch (VT.getSimpleVT().SimpleTy) { |
| default: |
| assert(false && "Invalid value type!"); |
| case MVT::i8: { |
| SDValue N = Op.getOperand(0); |
| SDValue Elt0 = DAG.getConstant(0, MVT::i32); |
| |
| SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); |
| SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); |
| |
| return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); |
| } |
| |
| case MVT::i16: { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
| |
| unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); |
| |
| SDValue N = Op.getOperand(0); |
| SDValue Elt0 = DAG.getConstant(0, MVT::i16); |
| SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); |
| SDValue Shift1 = DAG.getConstant(8, MVT::i32); |
| |
| SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); |
| SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); |
| |
| // CNTB_result becomes the chain to which all of the virtual registers |
| // CNTB_reg, SUM1_reg become associated: |
| SDValue CNTB_result = |
| DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); |
| |
| SDValue CNTB_rescopy = |
| DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); |
| |
| SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); |
| |
| return DAG.getNode(ISD::AND, dl, MVT::i16, |
| DAG.getNode(ISD::ADD, dl, MVT::i16, |
| DAG.getNode(ISD::SRL, dl, MVT::i16, |
| Tmp1, Shift1), |
| Tmp1), |
| Mask0); |
| } |
| |
| case MVT::i32: { |
| MachineFunction &MF = DAG.getMachineFunction(); |
| MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
| |
| unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); |
| unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); |
| |
| SDValue N = Op.getOperand(0); |
| SDValue Elt0 = DAG.getConstant(0, MVT::i32); |
| SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); |
| SDValue Shift1 = DAG.getConstant(16, MVT::i32); |
| SDValue Shift2 = DAG.getConstant(8, MVT::i32); |
| |
| SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); |
| SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); |
| |
| // CNTB_result becomes the chain to which all of the virtual registers |
| // CNTB_reg, SUM1_reg become associated: |
| SDValue CNTB_result = |
| DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); |
| |
| SDValue CNTB_rescopy = |
| DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); |
| |
| SDValue Comp1 = |
| DAG.getNode(ISD::SRL, dl, MVT::i32, |
| DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), |
| Shift1); |
| |
| SDValue Sum1 = |
| DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, |
| DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); |
| |
| SDValue Sum1_rescopy = |
| DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); |
| |
| SDValue Comp2 = |
| DAG.getNode(ISD::SRL, dl, MVT::i32, |
| DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), |
| Shift2); |
| SDValue Sum2 = |
| DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2, |
| DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32)); |
| |
| return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0); |
| } |
| |
| case MVT::i64: |
| break; |
| } |
| |
| return SDValue(); |
| } |
| |
| //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 |
| /*! |
| f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. |
| All conversions to i64 are expanded to a libcall. |
| */ |
| static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, |
| const SPUTargetLowering &TLI) { |
| EVT OpVT = Op.getValueType(); |
| SDValue Op0 = Op.getOperand(0); |
| EVT Op0VT = Op0.getValueType(); |
| |
| if ((OpVT == MVT::i32 && Op0VT == MVT::f64) |
| || OpVT == MVT::i64) { |
| // Convert f32 / f64 to i32 / i64 via libcall. |
| RTLIB::Libcall LC = |
| (Op.getOpcode() == ISD::FP_TO_SINT) |
| ? RTLIB::getFPTOSINT(Op0VT, OpVT) |
| : RTLIB::getFPTOUINT(Op0VT, OpVT); |
| assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); |
| SDValue Dummy; |
| return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); |
| } |
| |
| return Op; |
| } |
| |
| //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 |
| /*! |
| i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. |
| All conversions from i64 are expanded to a libcall. |
| */ |
| static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, |
| const SPUTargetLowering &TLI) { |
| EVT OpVT = Op.getValueType(); |
| SDValue Op0 = Op.getOperand(0); |
| EVT Op0VT = Op0.getValueType(); |
| |
| if ((OpVT == MVT::f64 && Op0VT == MVT::i32) |
| || Op0VT == MVT::i64) { |
| // Convert i32, i64 to f64 via libcall: |
| RTLIB::Libcall LC = |
| (Op.getOpcode() == ISD::SINT_TO_FP) |
| ? RTLIB::getSINTTOFP(Op0VT, OpVT) |
| : RTLIB::getUINTTOFP(Op0VT, OpVT); |
| assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); |
| SDValue Dummy; |
| return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); |
| } |
| |
| return Op; |
| } |
| |
| //! Lower ISD::SETCC |
| /*! |
| This handles MVT::f64 (double floating point) condition lowering |
| */ |
| static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, |
| const TargetLowering &TLI) { |
| CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); |
| DebugLoc dl = Op.getDebugLoc(); |
| assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); |
| |
| SDValue lhs = Op.getOperand(0); |
| SDValue rhs = Op.getOperand(1); |
| EVT lhsVT = lhs.getValueType(); |
| assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); |
| |
| EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); |
| APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); |
| EVT IntVT(MVT::i64); |
| |
| // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently |
| // selected to a NOP: |
| SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs); |
| SDValue lhsHi32 = |
| DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, |
| DAG.getNode(ISD::SRL, dl, IntVT, |
| i64lhs, DAG.getConstant(32, MVT::i32))); |
| SDValue lhsHi32abs = |
| DAG.getNode(ISD::AND, dl, MVT::i32, |
| lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); |
| SDValue lhsLo32 = |
| DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); |
| |
| // SETO and SETUO only use the lhs operand: |
| if (CC->get() == ISD::SETO) { |
| // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of |
| // SETUO |
| APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); |
| return DAG.getNode(ISD::XOR, dl, ccResultVT, |
| DAG.getSetCC(dl, ccResultVT, |
| lhs, DAG.getConstantFP(0.0, lhsVT), |
| ISD::SETUO), |
| DAG.getConstant(ccResultAllOnes, ccResultVT)); |
| } else if (CC->get() == ISD::SETUO) { |
| // Evaluates to true if Op0 is [SQ]NaN |
| return DAG.getNode(ISD::AND, dl, ccResultVT, |
| DAG.getSetCC(dl, ccResultVT, |
| lhsHi32abs, |
| DAG.getConstant(0x7ff00000, MVT::i32), |
| ISD::SETGE), |
| DAG.getSetCC(dl, ccResultVT, |
| lhsLo32, |
| DAG.getConstant(0, MVT::i32), |
| ISD::SETGT)); |
| } |
| |
| SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs); |
| SDValue rhsHi32 = |
| DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, |
| DAG.getNode(ISD::SRL, dl, IntVT, |
| i64rhs, DAG.getConstant(32, MVT::i32))); |
| |
| // If a value is negative, subtract from the sign magnitude constant: |
| SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); |
| |
| // Convert the sign-magnitude representation into 2's complement: |
| SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, |
| lhsHi32, DAG.getConstant(31, MVT::i32)); |
| SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); |
| SDValue lhsSelect = |
| DAG.getNode(ISD::SELECT, dl, IntVT, |
| lhsSelectMask, lhsSignMag2TC, i64lhs); |
| |
| SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, |
| rhsHi32, DAG.getConstant(31, MVT::i32)); |
| SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); |
| SDValue rhsSelect = |
| DAG.getNode(ISD::SELECT, dl, IntVT, |
| rhsSelectMask, rhsSignMag2TC, i64rhs); |
| |
| unsigned compareOp; |
| |
| switch (CC->get()) { |
| case ISD::SETOEQ: |
| case ISD::SETUEQ: |
| compareOp = ISD::SETEQ; break; |
| case ISD::SETOGT: |
| case ISD::SETUGT: |
| compareOp = ISD::SETGT; break; |
| case ISD::SETOGE: |
| case ISD::SETUGE: |
| compareOp = ISD::SETGE; break; |
| case ISD::SETOLT: |
| case ISD::SETULT: |
| compareOp = ISD::SETLT; break; |
| case ISD::SETOLE: |
| case ISD::SETULE: |
| compareOp = ISD::SETLE; break; |
| case ISD::SETUNE: |
| case ISD::SETONE: |
| compareOp = ISD::SETNE; break; |
| default: |
| report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); |
| } |
| |
| SDValue result = |
| DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, |
| (ISD::CondCode) compareOp); |
| |
| if ((CC->get() & 0x8) == 0) { |
| // Ordered comparison: |
| SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT, |
| lhs, DAG.getConstantFP(0.0, MVT::f64), |
| ISD::SETO); |
| SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT, |
| rhs, DAG.getConstantFP(0.0, MVT::f64), |
| ISD::SETO); |
| SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN); |
| |
| result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result); |
| } |
| |
| return result; |
| } |
| |
| //! Lower ISD::SELECT_CC |
| /*! |
| ISD::SELECT_CC can (generally) be implemented directly on the SPU using the |
| SELB instruction. |
| |
| \note Need to revisit this in the future: if the code path through the true |
| and false value computations is longer than the latency of a branch (6 |
| cycles), then it would be more advantageous to branch and insert a new basic |
| block and branch on the condition. However, this code does not make that |
| assumption, given the simplisitc uses so far. |
| */ |
| |
| static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, |
| const TargetLowering &TLI) { |
| EVT VT = Op.getValueType(); |
| SDValue lhs = Op.getOperand(0); |
| SDValue rhs = Op.getOperand(1); |
| SDValue trueval = Op.getOperand(2); |
| SDValue falseval = Op.getOperand(3); |
| SDValue condition = Op.getOperand(4); |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| // NOTE: SELB's arguments: $rA, $rB, $mask |
| // |
| // SELB selects bits from $rA where bits in $mask are 0, bits from $rB |
| // where bits in $mask are 1. CCond will be inverted, having 1s where the |
| // condition was true and 0s where the condition was false. Hence, the |
| // arguments to SELB get reversed. |
| |
| // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's |
| // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up |
| // with another "cannot select select_cc" assert: |
| |
| SDValue compare = DAG.getNode(ISD::SETCC, dl, |
| TLI.getSetCCResultType(Op.getValueType()), |
| lhs, rhs, condition); |
| return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare); |
| } |
| |
| //! Custom lower ISD::TRUNCATE |
| static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) |
| { |
| // Type to truncate to |
| EVT VT = Op.getValueType(); |
| MVT simpleVT = VT.getSimpleVT(); |
| EVT VecVT = EVT::getVectorVT(*DAG.getContext(), |
| VT, (128 / VT.getSizeInBits())); |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| // Type to truncate from |
| SDValue Op0 = Op.getOperand(0); |
| EVT Op0VT = Op0.getValueType(); |
| |
| if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { |
| // Create shuffle mask, least significant doubleword of quadword |
| unsigned maskHigh = 0x08090a0b; |
| unsigned maskLow = 0x0c0d0e0f; |
| // Use a shuffle to perform the truncation |
| SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
| DAG.getConstant(maskHigh, MVT::i32), |
| DAG.getConstant(maskLow, MVT::i32), |
| DAG.getConstant(maskHigh, MVT::i32), |
| DAG.getConstant(maskLow, MVT::i32)); |
| |
| SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, |
| Op0, Op0, shufMask); |
| |
| return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); |
| } |
| |
| return SDValue(); // Leave the truncate unmolested |
| } |
| |
| /*! |
| * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic |
| * algorithm is to duplicate the sign bit using rotmai to generate at |
| * least one byte full of sign bits. Then propagate the "sign-byte" into |
| * the leftmost words and the i64/i32 into the rightmost words using shufb. |
| * |
| * @param Op The sext operand |
| * @param DAG The current DAG |
| * @return The SDValue with the entire instruction sequence |
| */ |
| static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) |
| { |
| DebugLoc dl = Op.getDebugLoc(); |
| |
| // Type to extend to |
| MVT OpVT = Op.getValueType().getSimpleVT(); |
| |
| // Type to extend from |
| SDValue Op0 = Op.getOperand(0); |
| MVT Op0VT = Op0.getValueType().getSimpleVT(); |
| |
| // The type to extend to needs to be a i128 and |
| // the type to extend from needs to be i64 or i32. |
| assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && |
| "LowerSIGN_EXTEND: input and/or output operand have wrong size"); |
| |
| // Create shuffle mask |
| unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 |
| unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 |
| unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 |
| SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, |
| DAG.getConstant(mask1, MVT::i32), |
| DAG.getConstant(mask1, MVT::i32), |
| DAG.getConstant(mask2, MVT::i32), |
| DAG.getConstant(mask3, MVT::i32)); |
| |
| // Word wise arithmetic right shift to generate at least one byte |
| // that contains sign bits. |
| MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; |
| SDValue sraVal = DAG.getNode(ISD::SRA, |
| dl, |
| mvt, |
| DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), |
| DAG.getConstant(31, MVT::i32)); |
| |
| // Shuffle bytes - Copy the sign bits into the upper 64 bits |
| // and the input value into the lower 64 bits. |
| SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, |
| DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask); |
| |
| return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle); |
| } |
| |
| //! Custom (target-specific) lowering entry point |
| /*! |
| This is where LLVM's DAG selection process calls to do target-specific |
| lowering of nodes. |
| */ |
| SDValue |
| SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const |
| { |
| unsigned Opc = (unsigned) Op.getOpcode(); |
| EVT VT = Op.getValueType(); |
| |
| switch (Opc) { |
| default: { |
| #ifndef NDEBUG |
| errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; |
| errs() << "Op.getOpcode() = " << Opc << "\n"; |
| errs() << "*Op.getNode():\n"; |
| Op.getNode()->dump(); |
| #endif |
| llvm_unreachable(0); |
| } |
| case ISD::LOAD: |
| case ISD::EXTLOAD: |
| case ISD::SEXTLOAD: |
| case ISD::ZEXTLOAD: |
| return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); |
| case ISD::STORE: |
| return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); |
| case ISD::ConstantPool: |
| return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); |
| case ISD::GlobalAddress: |
| return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); |
| case ISD::JumpTable: |
| return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); |
| case ISD::ConstantFP: |
| return LowerConstantFP(Op, DAG); |
| |
| // i8, i64 math ops: |
| case ISD::ADD: |
| case ISD::SUB: |
| case ISD::ROTR: |
| case ISD::ROTL: |
| case ISD::SRL: |
| case ISD::SHL: |
| case ISD::SRA: { |
| if (VT == MVT::i8) |
| return LowerI8Math(Op, DAG, Opc, *this); |
| break; |
| } |
| |
| case ISD::FP_TO_SINT: |
| case ISD::FP_TO_UINT: |
| return LowerFP_TO_INT(Op, DAG, *this); |
| |
| case ISD::SINT_TO_FP: |
| case ISD::UINT_TO_FP: |
| return LowerINT_TO_FP(Op, DAG, *this); |
| |
| // Vector-related lowering. |
| case ISD::BUILD_VECTOR: |
| return LowerBUILD_VECTOR(Op, DAG); |
| case ISD::SCALAR_TO_VECTOR: |
| return LowerSCALAR_TO_VECTOR(Op, DAG); |
| case ISD::VECTOR_SHUFFLE: |
| return LowerVECTOR_SHUFFLE(Op, DAG); |
| case ISD::EXTRACT_VECTOR_ELT: |
| return LowerEXTRACT_VECTOR_ELT(Op, DAG); |
| case ISD::INSERT_VECTOR_ELT: |
| return LowerINSERT_VECTOR_ELT(Op, DAG); |
| |
| // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: |
| case ISD::AND: |
| case ISD::OR: |
| case ISD::XOR: |
| return LowerByteImmed(Op, DAG); |
| |
| // Vector and i8 multiply: |
| case ISD::MUL: |
| if (VT == MVT::i8) |
| return LowerI8Math(Op, DAG, Opc, *this); |
| |
| case ISD::CTPOP: |
| return LowerCTPOP(Op, DAG); |
| |
| case ISD::SELECT_CC: |
| return LowerSELECT_CC(Op, DAG, *this); |
| |
| case ISD::SETCC: |
| return LowerSETCC(Op, DAG, *this); |
| |
| case ISD::TRUNCATE: |
| return LowerTRUNCATE(Op, DAG); |
| |
| case ISD::SIGN_EXTEND: |
| return LowerSIGN_EXTEND(Op, DAG); |
| } |
| |
| return SDValue(); |
| } |
| |
| void SPUTargetLowering::ReplaceNodeResults(SDNode *N, |
| SmallVectorImpl<SDValue>&Results, |
| SelectionDAG &DAG) const |
| { |
| #if 0 |
| unsigned Opc = (unsigned) N->getOpcode(); |
| EVT OpVT = N->getValueType(0); |
| |
| switch (Opc) { |
| default: { |
| errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; |
| errs() << "Op.getOpcode() = " << Opc << "\n"; |
| errs() << "*Op.getNode():\n"; |
| N->dump(); |
| abort(); |
| /*NOTREACHED*/ |
| } |
| } |
| #endif |
| |
| /* Otherwise, return unchanged */ |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Target Optimization Hooks |
| //===----------------------------------------------------------------------===// |
| |
| SDValue |
| SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const |
| { |
| #if 0 |
| TargetMachine &TM = getTargetMachine(); |
| #endif |
| const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); |
| SelectionDAG &DAG = DCI.DAG; |
| SDValue Op0 = N->getOperand(0); // everything has at least one operand |
| EVT NodeVT = N->getValueType(0); // The node's value type |
| EVT Op0VT = Op0.getValueType(); // The first operand's result |
| SDValue Result; // Initially, empty result |
| DebugLoc dl = N->getDebugLoc(); |
| |
| switch (N->getOpcode()) { |
| default: break; |
| case ISD::ADD: { |
| SDValue Op1 = N->getOperand(1); |
| |
| if (Op0.getOpcode() == SPUISD::IndirectAddr |
| || Op1.getOpcode() == SPUISD::IndirectAddr) { |
| // Normalize the operands to reduce repeated code |
| SDValue IndirectArg = Op0, AddArg = Op1; |
| |
| if (Op1.getOpcode() == SPUISD::IndirectAddr) { |
| IndirectArg = Op1; |
| AddArg = Op0; |
| } |
| |
| if (isa<ConstantSDNode>(AddArg)) { |
| ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); |
| SDValue IndOp1 = IndirectArg.getOperand(1); |
| |
| if (CN0->isNullValue()) { |
| // (add (SPUindirect <arg>, <arg>), 0) -> |
| // (SPUindirect <arg>, <arg>) |
| |
| #if !defined(NDEBUG) |
| if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { |
| errs() << "\n" |
| << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" |
| << "With: (SPUindirect <arg>, <arg>)\n"; |
| } |
| #endif |
| |
| return IndirectArg; |
| } else if (isa<ConstantSDNode>(IndOp1)) { |
| // (add (SPUindirect <arg>, <const>), <const>) -> |
| // (SPUindirect <arg>, <const + const>) |
| ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1); |
| int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); |
| SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); |
| |
| #if !defined(NDEBUG) |
| if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { |
| errs() << "\n" |
| << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() |
| << "), " << CN0->getSExtValue() << ")\n" |
| << "With: (SPUindirect <arg>, " |
| << combinedConst << ")\n"; |
| } |
| #endif |
| |
| return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, |
| IndirectArg, combinedValue); |
| } |
| } |
| } |
| break; |
| } |
| case ISD::SIGN_EXTEND: |
| case ISD::ZERO_EXTEND: |
| case ISD::ANY_EXTEND: { |
| if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { |
| // (any_extend (SPUextract_elt0 <arg>)) -> |
| // (SPUextract_elt0 <arg>) |
| // Types must match, however... |
| #if !defined(NDEBUG) |
| if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { |
| errs() << "\nReplace: "; |
| N->dump(&DAG); |
| errs() << "\nWith: "; |
| Op0.getNode()->dump(&DAG); |
| errs() << "\n"; |
| } |
| #endif |
| |
| return Op0; |
| } |
| break; |
| } |
| case SPUISD::IndirectAddr: { |
| if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { |
| ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
| if (CN != 0 && CN->isNullValue()) { |
| // (SPUindirect (SPUaform <addr>, 0), 0) -> |
| // (SPUaform <addr>, 0) |
| |
| DEBUG(errs() << "Replace: "); |
| DEBUG(N->dump(&DAG)); |
| DEBUG(errs() << "\nWith: "); |
| DEBUG(Op0.getNode()->dump(&DAG)); |
| DEBUG(errs() << "\n"); |
| |
| return Op0; |
| } |
| } else if (Op0.getOpcode() == ISD::ADD) { |
| SDValue Op1 = N->getOperand(1); |
| if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { |
| // (SPUindirect (add <arg>, <arg>), 0) -> |
| // (SPUindirect <arg>, <arg>) |
| if (CN1->isNullValue()) { |
| |
| #if !defined(NDEBUG) |
| if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { |
| errs() << "\n" |
| << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" |
| << "With: (SPUindirect <arg>, <arg>)\n"; |
| } |
| #endif |
| |
| return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, |
| Op0.getOperand(0), Op0.getOperand(1)); |
| } |
| } |
| } |
| break; |
| } |
| case SPUISD::SHLQUAD_L_BITS: |
| case SPUISD::SHLQUAD_L_BYTES: |
| case SPUISD::ROTBYTES_LEFT: { |
| SDValue Op1 = N->getOperand(1); |
| |
| // Kill degenerate vector shifts: |
| if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { |
| if (CN->isNullValue()) { |
| Result = Op0; |
| } |
| } |
| break; |
| } |
| case SPUISD::PREFSLOT2VEC: { |
| switch (Op0.getOpcode()) { |
| default: |
| break; |
| case ISD::ANY_EXTEND: |
| case ISD::ZERO_EXTEND: |
| case ISD::SIGN_EXTEND: { |
| // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> |
| // <arg> |
| // but only if the SPUprefslot2vec and <arg> types match. |
| SDValue Op00 = Op0.getOperand(0); |
| if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { |
| SDValue Op000 = Op00.getOperand(0); |
| if (Op000.getValueType() == NodeVT) { |
| Result = Op000; |
| } |
| } |
| break; |
| } |
| case SPUISD::VEC2PREFSLOT: { |
| // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> |
| // <arg> |
| Result = Op0.getOperand(0); |
| break; |
| } |
| } |
| break; |
| } |
| } |
| |
| // Otherwise, return unchanged. |
| #ifndef NDEBUG |
| if (Result.getNode()) { |
| DEBUG(errs() << "\nReplace.SPU: "); |
| DEBUG(N->dump(&DAG)); |
| DEBUG(errs() << "\nWith: "); |
| DEBUG(Result.getNode()->dump(&DAG)); |
| DEBUG(errs() << "\n"); |
| } |
| #endif |
| |
| return Result; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Inline Assembly Support |
| //===----------------------------------------------------------------------===// |
| |
| /// getConstraintType - Given a constraint letter, return the type of |
| /// constraint it is for this target. |
| SPUTargetLowering::ConstraintType |
| SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { |
| if (ConstraintLetter.size() == 1) { |
| switch (ConstraintLetter[0]) { |
| default: break; |
| case 'b': |
| case 'r': |
| case 'f': |
| case 'v': |
| case 'y': |
| return C_RegisterClass; |
| } |
| } |
| return TargetLowering::getConstraintType(ConstraintLetter); |
| } |
| |
| std::pair<unsigned, const TargetRegisterClass*> |
| SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, |
| EVT VT) const |
| { |
| if (Constraint.size() == 1) { |
| // GCC RS6000 Constraint Letters |
| switch (Constraint[0]) { |
| case 'b': // R1-R31 |
| case 'r': // R0-R31 |
| if (VT == MVT::i64) |
| return std::make_pair(0U, SPU::R64CRegisterClass); |
| return std::make_pair(0U, SPU::R32CRegisterClass); |
| case 'f': |
| if (VT == MVT::f32) |
| return std::make_pair(0U, SPU::R32FPRegisterClass); |
| else if (VT == MVT::f64) |
| return std::make_pair(0U, SPU::R64FPRegisterClass); |
| break; |
| case 'v': |
| return std::make_pair(0U, SPU::GPRCRegisterClass); |
| } |
| } |
| |
| return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); |
| } |
| |
| //! Compute used/known bits for a SPU operand |
| void |
| SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, |
| const APInt &Mask, |
| APInt &KnownZero, |
| APInt &KnownOne, |
| const SelectionDAG &DAG, |
| unsigned Depth ) const { |
| #if 0 |
| const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; |
| |
| switch (Op.getOpcode()) { |
| default: |
| // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); |
| break; |
| case CALL: |
| case SHUFB: |
| case SHUFFLE_MASK: |
| case CNTB: |
| case SPUISD::PREFSLOT2VEC: |
| case SPUISD::LDRESULT: |
| case SPUISD::VEC2PREFSLOT: |
| case SPUISD::SHLQUAD_L_BITS: |
| case SPUISD::SHLQUAD_L_BYTES: |
| case SPUISD::VEC_ROTL: |
| case SPUISD::VEC_ROTR: |
| case SPUISD::ROTBYTES_LEFT: |
| case SPUISD::SELECT_MASK: |
| case SPUISD::SELB: |
| } |
| #endif |
| } |
| |
| unsigned |
| SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, |
| unsigned Depth) const { |
| switch (Op.getOpcode()) { |
| default: |
| return 1; |
| |
| case ISD::SETCC: { |
| EVT VT = Op.getValueType(); |
| |
| if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { |
| VT = MVT::i32; |
| } |
| return VT.getSizeInBits(); |
| } |
| } |
| } |
| |
| // LowerAsmOperandForConstraint |
| void |
| SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, |
| char ConstraintLetter, |
| std::vector<SDValue> &Ops, |
| SelectionDAG &DAG) const { |
| // Default, for the time being, to the base class handler |
| TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); |
| } |
| |
| /// isLegalAddressImmediate - Return true if the integer value can be used |
| /// as the offset of the target addressing mode. |
| bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, |
| const Type *Ty) const { |
| // SPU's addresses are 256K: |
| return (V > -(1 << 18) && V < (1 << 18) - 1); |
| } |
| |
| bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { |
| return false; |
| } |
| |
| bool |
| SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { |
| // The SPU target isn't yet aware of offsets. |
| return false; |
| } |