| //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run |
| // both before and after the DAG is legalized. |
| // |
| // This pass is not a substitute for the LLVM IR instcombine pass. This pass is |
| // primarily intended to handle simplification opportunities that are implicit |
| // in the LLVM IR and exposed by the various codegen lowering phases. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/ADT/APFloat.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/IntervalMap.h" |
| #include "llvm/ADT/None.h" |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Analysis/AliasAnalysis.h" |
| #include "llvm/Analysis/MemoryLocation.h" |
| #include "llvm/CodeGen/DAGCombine.h" |
| #include "llvm/CodeGen/ISDOpcodes.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/MachineFunction.h" |
| #include "llvm/CodeGen/MachineMemOperand.h" |
| #include "llvm/CodeGen/RuntimeLibcalls.h" |
| #include "llvm/CodeGen/SelectionDAG.h" |
| #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" |
| #include "llvm/CodeGen/SelectionDAGNodes.h" |
| #include "llvm/CodeGen/SelectionDAGTargetInfo.h" |
| #include "llvm/CodeGen/TargetLowering.h" |
| #include "llvm/CodeGen/TargetRegisterInfo.h" |
| #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| #include "llvm/CodeGen/ValueTypes.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/LLVMContext.h" |
| #include "llvm/IR/Metadata.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CodeGen.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/KnownBits.h" |
| #include "llvm/Support/MachineValueType.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Target/TargetOptions.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <functional> |
| #include <iterator> |
| #include <string> |
| #include <tuple> |
| #include <utility> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "dagcombine" |
| |
| STATISTIC(NodesCombined , "Number of dag nodes combined"); |
| STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); |
| STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); |
| STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); |
| STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); |
| STATISTIC(SlicedLoads, "Number of load sliced"); |
| STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops"); |
| |
| static cl::opt<bool> |
| CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, |
| cl::desc("Enable DAG combiner's use of IR alias analysis")); |
| |
| static cl::opt<bool> |
| UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), |
| cl::desc("Enable DAG combiner's use of TBAA")); |
| |
| #ifndef NDEBUG |
| static cl::opt<std::string> |
| CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, |
| cl::desc("Only use DAG-combiner alias analysis in this" |
| " function")); |
| #endif |
| |
| /// Hidden option to stress test load slicing, i.e., when this option |
| /// is enabled, load slicing bypasses most of its profitability guards. |
| static cl::opt<bool> |
| StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, |
| cl::desc("Bypass the profitability model of load slicing"), |
| cl::init(false)); |
| |
| static cl::opt<bool> |
| MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), |
| cl::desc("DAG combiner may split indexing from loads")); |
| |
| static cl::opt<bool> |
| EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), |
| cl::desc("DAG combiner enable merging multiple stores " |
| "into a wider store")); |
| |
| static cl::opt<unsigned> TokenFactorInlineLimit( |
| "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), |
| cl::desc("Limit the number of operands to inline for Token Factors")); |
| |
| static cl::opt<unsigned> StoreMergeDependenceLimit( |
| "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), |
| cl::desc("Limit the number of times for the same StoreNode and RootNode " |
| "to bail out in store merging dependence check")); |
| |
| namespace { |
| |
| class DAGCombiner { |
| SelectionDAG &DAG; |
| const TargetLowering &TLI; |
| CombineLevel Level; |
| CodeGenOpt::Level OptLevel; |
| bool LegalDAG = false; |
| bool LegalOperations = false; |
| bool LegalTypes = false; |
| bool ForCodeSize; |
| |
| /// Worklist of all of the nodes that need to be simplified. |
| /// |
| /// This must behave as a stack -- new nodes to process are pushed onto the |
| /// back and when processing we pop off of the back. |
| /// |
| /// The worklist will not contain duplicates but may contain null entries |
| /// due to nodes being deleted from the underlying DAG. |
| SmallVector<SDNode *, 64> Worklist; |
| |
| /// Mapping from an SDNode to its position on the worklist. |
| /// |
| /// This is used to find and remove nodes from the worklist (by nulling |
| /// them) when they are deleted from the underlying DAG. It relies on |
| /// stable indices of nodes within the worklist. |
| DenseMap<SDNode *, unsigned> WorklistMap; |
| /// This records all nodes attempted to add to the worklist since we |
| /// considered a new worklist entry. As we keep do not add duplicate nodes |
| /// in the worklist, this is different from the tail of the worklist. |
| SmallSetVector<SDNode *, 32> PruningList; |
| |
| /// Set of nodes which have been combined (at least once). |
| /// |
| /// This is used to allow us to reliably add any operands of a DAG node |
| /// which have not yet been combined to the worklist. |
| SmallPtrSet<SDNode *, 32> CombinedNodes; |
| |
| /// Map from candidate StoreNode to the pair of RootNode and count. |
| /// The count is used to track how many times we have seen the StoreNode |
| /// with the same RootNode bail out in dependence check. If we have seen |
| /// the bail out for the same pair many times over a limit, we won't |
| /// consider the StoreNode with the same RootNode as store merging |
| /// candidate again. |
| DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap; |
| |
| // AA - Used for DAG load/store alias analysis. |
| AliasAnalysis *AA; |
| |
| /// When an instruction is simplified, add all users of the instruction to |
| /// the work lists because they might get more simplified now. |
| void AddUsersToWorklist(SDNode *N) { |
| for (SDNode *Node : N->uses()) |
| AddToWorklist(Node); |
| } |
| |
| /// Convenient shorthand to add a node and all of its user to the worklist. |
| void AddToWorklistWithUsers(SDNode *N) { |
| AddUsersToWorklist(N); |
| AddToWorklist(N); |
| } |
| |
| // Prune potentially dangling nodes. This is called after |
| // any visit to a node, but should also be called during a visit after any |
| // failed combine which may have created a DAG node. |
| void clearAddedDanglingWorklistEntries() { |
| // Check any nodes added to the worklist to see if they are prunable. |
| while (!PruningList.empty()) { |
| auto *N = PruningList.pop_back_val(); |
| if (N->use_empty()) |
| recursivelyDeleteUnusedNodes(N); |
| } |
| } |
| |
| SDNode *getNextWorklistEntry() { |
| // Before we do any work, remove nodes that are not in use. |
| clearAddedDanglingWorklistEntries(); |
| SDNode *N = nullptr; |
| // The Worklist holds the SDNodes in order, but it may contain null |
| // entries. |
| while (!N && !Worklist.empty()) { |
| N = Worklist.pop_back_val(); |
| } |
| |
| if (N) { |
| bool GoodWorklistEntry = WorklistMap.erase(N); |
| (void)GoodWorklistEntry; |
| assert(GoodWorklistEntry && |
| "Found a worklist entry without a corresponding map entry!"); |
| } |
| return N; |
| } |
| |
| /// Call the node-specific routine that folds each particular type of node. |
| SDValue visit(SDNode *N); |
| |
| public: |
| DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) |
| : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), |
| OptLevel(OL), AA(AA) { |
| ForCodeSize = DAG.shouldOptForSize(); |
| |
| MaximumLegalStoreInBits = 0; |
| // We use the minimum store size here, since that's all we can guarantee |
| // for the scalable vector types. |
| for (MVT VT : MVT::all_valuetypes()) |
| if (EVT(VT).isSimple() && VT != MVT::Other && |
| TLI.isTypeLegal(EVT(VT)) && |
| VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits) |
| MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize(); |
| } |
| |
| void ConsiderForPruning(SDNode *N) { |
| // Mark this for potential pruning. |
| PruningList.insert(N); |
| } |
| |
| /// Add to the worklist making sure its instance is at the back (next to be |
| /// processed.) |
| void AddToWorklist(SDNode *N) { |
| assert(N->getOpcode() != ISD::DELETED_NODE && |
| "Deleted Node added to Worklist"); |
| |
| // Skip handle nodes as they can't usefully be combined and confuse the |
| // zero-use deletion strategy. |
| if (N->getOpcode() == ISD::HANDLENODE) |
| return; |
| |
| ConsiderForPruning(N); |
| |
| if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) |
| Worklist.push_back(N); |
| } |
| |
| /// Remove all instances of N from the worklist. |
| void removeFromWorklist(SDNode *N) { |
| CombinedNodes.erase(N); |
| PruningList.remove(N); |
| StoreRootCountMap.erase(N); |
| |
| auto It = WorklistMap.find(N); |
| if (It == WorklistMap.end()) |
| return; // Not in the worklist. |
| |
| // Null out the entry rather than erasing it to avoid a linear operation. |
| Worklist[It->second] = nullptr; |
| WorklistMap.erase(It); |
| } |
| |
| void deleteAndRecombine(SDNode *N); |
| bool recursivelyDeleteUnusedNodes(SDNode *N); |
| |
| /// Replaces all uses of the results of one DAG node with new values. |
| SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, |
| bool AddTo = true); |
| |
| /// Replaces all uses of the results of one DAG node with new values. |
| SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { |
| return CombineTo(N, &Res, 1, AddTo); |
| } |
| |
| /// Replaces all uses of the results of one DAG node with new values. |
| SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, |
| bool AddTo = true) { |
| SDValue To[] = { Res0, Res1 }; |
| return CombineTo(N, To, 2, AddTo); |
| } |
| |
| void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); |
| |
| private: |
| unsigned MaximumLegalStoreInBits; |
| |
| /// Check the specified integer node value to see if it can be simplified or |
| /// if things it uses can be simplified by bit propagation. |
| /// If so, return true. |
| bool SimplifyDemandedBits(SDValue Op) { |
| unsigned BitWidth = Op.getScalarValueSizeInBits(); |
| APInt DemandedBits = APInt::getAllOnesValue(BitWidth); |
| return SimplifyDemandedBits(Op, DemandedBits); |
| } |
| |
| bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) { |
| EVT VT = Op.getValueType(); |
| unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1; |
| APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
| return SimplifyDemandedBits(Op, DemandedBits, DemandedElts); |
| } |
| |
| /// Check the specified vector node value to see if it can be simplified or |
| /// if things it uses can be simplified as it only uses some of the |
| /// elements. If so, return true. |
| bool SimplifyDemandedVectorElts(SDValue Op) { |
| unsigned NumElts = Op.getValueType().getVectorNumElements(); |
| APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
| return SimplifyDemandedVectorElts(Op, DemandedElts); |
| } |
| |
| bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
| const APInt &DemandedElts); |
| bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, |
| bool AssumeSingleUse = false); |
| |
| bool CombineToPreIndexedLoadStore(SDNode *N); |
| bool CombineToPostIndexedLoadStore(SDNode *N); |
| SDValue SplitIndexingFromLoad(LoadSDNode *LD); |
| bool SliceUpLoad(SDNode *N); |
| |
| // Scalars have size 0 to distinguish from singleton vectors. |
| SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD); |
| bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val); |
| bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val); |
| |
| /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed |
| /// load. |
| /// |
| /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. |
| /// \param InVecVT type of the input vector to EVE with bitcasts resolved. |
| /// \param EltNo index of the vector element to load. |
| /// \param OriginalLoad load that EVE came from to be replaced. |
| /// \returns EVE on success SDValue() on failure. |
| SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, |
| SDValue EltNo, |
| LoadSDNode *OriginalLoad); |
| void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); |
| SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); |
| SDValue SExtPromoteOperand(SDValue Op, EVT PVT); |
| SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); |
| SDValue PromoteIntBinOp(SDValue Op); |
| SDValue PromoteIntShiftOp(SDValue Op); |
| SDValue PromoteExtend(SDValue Op); |
| bool PromoteLoad(SDValue Op); |
| |
| /// Call the node-specific routine that knows how to fold each |
| /// particular type of node. If that doesn't do anything, try the |
| /// target-specific DAG combines. |
| SDValue combine(SDNode *N); |
| |
| // Visitation implementation - Implement dag node combining for different |
| // node types. The semantics are as follows: |
| // Return Value: |
| // SDValue.getNode() == 0 - No change was made |
| // SDValue.getNode() == N - N was replaced, is dead and has been handled. |
| // otherwise - N should be replaced by the returned Operand. |
| // |
| SDValue visitTokenFactor(SDNode *N); |
| SDValue visitMERGE_VALUES(SDNode *N); |
| SDValue visitADD(SDNode *N); |
| SDValue visitADDLike(SDNode *N); |
| SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference); |
| SDValue visitSUB(SDNode *N); |
| SDValue visitADDSAT(SDNode *N); |
| SDValue visitSUBSAT(SDNode *N); |
| SDValue visitADDC(SDNode *N); |
| SDValue visitADDO(SDNode *N); |
| SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); |
| SDValue visitSUBC(SDNode *N); |
| SDValue visitSUBO(SDNode *N); |
| SDValue visitADDE(SDNode *N); |
| SDValue visitADDCARRY(SDNode *N); |
| SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); |
| SDValue visitSUBE(SDNode *N); |
| SDValue visitSUBCARRY(SDNode *N); |
| SDValue visitMUL(SDNode *N); |
| SDValue visitMULFIX(SDNode *N); |
| SDValue useDivRem(SDNode *N); |
| SDValue visitSDIV(SDNode *N); |
| SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); |
| SDValue visitUDIV(SDNode *N); |
| SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N); |
| SDValue visitREM(SDNode *N); |
| SDValue visitMULHU(SDNode *N); |
| SDValue visitMULHS(SDNode *N); |
| SDValue visitSMUL_LOHI(SDNode *N); |
| SDValue visitUMUL_LOHI(SDNode *N); |
| SDValue visitMULO(SDNode *N); |
| SDValue visitIMINMAX(SDNode *N); |
| SDValue visitAND(SDNode *N); |
| SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N); |
| SDValue visitOR(SDNode *N); |
| SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); |
| SDValue visitXOR(SDNode *N); |
| SDValue SimplifyVBinOp(SDNode *N); |
| SDValue visitSHL(SDNode *N); |
| SDValue visitSRA(SDNode *N); |
| SDValue visitSRL(SDNode *N); |
| SDValue visitFunnelShift(SDNode *N); |
| SDValue visitRotate(SDNode *N); |
| SDValue visitABS(SDNode *N); |
| SDValue visitBSWAP(SDNode *N); |
| SDValue visitBITREVERSE(SDNode *N); |
| SDValue visitCTLZ(SDNode *N); |
| SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); |
| SDValue visitCTTZ(SDNode *N); |
| SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); |
| SDValue visitCTPOP(SDNode *N); |
| SDValue visitSELECT(SDNode *N); |
| SDValue visitVSELECT(SDNode *N); |
| SDValue visitSELECT_CC(SDNode *N); |
| SDValue visitSETCC(SDNode *N); |
| SDValue visitSETCCCARRY(SDNode *N); |
| SDValue visitSIGN_EXTEND(SDNode *N); |
| SDValue visitZERO_EXTEND(SDNode *N); |
| SDValue visitANY_EXTEND(SDNode *N); |
| SDValue visitAssertExt(SDNode *N); |
| SDValue visitSIGN_EXTEND_INREG(SDNode *N); |
| SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); |
| SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); |
| SDValue visitTRUNCATE(SDNode *N); |
| SDValue visitBITCAST(SDNode *N); |
| SDValue visitBUILD_PAIR(SDNode *N); |
| SDValue visitFADD(SDNode *N); |
| SDValue visitFSUB(SDNode *N); |
| SDValue visitFMUL(SDNode *N); |
| SDValue visitFMA(SDNode *N); |
| SDValue visitFDIV(SDNode *N); |
| SDValue visitFREM(SDNode *N); |
| SDValue visitFSQRT(SDNode *N); |
| SDValue visitFCOPYSIGN(SDNode *N); |
| SDValue visitFPOW(SDNode *N); |
| SDValue visitSINT_TO_FP(SDNode *N); |
| SDValue visitUINT_TO_FP(SDNode *N); |
| SDValue visitFP_TO_SINT(SDNode *N); |
| SDValue visitFP_TO_UINT(SDNode *N); |
| SDValue visitFP_ROUND(SDNode *N); |
| SDValue visitFP_EXTEND(SDNode *N); |
| SDValue visitFNEG(SDNode *N); |
| SDValue visitFABS(SDNode *N); |
| SDValue visitFCEIL(SDNode *N); |
| SDValue visitFTRUNC(SDNode *N); |
| SDValue visitFFLOOR(SDNode *N); |
| SDValue visitFMINNUM(SDNode *N); |
| SDValue visitFMAXNUM(SDNode *N); |
| SDValue visitFMINIMUM(SDNode *N); |
| SDValue visitFMAXIMUM(SDNode *N); |
| SDValue visitBRCOND(SDNode *N); |
| SDValue visitBR_CC(SDNode *N); |
| SDValue visitLOAD(SDNode *N); |
| |
| SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); |
| SDValue replaceStoreOfFPConstant(StoreSDNode *ST); |
| |
| SDValue visitSTORE(SDNode *N); |
| SDValue visitLIFETIME_END(SDNode *N); |
| SDValue visitINSERT_VECTOR_ELT(SDNode *N); |
| SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); |
| SDValue visitBUILD_VECTOR(SDNode *N); |
| SDValue visitCONCAT_VECTORS(SDNode *N); |
| SDValue visitEXTRACT_SUBVECTOR(SDNode *N); |
| SDValue visitVECTOR_SHUFFLE(SDNode *N); |
| SDValue visitSCALAR_TO_VECTOR(SDNode *N); |
| SDValue visitINSERT_SUBVECTOR(SDNode *N); |
| SDValue visitMLOAD(SDNode *N); |
| SDValue visitMSTORE(SDNode *N); |
| SDValue visitMGATHER(SDNode *N); |
| SDValue visitMSCATTER(SDNode *N); |
| SDValue visitFP_TO_FP16(SDNode *N); |
| SDValue visitFP16_TO_FP(SDNode *N); |
| SDValue visitVECREDUCE(SDNode *N); |
| |
| SDValue visitFADDForFMACombine(SDNode *N); |
| SDValue visitFSUBForFMACombine(SDNode *N); |
| SDValue visitFMULForFMADistributiveCombine(SDNode *N); |
| |
| SDValue XformToShuffleWithZero(SDNode *N); |
| bool reassociationCanBreakAddressingModePattern(unsigned Opc, |
| const SDLoc &DL, SDValue N0, |
| SDValue N1); |
| SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, |
| SDValue N1); |
| SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, |
| SDValue N1, SDNodeFlags Flags); |
| |
| SDValue visitShiftByConstant(SDNode *N); |
| |
| SDValue foldSelectOfConstants(SDNode *N); |
| SDValue foldVSelectOfConstants(SDNode *N); |
| SDValue foldBinOpIntoSelect(SDNode *BO); |
| bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); |
| SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N); |
| SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); |
| SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, |
| SDValue N2, SDValue N3, ISD::CondCode CC, |
| bool NotExtCompare = false); |
| SDValue convertSelectOfFPConstantsToLoadOffset( |
| const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, |
| ISD::CondCode CC); |
| SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, |
| SDValue N2, SDValue N3, ISD::CondCode CC); |
| SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, |
| const SDLoc &DL); |
| SDValue unfoldMaskedMerge(SDNode *N); |
| SDValue unfoldExtremeBitClearingToShifts(SDNode *N); |
| SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
| const SDLoc &DL, bool foldBooleans); |
| SDValue rebuildSetCC(SDValue N); |
| |
| bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, |
| SDValue &CC) const; |
| bool isOneUseSetCC(SDValue N) const; |
| bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); |
| |
| SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, |
| unsigned HiOp); |
| SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); |
| SDValue CombineExtLoad(SDNode *N); |
| SDValue CombineZExtLogicopShiftLoad(SDNode *N); |
| SDValue combineRepeatedFPDivisors(SDNode *N); |
| SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); |
| SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); |
| SDValue BuildSDIV(SDNode *N); |
| SDValue BuildSDIVPow2(SDNode *N); |
| SDValue BuildUDIV(SDNode *N); |
| SDValue BuildLogBase2(SDValue V, const SDLoc &DL); |
| SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); |
| SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); |
| SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); |
| SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); |
| SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, |
| SDNodeFlags Flags, bool Reciprocal); |
| SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, |
| SDNodeFlags Flags, bool Reciprocal); |
| SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, |
| bool DemandHighBits = true); |
| SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); |
| SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, |
| SDValue InnerPos, SDValue InnerNeg, |
| unsigned PosOpcode, unsigned NegOpcode, |
| const SDLoc &DL); |
| SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); |
| SDValue MatchLoadCombine(SDNode *N); |
| SDValue MatchStoreCombine(StoreSDNode *N); |
| SDValue ReduceLoadWidth(SDNode *N); |
| SDValue ReduceLoadOpStoreWidth(SDNode *N); |
| SDValue splitMergedValStore(StoreSDNode *ST); |
| SDValue TransformFPLoadStorePair(SDNode *N); |
| SDValue convertBuildVecZextToZext(SDNode *N); |
| SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); |
| SDValue reduceBuildVecToShuffle(SDNode *N); |
| SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, |
| ArrayRef<int> VectorMask, SDValue VecIn1, |
| SDValue VecIn2, unsigned LeftIdx, |
| bool DidSplitVec); |
| SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast); |
| |
| /// Walk up chain skipping non-aliasing memory nodes, |
| /// looking for aliasing nodes and adding them to the Aliases vector. |
| void GatherAllAliases(SDNode *N, SDValue OriginalChain, |
| SmallVectorImpl<SDValue> &Aliases); |
| |
| /// Return true if there is any possibility that the two addresses overlap. |
| bool isAlias(SDNode *Op0, SDNode *Op1) const; |
| |
| /// Walk up chain skipping non-aliasing memory nodes, looking for a better |
| /// chain (aliasing node.) |
| SDValue FindBetterChain(SDNode *N, SDValue Chain); |
| |
| /// Try to replace a store and any possibly adjacent stores on |
| /// consecutive chains with better chains. Return true only if St is |
| /// replaced. |
| /// |
| /// Notice that other chains may still be replaced even if the function |
| /// returns false. |
| bool findBetterNeighborChains(StoreSDNode *St); |
| |
| // Helper for findBetterNeighborChains. Walk up store chain add additional |
| // chained stores that do not overlap and can be parallelized. |
| bool parallelizeChainedStores(StoreSDNode *St); |
| |
| /// Holds a pointer to an LSBaseSDNode as well as information on where it |
| /// is located in a sequence of memory operations connected by a chain. |
| struct MemOpLink { |
| // Ptr to the mem node. |
| LSBaseSDNode *MemNode; |
| |
| // Offset from the base ptr. |
| int64_t OffsetFromBase; |
| |
| MemOpLink(LSBaseSDNode *N, int64_t Offset) |
| : MemNode(N), OffsetFromBase(Offset) {} |
| }; |
| |
| /// This is a helper function for visitMUL to check the profitability |
| /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). |
| /// MulNode is the original multiply, AddNode is (add x, c1), |
| /// and ConstNode is c2. |
| bool isMulAddWithConstProfitable(SDNode *MulNode, |
| SDValue &AddNode, |
| SDValue &ConstNode); |
| |
| /// This is a helper function for visitAND and visitZERO_EXTEND. Returns |
| /// true if the (and (load x) c) pattern matches an extload. ExtVT returns |
| /// the type of the loaded value to be extended. |
| bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, |
| EVT LoadResultTy, EVT &ExtVT); |
| |
| /// Helper function to calculate whether the given Load/Store can have its |
| /// width reduced to ExtVT. |
| bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType, |
| EVT &MemVT, unsigned ShAmt = 0); |
| |
| /// Used by BackwardsPropagateMask to find suitable loads. |
| bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads, |
| SmallPtrSetImpl<SDNode*> &NodesWithConsts, |
| ConstantSDNode *Mask, SDNode *&NodeToMask); |
| /// Attempt to propagate a given AND node back to load leaves so that they |
| /// can be combined into narrow loads. |
| bool BackwardsPropagateMask(SDNode *N); |
| |
| /// Helper function for MergeConsecutiveStores which merges the |
| /// component store chains. |
| SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, |
| unsigned NumStores); |
| |
| /// This is a helper function for MergeConsecutiveStores. When the |
| /// source elements of the consecutive stores are all constants or |
| /// all extracted vector elements, try to merge them into one |
| /// larger store introducing bitcasts if necessary. \return True |
| /// if a merged store was created. |
| bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, |
| EVT MemVT, unsigned NumStores, |
| bool IsConstantSrc, bool UseVector, |
| bool UseTrunc); |
| |
| /// This is a helper function for MergeConsecutiveStores. Stores |
| /// that potentially may be merged with St are placed in |
| /// StoreNodes. RootNode is a chain predecessor to all store |
| /// candidates. |
| void getStoreMergeCandidates(StoreSDNode *St, |
| SmallVectorImpl<MemOpLink> &StoreNodes, |
| SDNode *&Root); |
| |
| /// Helper function for MergeConsecutiveStores. Checks if |
| /// candidate stores have indirect dependency through their |
| /// operands. RootNode is the predecessor to all stores calculated |
| /// by getStoreMergeCandidates and is used to prune the dependency check. |
| /// \return True if safe to merge. |
| bool checkMergeStoreCandidatesForDependencies( |
| SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, |
| SDNode *RootNode); |
| |
| /// Merge consecutive store operations into a wide store. |
| /// This optimization uses wide integers or vectors when possible. |
| /// \return number of stores that were merged into a merged store (the |
| /// affected nodes are stored as a prefix in \p StoreNodes). |
| bool MergeConsecutiveStores(StoreSDNode *St); |
| |
| /// Try to transform a truncation where C is a constant: |
| /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) |
| /// |
| /// \p N needs to be a truncation and its first operand an AND. Other |
| /// requirements are checked by the function (e.g. that trunc is |
| /// single-use) and if missed an empty SDValue is returned. |
| SDValue distributeTruncateThroughAnd(SDNode *N); |
| |
| /// Helper function to determine whether the target supports operation |
| /// given by \p Opcode for type \p VT, that is, whether the operation |
| /// is legal or custom before legalizing operations, and whether is |
| /// legal (but not custom) after legalization. |
| bool hasOperation(unsigned Opcode, EVT VT) { |
| if (LegalOperations) |
| return TLI.isOperationLegal(Opcode, VT); |
| return TLI.isOperationLegalOrCustom(Opcode, VT); |
| } |
| |
| public: |
| /// Runs the dag combiner on all nodes in the work list |
| void Run(CombineLevel AtLevel); |
| |
| SelectionDAG &getDAG() const { return DAG; } |
| |
| /// Returns a type large enough to hold any valid shift amount - before type |
| /// legalization these can be huge. |
| EVT getShiftAmountTy(EVT LHSTy) { |
| assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); |
| return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes); |
| } |
| |
| /// This method returns true if we are running before type legalization or |
| /// if the specified VT is legal. |
| bool isTypeLegal(const EVT &VT) { |
| if (!LegalTypes) return true; |
| return TLI.isTypeLegal(VT); |
| } |
| |
| /// Convenience wrapper around TargetLowering::getSetCCResultType |
| EVT getSetCCResultType(EVT VT) const { |
| return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); |
| } |
| |
| void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, |
| SDValue OrigLoad, SDValue ExtLoad, |
| ISD::NodeType ExtType); |
| }; |
| |
| /// This class is a DAGUpdateListener that removes any deleted |
| /// nodes from the worklist. |
| class WorklistRemover : public SelectionDAG::DAGUpdateListener { |
| DAGCombiner &DC; |
| |
| public: |
| explicit WorklistRemover(DAGCombiner &dc) |
| : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} |
| |
| void NodeDeleted(SDNode *N, SDNode *E) override { |
| DC.removeFromWorklist(N); |
| } |
| }; |
| |
| class WorklistInserter : public SelectionDAG::DAGUpdateListener { |
| DAGCombiner &DC; |
| |
| public: |
| explicit WorklistInserter(DAGCombiner &dc) |
| : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} |
| |
| // FIXME: Ideally we could add N to the worklist, but this causes exponential |
| // compile time costs in large DAGs, e.g. Halide. |
| void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); } |
| }; |
| |
| } // end anonymous namespace |
| |
| //===----------------------------------------------------------------------===// |
| // TargetLowering::DAGCombinerInfo implementation |
| //===----------------------------------------------------------------------===// |
| |
| void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { |
| ((DAGCombiner*)DC)->AddToWorklist(N); |
| } |
| |
| SDValue TargetLowering::DAGCombinerInfo:: |
| CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { |
| return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); |
| } |
| |
| SDValue TargetLowering::DAGCombinerInfo:: |
| CombineTo(SDNode *N, SDValue Res, bool AddTo) { |
| return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); |
| } |
| |
| SDValue TargetLowering::DAGCombinerInfo:: |
| CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { |
| return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); |
| } |
| |
| bool TargetLowering::DAGCombinerInfo:: |
| recursivelyDeleteUnusedNodes(SDNode *N) { |
| return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); |
| } |
| |
| void TargetLowering::DAGCombinerInfo:: |
| CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { |
| return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Helper Functions |
| //===----------------------------------------------------------------------===// |
| |
| void DAGCombiner::deleteAndRecombine(SDNode *N) { |
| removeFromWorklist(N); |
| |
| // If the operands of this node are only used by the node, they will now be |
| // dead. Make sure to re-visit them and recursively delete dead nodes. |
| for (const SDValue &Op : N->ops()) |
| // For an operand generating multiple values, one of the values may |
| // become dead allowing further simplification (e.g. split index |
| // arithmetic from an indexed load). |
| if (Op->hasOneUse() || Op->getNumValues() > 1) |
| AddToWorklist(Op.getNode()); |
| |
| DAG.DeleteNode(N); |
| } |
| |
| // APInts must be the same size for most operations, this helper |
| // function zero extends the shorter of the pair so that they match. |
| // We provide an Offset so that we can create bitwidths that won't overflow. |
| static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { |
| unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); |
| LHS = LHS.zextOrSelf(Bits); |
| RHS = RHS.zextOrSelf(Bits); |
| } |
| |
| // Return true if this node is a setcc, or is a select_cc |
| // that selects between the target values used for true and false, making it |
| // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to |
| // the appropriate nodes based on the type of node we are checking. This |
| // simplifies life a bit for the callers. |
| bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, |
| SDValue &CC) const { |
| if (N.getOpcode() == ISD::SETCC) { |
| LHS = N.getOperand(0); |
| RHS = N.getOperand(1); |
| CC = N.getOperand(2); |
| return true; |
| } |
| |
| if (N.getOpcode() != ISD::SELECT_CC || |
| !TLI.isConstTrueVal(N.getOperand(2).getNode()) || |
| !TLI.isConstFalseVal(N.getOperand(3).getNode())) |
| return false; |
| |
| if (TLI.getBooleanContents(N.getValueType()) == |
| TargetLowering::UndefinedBooleanContent) |
| return false; |
| |
| LHS = N.getOperand(0); |
| RHS = N.getOperand(1); |
| CC = N.getOperand(4); |
| return true; |
| } |
| |
| /// Return true if this is a SetCC-equivalent operation with only one use. |
| /// If this is true, it allows the users to invert the operation for free when |
| /// it is profitable to do so. |
| bool DAGCombiner::isOneUseSetCC(SDValue N) const { |
| SDValue N0, N1, N2; |
| if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) |
| return true; |
| return false; |
| } |
| |
| // Returns the SDNode if it is a constant float BuildVector |
| // or constant float. |
| static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { |
| if (isa<ConstantFPSDNode>(N)) |
| return N.getNode(); |
| if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) |
| return N.getNode(); |
| return nullptr; |
| } |
| |
| // Determines if it is a constant integer or a build vector of constant |
| // integers (and undefs). |
| // Do not permit build vector implicit truncation. |
| static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { |
| if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) |
| return !(Const->isOpaque() && NoOpaques); |
| if (N.getOpcode() != ISD::BUILD_VECTOR) |
| return false; |
| unsigned BitWidth = N.getScalarValueSizeInBits(); |
| for (const SDValue &Op : N->op_values()) { |
| if (Op.isUndef()) |
| continue; |
| ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); |
| if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || |
| (Const->isOpaque() && NoOpaques)) |
| return false; |
| } |
| return true; |
| } |
| |
| // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with |
| // undef's. |
| static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { |
| if (V.getOpcode() != ISD::BUILD_VECTOR) |
| return false; |
| return isConstantOrConstantVector(V, NoOpaques) || |
| ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); |
| } |
| |
| bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, |
| const SDLoc &DL, |
| SDValue N0, |
| SDValue N1) { |
| // Currently this only tries to ensure we don't undo the GEP splits done by |
| // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this, |
| // we check if the following transformation would be problematic: |
| // (load/store (add, (add, x, offset1), offset2)) -> |
| // (load/store (add, x, offset1+offset2)). |
| |
| if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) |
| return false; |
| |
| if (N0.hasOneUse()) |
| return false; |
| |
| auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
| auto *C2 = dyn_cast<ConstantSDNode>(N1); |
| if (!C1 || !C2) |
| return false; |
| |
| const APInt &C1APIntVal = C1->getAPIntValue(); |
| const APInt &C2APIntVal = C2->getAPIntValue(); |
| if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64) |
| return false; |
| |
| const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; |
| if (CombinedValueIntVal.getBitWidth() > 64) |
| return false; |
| const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); |
| |
| for (SDNode *Node : N0->uses()) { |
| auto LoadStore = dyn_cast<MemSDNode>(Node); |
| if (LoadStore) { |
| // Is x[offset2] already not a legal addressing mode? If so then |
| // reassociating the constants breaks nothing (we test offset2 because |
| // that's the one we hope to fold into the load or store). |
| TargetLoweringBase::AddrMode AM; |
| AM.HasBaseReg = true; |
| AM.BaseOffs = C2APIntVal.getSExtValue(); |
| EVT VT = LoadStore->getMemoryVT(); |
| unsigned AS = LoadStore->getAddressSpace(); |
| Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); |
| if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) |
| continue; |
| |
| // Would x[offset1+offset2] still be a legal addressing mode? |
| AM.BaseOffs = CombinedValue; |
| if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression |
| // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc. |
| SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, |
| SDValue N0, SDValue N1) { |
| EVT VT = N0.getValueType(); |
| |
| if (N0.getOpcode() != Opc) |
| return SDValue(); |
| |
| // Don't reassociate reductions. |
| if (N0->getFlags().hasVectorReduction()) |
| return SDValue(); |
| |
| if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { |
| if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { |
| // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) |
| if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2)) |
| return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); |
| return SDValue(); |
| } |
| if (N0.hasOneUse()) { |
| // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) |
| // iff (op x, c1) has one use |
| SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); |
| if (!OpNode.getNode()) |
| return SDValue(); |
| return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); |
| } |
| } |
| return SDValue(); |
| } |
| |
| // Try to reassociate commutative binops. |
| SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, |
| SDValue N1, SDNodeFlags Flags) { |
| assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); |
| // Don't reassociate reductions. |
| if (Flags.hasVectorReduction()) |
| return SDValue(); |
| |
| // Floating-point reassociation is not allowed without loose FP math. |
| if (N0.getValueType().isFloatingPoint() || |
| N1.getValueType().isFloatingPoint()) |
| if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros()) |
| return SDValue(); |
| |
| if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1)) |
| return Combined; |
| if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0)) |
| return Combined; |
| return SDValue(); |
| } |
| |
| SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, |
| bool AddTo) { |
| assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); |
| ++NodesCombined; |
| LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: "; |
| To[0].getNode()->dump(&DAG); |
| dbgs() << " and " << NumTo - 1 << " other values\n"); |
| for (unsigned i = 0, e = NumTo; i != e; ++i) |
| assert((!To[i].getNode() || |
| N->getValueType(i) == To[i].getValueType()) && |
| "Cannot combine value to value of different type!"); |
| |
| WorklistRemover DeadNodes(*this); |
| DAG.ReplaceAllUsesWith(N, To); |
| if (AddTo) { |
| // Push the new nodes and any users onto the worklist |
| for (unsigned i = 0, e = NumTo; i != e; ++i) { |
| if (To[i].getNode()) { |
| AddToWorklist(To[i].getNode()); |
| AddUsersToWorklist(To[i].getNode()); |
| } |
| } |
| } |
| |
| // Finally, if the node is now dead, remove it from the graph. The node |
| // may not be dead if the replacement process recursively simplified to |
| // something else needing this node. |
| if (N->use_empty()) |
| deleteAndRecombine(N); |
| return SDValue(N, 0); |
| } |
| |
| void DAGCombiner:: |
| CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { |
| // Replace all uses. If any nodes become isomorphic to other nodes and |
| // are deleted, make sure to remove them from our worklist. |
| WorklistRemover DeadNodes(*this); |
| DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); |
| |
| // Push the new node and any (possibly new) users onto the worklist. |
| AddToWorklistWithUsers(TLO.New.getNode()); |
| |
| // Finally, if the node is now dead, remove it from the graph. The node |
| // may not be dead if the replacement process recursively simplified to |
| // something else needing this node. |
| if (TLO.Old.getNode()->use_empty()) |
| deleteAndRecombine(TLO.Old.getNode()); |
| } |
| |
| /// Check the specified integer node value to see if it can be simplified or if |
| /// things it uses can be simplified by bit propagation. If so, return true. |
| bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
| const APInt &DemandedElts) { |
| TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); |
| KnownBits Known; |
| if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) |
| return false; |
| |
| // Revisit the node. |
| AddToWorklist(Op.getNode()); |
| |
| // Replace the old value with the new one. |
| ++NodesCombined; |
| LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); |
| dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); |
| dbgs() << '\n'); |
| |
| CommitTargetLoweringOpt(TLO); |
| return true; |
| } |
| |
| /// Check the specified vector node value to see if it can be simplified or |
| /// if things it uses can be simplified as it only uses some of the elements. |
| /// If so, return true. |
| bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, |
| const APInt &DemandedElts, |
| bool AssumeSingleUse) { |
| TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); |
| APInt KnownUndef, KnownZero; |
| if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, |
| TLO, 0, AssumeSingleUse)) |
| return false; |
| |
| // Revisit the node. |
| AddToWorklist(Op.getNode()); |
| |
| // Replace the old value with the new one. |
| ++NodesCombined; |
| LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); |
| dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); |
| dbgs() << '\n'); |
| |
| CommitTargetLoweringOpt(TLO); |
| return true; |
| } |
| |
| void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { |
| SDLoc DL(Load); |
| EVT VT = Load->getValueType(0); |
| SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); |
| |
| LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; |
| Trunc.getNode()->dump(&DAG); dbgs() << '\n'); |
| WorklistRemover DeadNodes(*this); |
| DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); |
| DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); |
| deleteAndRecombine(Load); |
| AddToWorklist(Trunc.getNode()); |
| } |
| |
| SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { |
| Replace = false; |
| SDLoc DL(Op); |
| if (ISD::isUNINDEXEDLoad(Op.getNode())) { |
| LoadSDNode *LD = cast<LoadSDNode>(Op); |
| EVT MemVT = LD->getMemoryVT(); |
| ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD |
| : LD->getExtensionType(); |
| Replace = true; |
| return DAG.getExtLoad(ExtType, DL, PVT, |
| LD->getChain(), LD->getBasePtr(), |
| MemVT, LD->getMemOperand()); |
| } |
| |
| unsigned Opc = Op.getOpcode(); |
| switch (Opc) { |
| default: break; |
| case ISD::AssertSext: |
| if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT)) |
| return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1)); |
| break; |
| case ISD::AssertZext: |
| if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT)) |
| return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1)); |
| break; |
| case ISD::Constant: { |
| unsigned ExtOpc = |
| Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
| return DAG.getNode(ExtOpc, DL, PVT, Op); |
| } |
| } |
| |
| if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) |
| return SDValue(); |
| return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); |
| } |
| |
| SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { |
| if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) |
| return SDValue(); |
| EVT OldVT = Op.getValueType(); |
| SDLoc DL(Op); |
| bool Replace = false; |
| SDValue NewOp = PromoteOperand(Op, PVT, Replace); |
| if (!NewOp.getNode()) |
| return SDValue(); |
| AddToWorklist(NewOp.getNode()); |
| |
| if (Replace) |
| ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); |
| return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, |
| DAG.getValueType(OldVT)); |
| } |
| |
| SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { |
| EVT OldVT = Op.getValueType(); |
| SDLoc DL(Op); |
| bool Replace = false; |
| SDValue NewOp = PromoteOperand(Op, PVT, Replace); |
| if (!NewOp.getNode()) |
| return SDValue(); |
| AddToWorklist(NewOp.getNode()); |
| |
| if (Replace) |
| ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); |
| return DAG.getZeroExtendInReg(NewOp, DL, OldVT); |
| } |
| |
| /// Promote the specified integer binary operation if the target indicates it is |
| /// beneficial. e.g. On x86, it's usually better to promote i16 operations to |
| /// i32 since i16 instructions are longer. |
| SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { |
| if (!LegalOperations) |
| return SDValue(); |
| |
| EVT VT = Op.getValueType(); |
| if (VT.isVector() || !VT.isInteger()) |
| return SDValue(); |
| |
| // If operation type is 'undesirable', e.g. i16 on x86, consider |
| // promoting it. |
| unsigned Opc = Op.getOpcode(); |
| if (TLI.isTypeDesirableForOp(Opc, VT)) |
| return SDValue(); |
| |
| EVT PVT = VT; |
| // Consult target whether it is a good idea to promote this operation and |
| // what's the right type to promote it to. |
| if (TLI.IsDesirableToPromoteOp(Op, PVT)) { |
| assert(PVT != VT && "Don't know what type to promote to!"); |
| |
| LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); |
| |
| bool Replace0 = false; |
| SDValue N0 = Op.getOperand(0); |
| SDValue NN0 = PromoteOperand(N0, PVT, Replace0); |
| |
| bool Replace1 = false; |
| SDValue N1 = Op.getOperand(1); |
| SDValue NN1 = PromoteOperand(N1, PVT, Replace1); |
| SDLoc DL(Op); |
| |
| SDValue RV = |
| DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); |
| |
| // We are always replacing N0/N1's use in N and only need |
| // additional replacements if there are additional uses. |
| Replace0 &= !N0->hasOneUse(); |
| Replace1 &= (N0 != N1) && !N1->hasOneUse(); |
| |
| // Combine Op here so it is preserved past replacements. |
| CombineTo(Op.getNode(), RV); |
| |
| // If operands have a use ordering, make sure we deal with |
| // predecessor first. |
| if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) { |
| std::swap(N0, N1); |
| std::swap(NN0, NN1); |
| } |
| |
| if (Replace0) { |
| AddToWorklist(NN0.getNode()); |
| ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); |
| } |
| if (Replace1) { |
| AddToWorklist(NN1.getNode()); |
| ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); |
| } |
| return Op; |
| } |
| return SDValue(); |
| } |
| |
| /// Promote the specified integer shift operation if the target indicates it is |
| /// beneficial. e.g. On x86, it's usually better to promote i16 operations to |
| /// i32 since i16 instructions are longer. |
| SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { |
| if (!LegalOperations) |
| return SDValue(); |
| |
| EVT VT = Op.getValueType(); |
| if (VT.isVector() || !VT.isInteger()) |
| return SDValue(); |
| |
| // If operation type is 'undesirable', e.g. i16 on x86, consider |
| // promoting it. |
| unsigned Opc = Op.getOpcode(); |
| if (TLI.isTypeDesirableForOp(Opc, VT)) |
| return SDValue(); |
| |
| EVT PVT = VT; |
| // Consult target whether it is a good idea to promote this operation and |
| // what's the right type to promote it to. |
| if (TLI.IsDesirableToPromoteOp(Op, PVT)) { |
| assert(PVT != VT && "Don't know what type to promote to!"); |
| |
| LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); |
| |
| bool Replace = false; |
| SDValue N0 = Op.getOperand(0); |
| SDValue N1 = Op.getOperand(1); |
| if (Opc == ISD::SRA) |
| N0 = SExtPromoteOperand(N0, PVT); |
| else if (Opc == ISD::SRL) |
| N0 = ZExtPromoteOperand(N0, PVT); |
| else |
| N0 = PromoteOperand(N0, PVT, Replace); |
| |
| if (!N0.getNode()) |
| return SDValue(); |
| |
| SDLoc DL(Op); |
| SDValue RV = |
| DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); |
| |
| if (Replace) |
| ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); |
| |
| // Deal with Op being deleted. |
| if (Op && Op.getOpcode() != ISD::DELETED_NODE) |
| return RV; |
| } |
| return SDValue(); |
| } |
| |
| SDValue DAGCombiner::PromoteExtend(SDValue Op) { |
| if (!LegalOperations) |
| return SDValue(); |
| |
| EVT VT = Op.getValueType(); |
| if (VT.isVector() || !VT.isInteger()) |
| return SDValue(); |
| |
| // If operation type is 'undesirable', e.g. i16 on x86, consider |
| // promoting it. |
| unsigned Opc = Op.getOpcode(); |
| if (TLI.isTypeDesirableForOp(Opc, VT)) |
| return SDValue(); |
| |
| EVT PVT = VT; |
| // Consult target whether it is a good idea to promote this operation and |
| // what's the right type to promote it to. |
| if (TLI.IsDesirableToPromoteOp(Op, PVT)) { |
| assert(PVT != VT && "Don't know what type to promote to!"); |
| // fold (aext (aext x)) -> (aext x) |
| // fold (aext (zext x)) -> (zext x) |
| // fold (aext (sext x)) -> (sext x) |
| LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); |
| return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); |
| } |
| return SDValue(); |
| } |
| |
| bool DAGCombiner::PromoteLoad(SDValue Op) { |
| if (!LegalOperations) |
| return false; |
| |
| if (!ISD::isUNINDEXEDLoad(Op.getNode())) |
| return false; |
| |
| EVT VT = Op.getValueType(); |
| if (VT.isVector() || !VT.isInteger()) |
| return false; |
| |
| // If operation type is 'undesirable', e.g. i16 on x86, consider |
| // promoting it. |
| unsigned Opc = Op.getOpcode(); |
| if (TLI.isTypeDesirableForOp(Opc, VT)) |
| return false; |
| |
| EVT PVT = VT; |
| // Consult target whether it is a good idea to promote this operation and |
| // what's the right type to promote it to. |
| if (TLI.IsDesirableToPromoteOp(Op, PVT)) { |
| assert(PVT != VT && "Don't know what type to promote to!"); |
| |
| SDLoc DL(Op); |
| SDNode *N = Op.getNode(); |
| LoadSDNode *LD = cast<LoadSDNode>(N); |
| EVT MemVT = LD->getMemoryVT(); |
| ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD |
| : LD->getExtensionType(); |
| SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, |
| LD->getChain(), LD->getBasePtr(), |
| MemVT, LD->getMemOperand()); |
| SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); |
| |
| LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; |
| Result.getNode()->dump(&DAG); dbgs() << '\n'); |
| WorklistRemover DeadNodes(*this); |
| DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); |
| DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); |
| deleteAndRecombine(N); |
| AddToWorklist(Result.getNode()); |
| return true; |
| } |
| return false; |
| } |
| |
| /// Recursively delete a node which has no uses and any operands for |
| /// which it is the only use. |
| /// |
| /// Note that this both deletes the nodes and removes them from the worklist. |
| /// It also adds any nodes who have had a user deleted to the worklist as they |
| /// may now have only one use and subject to other combines. |
| bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { |
| if (!N->use_empty()) |
| return false; |
| |
| SmallSetVector<SDNode *, 16> Nodes; |
| Nodes.insert(N); |
| do { |
| N = Nodes.pop_back_val(); |
| if (!N) |
| continue; |
| |
| if (N->use_empty()) { |
| for (const SDValue &ChildN : N->op_values()) |
| Nodes.insert(ChildN.getNode()); |
| |
| removeFromWorklist(N); |
| DAG.DeleteNode(N); |
| } else { |
| AddToWorklist(N); |
| } |
| } while (!Nodes.empty()); |
| return true; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Main DAG Combiner implementation |
| //===----------------------------------------------------------------------===// |
| |
| void DAGCombiner::Run(CombineLevel AtLevel) { |
| // set the instance variables, so that the various visit routines may use it. |
| Level = AtLevel; |
| LegalDAG = Level >= AfterLegalizeDAG; |
| LegalOperations = Level >= AfterLegalizeVectorOps; |
| LegalTypes = Level >= AfterLegalizeTypes; |
| |
| WorklistInserter AddNodes(*this); |
| |
| // Add all the dag nodes to the worklist. |
| for (SDNode &Node : DAG.allnodes()) |
| AddToWorklist(&Node); |
| |
| // Create a dummy node (which is not added to allnodes), that adds a reference |
| // to the root node, preventing it from being deleted, and tracking any |
| // changes of the root. |
| HandleSDNode Dummy(DAG.getRoot()); |
| |
| // While we have a valid worklist entry node, try to combine it. |
| while (SDNode *N = getNextWorklistEntry()) { |
| // If N has no uses, it is dead. Make sure to revisit all N's operands once |
| // N is deleted from the DAG, since they too may now be dead or may have a |
| // reduced number of uses, allowing other xforms. |
| if (recursivelyDeleteUnusedNodes(N)) |
| continue; |
| |
| WorklistRemover DeadNodes(*this); |
| |
| // If this combine is running after legalizing the DAG, re-legalize any |
| // nodes pulled off the worklist. |
| if (LegalDAG) { |
| SmallSetVector<SDNode *, 16> UpdatedNodes; |
| bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); |
| |
| for (SDNode *LN : UpdatedNodes) |
| AddToWorklistWithUsers(LN); |
| |
| if (!NIsValid) |
| continue; |
| } |
| |
| LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); |
| |
| // Add any operands of the new node which have not yet been combined to the |
| // worklist as well. Because the worklist uniques things already, this |
| // won't repeatedly process the same operand. |
| CombinedNodes.insert(N); |
| for (const SDValue &ChildN : N->op_values()) |
| if (!CombinedNodes.count(ChildN.getNode())) |
| AddToWorklist(ChildN.getNode()); |
| |
| SDValue RV = combine(N); |
| |
| if (!RV.getNode()) |
| continue; |
| |
| ++NodesCombined; |
| |
| // If we get back the same node we passed in, rather than a new node or |
| // zero, we know that the node must have defined multiple values and |
| // CombineTo was used. Since CombineTo takes care of the worklist |
| // mechanics for us, we have no work to do in this case. |
| if (RV.getNode() == N) |
| continue; |
| |
| assert(N->getOpcode() != ISD::DELETED_NODE && |
| RV.getOpcode() != ISD::DELETED_NODE && |
| "Node was deleted but visit returned new node!"); |
| |
| LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); |
| |
| if (N->getNumValues() == RV.getNode()->getNumValues()) |
| DAG.ReplaceAllUsesWith(N, RV.getNode()); |
| else { |
| assert(N->getValueType(0) == RV.getValueType() && |
| N->getNumValues() == 1 && "Type mismatch"); |
| DAG.ReplaceAllUsesWith(N, &RV); |
| } |
| |
| // Push the new node and any users onto the worklist |
| AddToWorklist(RV.getNode()); |
| AddUsersToWorklist(RV.getNode()); |
| |
| // Finally, if the node is now dead, remove it from the graph. The node |
| // may not be dead if the replacement process recursively simplified to |
| // something else needing this node. This will also take care of adding any |
| // operands which have lost a user to the worklist. |
| recursivelyDeleteUnusedNodes(N); |
| } |
| |
| // If the root changed (e.g. it was a dead load, update the root). |
| DAG.setRoot(Dummy.getValue()); |
| DAG.RemoveDeadNodes(); |
| } |
| |
| SDValue DAGCombiner::visit(SDNode *N) { |
| switch (N->getOpcode()) { |
| default: break; |
| case ISD::TokenFactor: return visitTokenFactor(N); |
| case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); |
| case ISD::ADD: return visitADD(N); |
| case ISD::SUB: return visitSUB(N); |
| case ISD::SADDSAT: |
| case ISD::UADDSAT: return visitADDSAT(N); |
| case ISD::SSUBSAT: |
| case ISD::USUBSAT: return visitSUBSAT(N); |
| case ISD::ADDC: return visitADDC(N); |
| case ISD::SADDO: |
| case ISD::UADDO: return visitADDO(N); |
| case ISD::SUBC: return visitSUBC(N); |
| case ISD::SSUBO: |
| case ISD::USUBO: return visitSUBO(N); |
| case ISD::ADDE: return visitADDE(N); |
| case ISD::ADDCARRY: return visitADDCARRY(N); |
| case ISD::SUBE: return visitSUBE(N); |
| case ISD::SUBCARRY: return visitSUBCARRY(N); |
| case ISD::SMULFIX: |
| case ISD::SMULFIXSAT: |
| case ISD::UMULFIX: |
| case ISD::UMULFIXSAT: return visitMULFIX(N); |
| case ISD::MUL: return visitMUL(N); |
| case ISD::SDIV: return visitSDIV(N); |
| case ISD::UDIV: return visitUDIV(N); |
| case ISD::SREM: |
| case ISD::UREM: return visitREM(N); |
| case ISD::MULHU: return visitMULHU(N); |
| case ISD::MULHS: return visitMULHS(N); |
| case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); |
| case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); |
| case ISD::SMULO: |
| case ISD::UMULO: return visitMULO(N); |
| case ISD::SMIN: |
| case ISD::SMAX: |
| case ISD::UMIN: |
| case ISD::UMAX: return visitIMINMAX(N); |
| case ISD::AND: return visitAND(N); |
| case ISD::OR: return visitOR(N); |
| case ISD::XOR: return visitXOR(N); |
| case ISD::SHL: return visitSHL(N); |
| case ISD::SRA: return visitSRA(N); |
| case ISD::SRL: return visitSRL(N); |
| case ISD::ROTR: |
| case ISD::ROTL: return visitRotate(N); |
| case ISD::FSHL: |
| case ISD::FSHR: return visitFunnelShift(N); |
| case ISD::ABS: return visitABS(N); |
| case ISD::BSWAP: return visitBSWAP(N); |
| case ISD::BITREVERSE: return visitBITREVERSE(N); |
| case ISD::CTLZ: return visitCTLZ(N); |
| case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); |
| case ISD::CTTZ: return visitCTTZ(N); |
| case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); |
| case ISD::CTPOP: return visitCTPOP(N); |
| case ISD::SELECT: return visitSELECT(N); |
| case ISD::VSELECT: return visitVSELECT(N); |
| case ISD::SELECT_CC: return visitSELECT_CC(N); |
| case ISD::SETCC: return visitSETCC(N); |
| case ISD::SETCCCARRY: return visitSETCCCARRY(N); |
| case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); |
| case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); |
| case ISD::ANY_EXTEND: return visitANY_EXTEND(N); |
| case ISD::AssertSext: |
| case ISD::AssertZext: return visitAssertExt(N); |
| case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); |
| case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); |
| case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); |
| case ISD::TRUNCATE: return visitTRUNCATE(N); |
| case ISD::BITCAST: return visitBITCAST(N); |
| case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); |
| case ISD::FADD: return visitFADD(N); |
| case ISD::FSUB: return visitFSUB(N); |
| case ISD::FMUL: return visitFMUL(N); |
| case ISD::FMA: return visitFMA(N); |
| case ISD::FDIV: return visitFDIV(N); |
| case ISD::FREM: return visitFREM(N); |
| case ISD::FSQRT: return visitFSQRT(N); |
| case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); |
| case ISD::FPOW: return visitFPOW(N); |
| case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); |
| case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); |
| case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); |
| case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); |
| case ISD::FP_ROUND: return visitFP_ROUND(N); |
| case ISD::FP_EXTEND: return visitFP_EXTEND(N); |
| case ISD::FNEG: return visitFNEG(N); |
| case ISD::FABS: return visitFABS(N); |
| case ISD::FFLOOR: return visitFFLOOR(N); |
| case ISD::FMINNUM: return visitFMINNUM(N); |
| case ISD::FMAXNUM: return visitFMAXNUM(N); |
| case ISD::FMINIMUM: return visitFMINIMUM(N); |
| case ISD::FMAXIMUM: return visitFMAXIMUM(N); |
| case ISD::FCEIL: return visitFCEIL(N); |
| case ISD::FTRUNC: return visitFTRUNC(N); |
| case ISD::BRCOND: return visitBRCOND(N); |
| case ISD::BR_CC: return visitBR_CC(N); |
| case ISD::LOAD: return visitLOAD(N); |
| case ISD::STORE: return visitSTORE(N); |
| case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); |
| case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); |
| case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); |
| case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); |
| case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); |
| case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); |
| case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); |
| case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); |
| case ISD::MGATHER: return visitMGATHER(N); |
| case ISD::MLOAD: return visitMLOAD(N); |
| case ISD::MSCATTER: return visitMSCATTER(N); |
| case ISD::MSTORE: return visitMSTORE(N); |
| case ISD::LIFETIME_END: return visitLIFETIME_END(N); |
| case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); |
| case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); |
| case ISD::VECREDUCE_FADD: |
| case ISD::VECREDUCE_FMUL: |
| case ISD::VECREDUCE_ADD: |
| case ISD::VECREDUCE_MUL: |
| case ISD::VECREDUCE_AND: |
| case ISD::VECREDUCE_OR: |
| case ISD::VECREDUCE_XOR: |
| case ISD::VECREDUCE_SMAX: |
| case ISD::VECREDUCE_SMIN: |
| case ISD::VECREDUCE_UMAX: |
| case ISD::VECREDUCE_UMIN: |
| case ISD::VECREDUCE_FMAX: |
| case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N); |
| } |
| return SDValue(); |
| } |
| |
| SDValue DAGCombiner::combine(SDNode *N) { |
| SDValue RV = visit(N); |
| |
| // If nothing happened, try a target-specific DAG combine. |
| if (!RV.getNode()) { |
| assert(N->getOpcode() != ISD::DELETED_NODE && |
| "Node was deleted but visit returned NULL!"); |
| |
| if (N->getOpcode() >= ISD::BUILTIN_OP_END || |
| TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { |
| |
| // Expose the DAG combiner to the target combiner impls. |
| TargetLowering::DAGCombinerInfo |
| DagCombineInfo(DAG, Level, false, this); |
| |
| RV = TLI.PerformDAGCombine(N, DagCombineInfo); |
| } |
| } |
| |
| // If nothing happened still, try promoting the operation. |
| if (!RV.getNode()) { |
| switch (N->getOpcode()) { |
| default: break; |
| case ISD::ADD: |
| case ISD::SUB: |
| case ISD::MUL: |
| case ISD::AND: |
| case ISD::OR: |
| case ISD::XOR: |
| RV = PromoteIntBinOp(SDValue(N, 0)); |
| break; |
| case ISD::SHL: |
| case ISD::SRA: |
| case ISD::SRL: |
| RV = PromoteIntShiftOp(SDValue(N, 0)); |
| break; |
| case ISD::SIGN_EXTEND: |
| case ISD::ZERO_EXTEND: |
| case ISD::ANY_EXTEND: |
| RV = PromoteExtend(SDValue(N, 0)); |
| break; |
| case ISD::LOAD: |
| if (PromoteLoad(SDValue(N, 0))) |
| RV = SDValue(N, 0); |
| break; |
| } |
| } |
| |
| // If N is a commutative binary node, try to eliminate it if the commuted |
| // version is already present in the DAG. |
| if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && |
| N->getNumValues() == 1) { |
| SDValue N0 = N->getOperand(0); |
| SDValue N1 = N->getOperand(1); |
| |
| // Constant operands are canonicalized to RHS. |
| if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) { |
| SDValue Ops[] = {N1, N0}; |
| SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, |
| N->getFlags()); |
| if (CSENode) |
| return SDValue(CSENode, 0); |
| } |
| } |
| |
| return RV; |
| } |
| |
| /// Given a node, return its input chain if it has one, otherwise return a null |
| /// sd operand. |
| static SDValue getInputChainForNode(SDNode *N) { |
| if (unsigned NumOps = N->getNumOperands()) { |
| if (N->getOperand(0).getValueType() == MVT::Other) |
| return N->getOperand(0); |
| if (N->getOperand(NumOps-1).getValueType() == MVT::Other) |
| return N->getOperand(NumOps-1); |
| for (unsigned i = 1; i < NumOps-1; ++i) |
| if (N->getOperand(i).getValueType() == MVT::Other) |
| return N->getOperand(i); |
| } |
| return SDValue(); |
| } |
| |
| SDValue DAGCombiner::visitTokenFactor(SDNode *N) { |
| // If N has two operands, where one has an input chain equal to the other, |
| // the 'other' chain is redundant. |
| if (N->getNumOperands() == 2) { |
| if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) |
| return N->getOperand(0); |
| if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) |
| return N->getOperand(1); |
| } |
| |
| // Don't simplify token factors if optnone. |
| if (OptLevel == CodeGenOpt::None) |
| return SDValue(); |
| |
| // If the sole user is a token factor, we should make sure we have a |
| // chance to merge them together. This prevents TF chains from inhibiting |
| // optimizations. |
| if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor) |
| AddToWorklist(*(N->use_begin())); |
| |
| SmallVector<SDNode *, 8> TFs; // List of token factors to visit. |
| SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. |
| SmallPtrSet<SDNode*, 16> SeenOps; |
| bool Changed = false; // If we should replace this token factor. |
| |
| // Start out with this token factor. |
| TFs.push_back(N); |
| |
| // Iterate through token factors. The TFs grows when new token factors are |
| // encountered. |
| for (unsigned i = 0; i < TFs.size(); ++i) { |
| // Limit number of nodes to inline, to avoid quadratic compile times. |
| // We have to add the outstanding Token Factors to Ops, otherwise we might |
| // drop Ops from the resulting Token Factors. |
| if (Ops.size() > TokenFactorInlineLimit) { |
| for (unsigned j = i; j < TFs.size(); j++) |
| Ops.emplace_back(TFs[j], 0); |
| // Drop unprocessed Token Factors from TFs, so we do not add them to the |
| // combiner worklist later. |
| TFs.resize(i); |
| break; |
| } |
| |
| SDNode *TF = TFs[i]; |
| // Check each of the operands. |
| for (const SDValue &Op : TF->op_values()) { |
| switch (Op.getOpcode()) { |
| case ISD::EntryToken: |
| // Entry tokens don't need to be added to the list. They are |
| // redundant. |
| Changed = true; |
| break; |
| |
| case ISD::TokenFactor: |
| if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { |
| // Queue up for processing. |
| TFs.push_back(Op.getNode()); |
| Changed = true; |
| break; |
| } |
| LLVM_FALLTHROUGH; |
| |
| default: |
| // Only add if it isn't already in the list. |
| if (SeenOps.insert(Op.getNode()).second) |
| Ops.push_back(Op); |
| else |
| Changed = true; |
| break; |
| } |
| } |
| } |
| |
| // Re-visit inlined Token Factors, to clean them up in case they have been |
| // removed. Skip the first Token Factor, as this is the current node. |
| for (unsigned i = 1, e = TFs.size(); i < e; i++) |
| AddToWorklist(TFs[i]); |
| |
| // Remove Nodes that are chained to another node in the list. Do so |
| // by walking up chains breath-first stopping when we've seen |
| // another operand. In general we must climb to the EntryNode, but we can exit |
| // early if we find all remaining work is associated with just one operand as |
| // no further pruning is possible. |
| |
| // List of nodes to search through and original Ops from which they originate. |
| SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist; |
| SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op. |
| SmallPtrSet<SDNode *, 16> SeenChains; |
| bool DidPruneOps = false; |
| |
| unsigned NumLeftToConsider = 0; |
| for (const SDValue &Op : Ops) { |
| Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++)); |
| OpWorkCount.push_back(1); |
| } |
| |
| auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) { |
| // If this is an Op, we can remove the op from the list. Remark any |
| // search associated with it as from the current OpNumber. |
| if (SeenOps.count(Op) != 0) { |
| Changed = true; |
| DidPruneOps = true; |
| unsigned OrigOpNumber = 0; |
| while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op) |
| OrigOpNumber++; |
| assert((OrigOpNumber != Ops.size()) && |
| "expected to find TokenFactor Operand"); |
| // Re-mark worklist from OrigOpNumber to OpNumber |
| for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) { |
| if (Worklist[i].second == OrigOpNumber) { |
| Worklist[i].second = OpNumber; |
| } |
| } |
| OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber]; |
| OpWorkCount[OrigOpNumber] = 0; |
| NumLeftToConsider--; |
| } |
| // Add if it's a new chain |
| if (SeenChains.insert(Op).second) { |
| OpWorkCount[OpNumber]++; |
| Worklist.push_back(std::make_pair(Op, OpNumber)); |
| } |
| }; |
| |
| for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) { |
| // We need at least be consider at least 2 Ops to prune. |
| if (NumLeftToConsider <= 1) |
| break; |
| auto CurNode = Worklist[i].first; |
| auto CurOpNumber = Worklist[i].second; |
| assert((OpWorkCount[CurOpNumber] > 0) && |
| "Node should not appear in worklist"); |
| switch (CurNode->getOpcode()) { |
| case ISD::EntryToken: |
| // Hitting EntryToken is the only way for the search to terminate without |
| // hitting |
| // another operand's search. Prevent us from marking this operand |
| // considered. |
| NumLeftToConsider++; |
| break; |
| case ISD::TokenFactor: |
| for (const SDValue &Op : CurNode->op_values()) |
| AddToWorklist(i, Op.getNode(), CurOpNumber); |
| break; |
| case ISD::LIFETIME_START: |
| case ISD::LIFETIME_END: |
| case ISD::CopyFromReg: |
| case ISD::CopyToReg: |
| AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber); |
| break; |
| default: |
| if (auto *MemNode = dyn_cast<MemSDNode>(CurNode)) |
| AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber); |
| break; |
| } |
| OpWorkCount[CurOpNumber]--; |
| if (OpWorkCount[CurOpNumber] == 0) |
| NumLeftToConsider--; |
| } |
| |
| // If we've changed things around then replace token factor. |
| if (Changed) { |
| SDValue Result; |
| if (Ops.empty()) { |
| // The entry token is the only possible outcome. |
| Result = DAG.getEntryNode(); |
| } else { |
| if (DidPruneOps) { |
| SmallVector<SDValue, 8> PrunedOps; |
| // |
| for (const SDValue &Op : Ops) { |
| if (SeenChains.count(Op.getNode()) == 0) |
| PrunedOps.push_back(Op); |
| } |
| Result = DAG.getTokenFactor(SDLoc(N), PrunedOps); |
| } else { |
| Result = DAG.getTokenFactor(SDLoc(N), Ops); |
| } |
| } |
| return Result; |
| } |
| return SDValue(); |
| } |
| |
| /// MERGE_VALUES can always be eliminated. |
| SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { |
| WorklistRemover DeadNodes(*this); |
| // Replacing results may cause a different MERGE_VALUES to suddenly |
| // be CSE'd with N, and carry its uses with it. Iterate until no |
| // uses remain, to ensure that the node can be safely deleted. |
| // First add the users of this node to the work list so that they |
| // can be tried again once they have new operands. |
| AddUsersToWorklist(N); |
| do { |
| // Do as a single replacement to avoid rewalking use lists. |
| SmallVector<SDValue, 8> Ops; |
| for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) |
| Ops.push_back(N->getOperand(i)); |
| DAG.ReplaceAllUsesWith(N, Ops.data()); |
| } while (!N->use_empty()); |
| deleteAndRecombine(N); |
| return SDValue(N, 0); // Return N so it doesn't get rechecked! |
| } |
| |
| /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a |
| /// ConstantSDNode pointer else nullptr. |
| static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { |
| ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); |
| return Const != nullptr && !Const->isOpaque() ? Const : nullptr; |
| } |
| |
| SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { |
| assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && |
| "Unexpected binary operator"); |
| |
| // Don't do this unless the old select is going away. We want to eliminate the |
| // binary operator, not replace a binop with a select. |
| // TODO: Handle ISD::SELECT_CC. |
| unsigned SelOpNo = 0; |
| SDValue Sel = BO->getOperand(0); |
| if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { |
| SelOpNo = 1; |
| Sel = BO->getOperand(1); |
| } |
| |
| if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) |
| return SDValue(); |
| |
| SDValue CT = Sel.getOperand(1); |
| if (!isConstantOrConstantVector(CT, true) && |
| !isConstantFPBuildVectorOrConstantFP(CT)) |
| return SDValue(); |
| |
| SDValue CF = Sel.getOperand(2); |
| if (!isConstantOrConstantVector(CF, true) && |
| !isConstantFPBuildVectorOrConstantFP(CF)) |
| return SDValue(); |
| |
| // Bail out if any constants are opaque because we can't constant fold those. |
| // The exception is "and" and "or" with either 0 or -1 in which case we can |
| // propagate non constant operands into select. I.e.: |
| // and (select Cond, 0, -1), X --> select Cond, 0, X |
| // or X, (select Cond, -1, 0) --> select Cond, -1, X |
| auto BinOpcode = BO->getOpcode(); |
| bool CanFoldNonConst = |
| (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && |
| (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) && |
| (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF)); |
| |
| SDValue CBO = BO->getOperand(SelOpNo ^ 1); |
| if (!CanFoldNonConst && |
| !isConstantOrConstantVector(CBO, true) && |
| !isConstantFPBuildVectorOrConstantFP(CBO)) |
| return SDValue(); |
| |
| EVT VT = Sel.getValueType(); |
| |
| // In case of shift value and shift amount may have different VT. For instance |
| // on x86 shift amount is i8 regardles of LHS type. Bail out if we have |
| // swapped operands and value types do not match. NB: x86 is fine if operands |
| // are not swapped with shift amount VT being not bigger than shifted value. |
| // TODO: that is possible to check for a shift operation, correct VTs and |
| // still perform optimization on x86 if needed. |
| if (SelOpNo && VT != CBO.getValueType()) |
| return SDValue(); |
| |
| // We have a select-of-constants followed by a binary operator with a |
| // constant. Eliminate the binop by pulling the constant math into the select. |
| // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO |
| SDLoc DL(Sel); |
| SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) |
| : DAG.getNode(BinOpcode, DL, VT, CT, CBO); |
| if (!CanFoldNonConst && !NewCT.isUndef() && |
| !isConstantOrConstantVector(NewCT, true) && |
| !isConstantFPBuildVectorOrConstantFP(NewCT)) |
| return SDValue(); |
| |
| SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) |
| : DAG.getNode(BinOpcode, DL, VT, CF, CBO); |
| if (!CanFoldNonConst && !NewCF.isUndef() && |
| !isConstantOrConstantVector(NewCF, true) && |
| !isConstantFPBuildVectorOrConstantFP(NewCF)) |
| return SDValue(); |
| |
| SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); |
| SelectOp->setFlags(BO->getFlags()); |
| return SelectOp; |
| } |
| |
| static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { |
| assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && |
| "Expecting add or sub"); |
| |
| // Match a constant operand and a zext operand for the math instruction: |
| // add Z, C |
| // sub C, Z |
| bool IsAdd = N->getOpcode() == ISD::ADD; |
| SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0); |
| SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1); |
| auto *CN = dyn_cast<ConstantSDNode>(C); |
| if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND) |
| return SDValue(); |
| |
| // Match the zext operand as a setcc of a boolean. |
| if (Z.getOperand(0).getOpcode() != ISD::SETCC || |
| Z.getOperand(0).getValueType() != MVT::i1) |
| return SDValue(); |
| |
| // Match the compare as: setcc (X & 1), 0, eq. |
| SDValue SetCC = Z.getOperand(0); |
| ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); |
| if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) || |
| SetCC.getOperand(0).getOpcode() != ISD::AND || |
| !isOneConstant(SetCC.getOperand(0).getOperand(1))) |
| return SDValue(); |
| |
| // We are adding/subtracting a constant and an inverted low bit. Turn that |
| // into a subtract/add of the low bit with incremented/decremented constant: |
| // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1)) |
| // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1)) |
| EVT VT = C.getValueType(); |
| SDLoc DL(N); |
| SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT); |
| SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) : |
| DAG.getConstant(CN->getAPIntValue() - 1, DL, VT); |
| return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit); |
| } |
| |
| /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into |
| /// a shift and add with a different constant. |
| static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { |
| assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && |
| "Expecting add or sub"); |
| |
| // We need a constant operand for the add/sub, and the other operand is a |
| // logical shift right: add (srl), C or sub C, (srl). |
| // TODO - support non-uniform vector amounts. |
| bool IsAdd = N->getOpcode() == ISD::ADD; |
| SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); |
| SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); |
| ConstantSDNode *C = isConstOrConstSplat(ConstantOp); |
| if (!C || ShiftOp.getOpcode() != ISD::SRL) |
| return SDValue(); |
| |
| // The shift must be of a 'not' value. |
| SDValue Not = ShiftOp.getOperand(0); |
| if (!Not.hasOneUse() || !isBitwiseNot(Not)) |
| return SDValue(); |
| |
| // The shift must be moving the sign bit to the least-significant-bit. |
| EVT VT = ShiftOp.getValueType(); |
| SDValue ShAmt = ShiftOp.getOperand(1); |
| ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); |
| if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1)) |
| return SDValue(); |
| |
| // Eliminate the 'not' by adjusting the shift and add/sub constant: |
| // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) |
| // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) |
| SDLoc DL(N); |
| auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; |
| SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); |
| APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1; |
| return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); |
| } |
| |
| /// Try to fold a node that behaves like an ADD (note that N isn't necessarily |
| /// an ISD::ADD here, it could for example be an ISD::OR if we know that there |
| /// are no common bits set in the operands). |
| SDValue DAGCombiner::visitADDLike(SDNode *N) { |
| SDValue N0 = N->getOperand(0); |
| SDValue N1 = N->getOperand(1); |
| EVT VT = N0.getValueType(); |
| SDLoc DL(N); |
| |
| // fold vector ops |
| if (VT.isVector()) { |
| if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
| return FoldedVOp; |
| |
| // fold (add x, 0) -> x, vector edition |
| if (ISD::isBuildVectorAllZeros(N1.getNode())) |
| return N0; |
| if (ISD::isBuildVectorAllZeros(N0.getNode())) |
| return N1; |
| } |
| |
| // fold (add x, undef) -> undef |
| if (N0.isUndef()) |
| return N0; |
| |
| if (N1.isUndef()) |
| return N1; |
| |
| if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { |
| // canonicalize constant to RHS |
| if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
| return DAG.getNode(ISD::ADD, DL, VT, N1, N0); |
| // fold (add c1, c2) -> c1+c2 |
| return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), |
| N1.getNode()); |
| } |
| |
| // fold (add x, 0) -> x |
| if (isNullConstant(N1)) |
| return N0; |
| |
| if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { |
| // fold ((A-c1)+c2) -> (A+(c2-c1)) |
| if (N0.getOpcode() == ISD::SUB && |
| isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { |
| SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(), |
| N0.getOperand(1).getNode()); |
| assert(Sub && "Constant folding failed"); |
| return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); |
| } |
| |
| // fold ((c1-A)+c2) -> (c1+c2)-A |
| if (N0.getOpcode() == ISD::SUB && |
| isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { |
| SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(), |
| N0.getOperand(0).getNode()); |
| assert(Add && "Constant folding failed"); |
| return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); |
| } |
| |
| // add (sext i1 X), 1 -> zext (not i1 X) |
| // We don't transform this pattern: |
| // add (zext i1 X), -1 -> sext (not i1 X) |
| // because most (?) targets generate better code for the zext form. |
| if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && |
| isOneOrOneSplat(N1)) { |
| SDValue X = N0.getOperand(0); |
| if ((!LegalOperations || |
| (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && |
| TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && |
| X.getScalarValueSizeInBits() == 1) { |
| SDValue Not = DAG.getNOT(DL, X, X.getValueType()); |
| return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); |
| } |
| } |
| |
| // Undo the add -> or combine to merge constant offsets from a frame index. |
| if (N0.getOpcode() == ISD::OR && |
| isa<FrameIndexSDNode>(N0.getOperand(0)) && |
| isa<ConstantSDNode>(N0.getOperand(1)) && |
| DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { |
| SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); |
| return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); |
| } |
| } |
| |
| if (SDValue NewSel = foldBinOpIntoSelect(N)) |
| return NewSel; |
| |
| // reassociate add |
| if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) { |
| if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) |
| return RADD; |
| } |
| // fold ((0-A) + B) -> B-A |
| if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0))) |
| return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); |
| |
| // fold (A + (0-B)) -> A-B |
| if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) |
| return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); |
| |
| // fold (A+(B-A)) -> B |
| if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) |
| return N1.getOperand(0); |
| |
| // fold ((B-A)+A) -> B |
| if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) |
| return N0.getOperand(0); |
| |
| // fold ((A-B)+(C-A)) -> (C-B) |
| if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && |
| N0.getOperand(0) == N1.getOperand(1)) |
| return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), |
| N0.getOperand(1)); |
| |
| // fold ((A-B)+(B-C)) -> (A-C) |
| if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && |
| N0.getOperand(1) == N1.getOperand(0)) |
| return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), |
| N1.getOperand(1)); |
| |
| // fold (A+(B-(A+C))) to (B-C) |
| if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && |
| N0 == N1.getOperand(1).getOperand(0)) |
| return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), |
| N1.getOperand(1).getOperand(1)); |
| |
| // fold (A+(B-(C+A))) to (B-C) |
| if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && |
| N0 == N1.getOperand(1).getOperand(1)) |
| return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), |
| N1.getOperand(1).getOperand(0)); |
| |
| // fold (A+((B-A)+or-C)) to (B+or-C) |
| if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && |
| N1.getOperand(0).getOpcode() == ISD::SUB && |
| N0 == N1.getOperand(0).getOperand(1)) |
| return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), |
| N1.getOperand(1)); |
| |
| // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant |
| if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { |
| SDValue N00 = N0.getOperand(0); |
| SDValue N01 = N0.getOperand(1); |
| SDValue N10 = N1.getOperand(0); |
| SDValue N11 = N1.getOperand(1); |
| |
| if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) |
| return DAG.getNode(ISD::SUB, DL, VT, |
| DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), |
| DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); |
| } |
| |
| // fold (add (umax X, C), -C) --> (usubsat X, C) |
| if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) { |
| auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) { |
| return (!Max && !Op) || |
| (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue())); |
| }; |
| if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT, |
| /*AllowUndefs*/ true)) |
| return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), |
| N0.getOperand(1)); |
| } |
| |
| if (SimplifyDemandedBits(SDValue(N, 0))) |
| return SDValue(N, 0); |
| |
| if (isOneOrOneSplat(N1)) { |
| // fold (add (xor a, -1), 1) -> (sub 0, a) |
| if (isBitwiseNot(N0)) |
| return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
| N0.getOperand(0)); |
| |
| // fold (add (add (xor a, -1), b), 1) -> (sub b, a) |
| if (N0.getOpcode() == ISD::ADD || |
| N0.getOpcode() == ISD::UADDO || |
| N0.getOpcode() == ISD::SADDO) { |
| SDValue A, Xor; |
| |
| if (isBitwiseNot(N0.getOperand(0))) { |
| A = N0.getOperand(1); |
| Xor = N0.getOperand(0); |
| } else if (isBitwiseNot(N0.getOperand(1))) { |
| A = N0.getOperand(0); |
| Xor = N0.getOperand(1); |
| } |
| |
| if (Xor) |
| return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0)); |
| } |
| |
| // Look for: |
| // add (add x, y), 1 |
| // And if the target does not like this form then turn into: |
| // sub y, (xor x, -1) |
| if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && |
| N0.getOpcode() == ISD::ADD) { |
| SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), |
| DAG.getAllOnesConstant(DL, VT)); |
| return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); |
| } |
| } |
| |
| // (x - y) + -1 -> add (xor y, -1), x |
| if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && |
| isAllOnesOrAllOnesSplat(N1)) { |
| SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1); |
| return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); |
| } |
| |
| if (SDValue Combined = visitADDLikeCommutative(N0, N1, N)) |
| return Combined; |
| |
| if (SDValue Combined = visitADDLikeCommutative(N1, N0, N)) |
| return Combined; |
| |
| return SDValue(); |
| } |
| |
| SDValue DAGCombiner::visitADD(SDNode *N) { |
| SDValue N0 = N->getOperand(0); |
| SDValue N1 = N->getOperand(1); |
| EVT VT = N0.getValueType(); |
| SDLoc DL(N); |
| |
| if (SDValue Combined = visitADDLike(N)) |
| return Combined; |
| |
| if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) |
| return V; |
| |
| if (SDValue V = foldAddSubOfSignBit(N, DAG)) |
| return V; |
| |
| // fold (a+b) -> (a|b) iff a and b share no bits. |
| if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && |
| DAG.haveNoCommonBitsSet(N0, N1)) |
| return DAG.getNode(ISD::OR, DL, VT, N0, N1); |
| |
| return SDValue(); |
| } |
| |
| SDValue DAGCombiner::visitADDSAT(SDNode *N) { |
| unsigned Opcode = N->getOpcode(); |
| SDValue N0 = N->getOperand(0); |
| SDValue N1 = N->getOperand(1); |
| EVT VT = N0.getValueType(); |
| SDLoc DL(N); |
| |
| // fold vector ops |
| if (VT.isVector()) { |
| // TODO SimplifyVBinOp |
| |
| // fold (add_sat x, 0) -> x, vector edition |
| if (ISD::isBuildVectorAllZeros(N1.getNode())) |
| return N0; |
| if (ISD::isBuildVectorAllZeros(N0.getNode())) |
| return N1; |
| } |
| |
| // fold (add_sat x, undef) -> -1 |
| if (N0.isUndef() || N1.isUndef()) |
| return DAG.getAllOnesConstant(DL, VT); |
| |
| if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { |
| // canonicalize constant to RHS |
| if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
| return DAG.getNode(Opcode, DL, VT, N1, N0); |
| // fold (add_sat c1, c2) -> c3 |
| return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(), |
| N1.getNode()); |
| } |
| |
| // fold (add_sat x, 0) -> x |
| if (isNullConstant(N1)) |
| return N0; |
| |
| // If it cannot overflow, transform into an add. |
| if (Opcode == ISD::UADDSAT) |
| if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) |
| return DAG.getNode(ISD::ADD, DL, VT, N0, N1); |
| |
| return SDValue(); |
| } |
| |
| static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { |
| bool Masked = false; |
| |
| // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. |
| while (true) { |
| if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) { |
| V = V.getOperand(0); |
| continue; |
| } |
| |
| if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) { |
| Masked = true; |
| V = V.getOperand(0); |
| continue; |
| } |
| |
| break; |
| } |
| |
| // If this is not a carry, return. |
| if (V.getResNo() != 1) |
| return SDValue(); |
| |
| if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY && |
| V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) |
| return SDValue(); |
| |
| EVT VT = V.getNode()->getValueType(0); |
| if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT)) |
| return SDValue(); |
| |
| // If the result is masked, then no matter what kind of bool it is we can |
| // return. If it isn't, then we need to make sure the bool type is either 0 or |
| // 1 and not other values. |
| if (Masked || |
| TLI.getBooleanContents(V.getValueType()) == |
| TargetLoweringBase::ZeroOrOneBooleanContent) |
| return V; |
| |
| return SDValue(); |
| } |
| |
| /// Given the operands of an add/sub operation, see if the 2nd operand is a |
| /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert |
| /// the opcode and bypass the mask operation. |
| static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, |
| SelectionDAG &DAG, const SDLoc &DL) { |
| if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1))) |
| return SDValue(); |
| |
| EVT VT = N0.getValueType(); |
| if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits()) |
| return SDValue(); |
| |
| // add N0, (and (AssertSext X, i1), 1) --> sub N0, X |
| // sub N0, (and (AssertSext X, i1), 1) --> add N0, X |
| return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); |
| } |
| |
| /// Helper for doing combines based on N0 and N1 being added to each other. |
| SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, |
| SDNode *LocReference) { |
| EVT VT = N0.getValueType(); |
| SDLoc DL(LocReference); |
| |
| // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) |
| if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && |
| isNullOrNullSplat(N1.getOperand(0).getOperand(0))) |
| return DAG.getNode(ISD::SUB, DL, VT, N0, |
| DAG.getNode(ISD::SHL, DL, VT, |
| N1.getOperand(0).getOperand(1), |
| N1.getOperand(1))); |
| |
| if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) |
| return V; |
| |
| // Look for: |
| // add (add x, 1), y |
| // And if the target does not like this form then turn into: |
| // sub y, (xor x, -1) |
| if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && |
| N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) { |
| SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), |
| DAG.getAllOnesConstant(DL, VT)); |
| return DAG.getNode(ISD::SUB, DL, VT, N1, Not); |
| } |
| |
| // Hoist one-use subtraction by non-opaque constant: |
| // (x - C) + y -> (x + y) - C |
| // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. |
| if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && |
| isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { |
| SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); |
| return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); |
| } |
| // Hoist one-use subtraction from non-opaque constant: |
| // (C - x) + y -> (y - x) + C |
| if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && |
| isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { |
| SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); |
| return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); |
| } |
| |
| // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1' |
| // rather than 'add 0/-1' (the zext should get folded). |
| // add (sext i1 Y), X --> sub X, (zext i1 Y) |
| if (N0.getOpcode() == ISD::SIGN_EXTEND && |
| N0.getOperand(0).getScalarValueSizeInBits() == 1 && |
| TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) { |
| SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); |
| return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); |
| } |
| |
| // add X, (sextinreg Y i1) -> sub X, (and Y 1) |
| if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
| VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); |
| if (TN->getVT() == MVT::i1) { |
| SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), |
| DAG.getConstant(1, DL, VT)); |
| return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); |
| } |
| } |
| |
| // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) |
| if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) && |
| N1.getResNo() == 0) |
| return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), |
| N0, N1.getOperand(0), N1.getOperand(2)); |
| |
| // (add X, Carry) -> (addcarry X, 0, Carry) |
| if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) |
| if (SDValue Carry = getAsCarry(TLI, N1)) |
| return DAG.getNode(ISD::ADDCARRY, DL, |
| DAG.getVTList(VT, Carry.getValueType()), N0, |
| DAG.getConstant(0, DL, VT), Carry); |
| |
| return SDValue(); |
| } |
| |
| SDValue DAGCombiner::visitADDC(SDNode *N) { |
| SDValue N0 = N->getOperand(0); |
| SDValue N1 = N->getOperand(1); |
| EVT VT = N0.getValueType(); |
| SDLoc DL(N); |
| |
| // If the flag result is dead, turn this into an ADD. |
| if (!N->hasAnyUseOfValue(1)) |
| return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), |
| DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
| |
| // canonicalize constant to RHS. |
| ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); |
| ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
| if (N0C && !N1C) |
| return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0); |
| |
| // fold (addc x, 0) -> x + no carry out |
| if (isNullConstant(N1)) |
| return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, |
| DL, MVT::Glue)); |
| |
| // If it cannot overflow, transform into an add. |
| if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) |
| return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), |
| DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
| |
| return SDValue(); |
| } |
| |
| static SDValue flipBoolean(SDValue V, const SDLoc &DL, |
| SelectionDAG &DAG, const TargetLowering &TLI) { |
| EVT VT = V.getValueType(); |
| |
| SDValue Cst; |
| switch (TLI.getBooleanContents(VT)) { |
| case TargetLowering::ZeroOrOneBooleanContent: |
| case TargetLowering::UndefinedBooleanContent: |
| Cst = DAG.getConstant(1, DL, VT); |
| break; |
| case TargetLowering::ZeroOrNegativeOneBooleanContent: |
| Cst = DAG.getAllOnesConstant(DL, VT); |
| break; |
| } |
| |
| return DAG.getNode(ISD::XOR, DL, VT, V, Cst); |
| } |
| |
| /** |
| * Flips a boolean if it is cheaper to compute. If the Force parameters is set, |
| * then the flip also occurs if computing the inverse is the same cost. |
| * This function returns an empty SDValue in case it cannot flip the boolean |
| * without increasing the cost of the computation. If you want to flip a boolean |
| * no matter what, use flipBoolean. |
| */ |
| static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, |
| const TargetLowering &TLI, |
| bool Force) { |
| if (Force && isa<ConstantSDNode>(V)) |
| return flipBoolean(V, SDLoc(V), DAG, TLI); |
| |
| if (V.getOpcode() != ISD::XOR) |
| return SDValue(); |
| |
| ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false); |
| if (!Const) |
| return SDValue(); |
| |
| EVT VT = V.getValueType(); |
| |
| bool IsFlip = false; |
| switch(TLI.getBooleanContents(VT)) { |
| case TargetLowering::ZeroOrOneBooleanContent: |