| //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a CFL-base, summary-based alias analysis algorithm. It |
| // does not depend on types. The algorithm is a mixture of the one described in |
| // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast |
| // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by |
| // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a |
| // graph of the uses of a variable, where each node is a memory location, and |
| // each edge is an action that happened on that memory location. The "actions" |
| // can be one of Dereference, Reference, or Assign. The precision of this |
| // analysis is roughly the same as that of an one level context-sensitive |
| // Steensgaard's algorithm. |
| // |
| // Two variables are considered as aliasing iff you can reach one value's node |
| // from the other value's node and the language formed by concatenating all of |
| // the edge labels (actions) conforms to a context-free grammar. |
| // |
| // Because this algorithm requires a graph search on each query, we execute the |
| // algorithm outlined in "Fast algorithms..." (mentioned above) |
| // in order to transform the graph into sets of variables that may alias in |
| // ~nlogn time (n = number of variables), which makes queries take constant |
| // time. |
| //===----------------------------------------------------------------------===// |
| |
| // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and |
| // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because |
| // FunctionPasses are only allowed to inspect the Function that they're being |
| // run on. Realistically, this likely isn't a problem until we allow |
| // FunctionPasses to run concurrently. |
| |
| #include "llvm/Analysis/CFLSteensAliasAnalysis.h" |
| #include "AliasAnalysisSummary.h" |
| #include "CFLGraph.h" |
| #include "StratifiedSets.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Analysis/TargetLibraryInfo.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/InitializePasses.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <limits> |
| #include <memory> |
| #include <utility> |
| |
| using namespace llvm; |
| using namespace llvm::cflaa; |
| |
| #define DEBUG_TYPE "cfl-steens-aa" |
| |
| CFLSteensAAResult::CFLSteensAAResult( |
| std::function<const TargetLibraryInfo &(Function &F)> GetTLI) |
| : AAResultBase(), GetTLI(std::move(GetTLI)) {} |
| CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) |
| : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} |
| CFLSteensAAResult::~CFLSteensAAResult() = default; |
| |
| /// Information we have about a function and would like to keep around. |
| class CFLSteensAAResult::FunctionInfo { |
| StratifiedSets<InstantiatedValue> Sets; |
| AliasSummary Summary; |
| |
| public: |
| FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals, |
| StratifiedSets<InstantiatedValue> S); |
| |
| const StratifiedSets<InstantiatedValue> &getStratifiedSets() const { |
| return Sets; |
| } |
| |
| const AliasSummary &getAliasSummary() const { return Summary; } |
| }; |
| |
| const StratifiedIndex StratifiedLink::SetSentinel = |
| std::numeric_limits<StratifiedIndex>::max(); |
| |
| //===----------------------------------------------------------------------===// |
| // Function declarations that require types defined in the namespace above |
| //===----------------------------------------------------------------------===// |
| |
| /// Determines whether it would be pointless to add the given Value to our sets. |
| static bool canSkipAddingToSets(Value *Val) { |
| // Constants can share instances, which may falsely unify multiple |
| // sets, e.g. in |
| // store i32* null, i32** %ptr1 |
| // store i32* null, i32** %ptr2 |
| // clearly ptr1 and ptr2 should not be unified into the same set, so |
| // we should filter out the (potentially shared) instance to |
| // i32* null. |
| if (isa<Constant>(Val)) { |
| // TODO: Because all of these things are constant, we can determine whether |
| // the data is *actually* mutable at graph building time. This will probably |
| // come for free/cheap with offset awareness. |
| bool CanStoreMutableData = isa<GlobalValue>(Val) || |
| isa<ConstantExpr>(Val) || |
| isa<ConstantAggregate>(Val); |
| return !CanStoreMutableData; |
| } |
| |
| return false; |
| } |
| |
| CFLSteensAAResult::FunctionInfo::FunctionInfo( |
| Function &Fn, const SmallVectorImpl<Value *> &RetVals, |
| StratifiedSets<InstantiatedValue> S) |
| : Sets(std::move(S)) { |
| // Historically, an arbitrary upper-bound of 50 args was selected. We may want |
| // to remove this if it doesn't really matter in practice. |
| if (Fn.arg_size() > MaxSupportedArgsInSummary) |
| return; |
| |
| DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap; |
| |
| // Our intention here is to record all InterfaceValues that share the same |
| // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we |
| // have its StratifiedIndex scanned here and check if the index is presented |
| // in InterfaceMap: if it is not, we add the correspondence to the map; |
| // otherwise, an aliasing relation is found and we add it to |
| // RetParamRelations. |
| |
| auto AddToRetParamRelations = [&](unsigned InterfaceIndex, |
| StratifiedIndex SetIndex) { |
| unsigned Level = 0; |
| while (true) { |
| InterfaceValue CurrValue{InterfaceIndex, Level}; |
| |
| auto Itr = InterfaceMap.find(SetIndex); |
| if (Itr != InterfaceMap.end()) { |
| if (CurrValue != Itr->second) |
| Summary.RetParamRelations.push_back( |
| ExternalRelation{CurrValue, Itr->second, UnknownOffset}); |
| break; |
| } |
| |
| auto &Link = Sets.getLink(SetIndex); |
| InterfaceMap.insert(std::make_pair(SetIndex, CurrValue)); |
| auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs); |
| if (ExternalAttrs.any()) |
| Summary.RetParamAttributes.push_back( |
| ExternalAttribute{CurrValue, ExternalAttrs}); |
| |
| if (!Link.hasBelow()) |
| break; |
| |
| ++Level; |
| SetIndex = Link.Below; |
| } |
| }; |
| |
| // Populate RetParamRelations for return values |
| for (auto *RetVal : RetVals) { |
| assert(RetVal != nullptr); |
| assert(RetVal->getType()->isPointerTy()); |
| auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0}); |
| if (RetInfo.hasValue()) |
| AddToRetParamRelations(0, RetInfo->Index); |
| } |
| |
| // Populate RetParamRelations for parameters |
| unsigned I = 0; |
| for (auto &Param : Fn.args()) { |
| if (Param.getType()->isPointerTy()) { |
| auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0}); |
| if (ParamInfo.hasValue()) |
| AddToRetParamRelations(I + 1, ParamInfo->Index); |
| } |
| ++I; |
| } |
| } |
| |
| // Builds the graph + StratifiedSets for a function. |
| CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { |
| CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn); |
| StratifiedSetsBuilder<InstantiatedValue> SetBuilder; |
| |
| // Add all CFLGraph nodes and all Dereference edges to StratifiedSets |
| auto &Graph = GraphBuilder.getCFLGraph(); |
| for (const auto &Mapping : Graph.value_mappings()) { |
| auto Val = Mapping.first; |
| if (canSkipAddingToSets(Val)) |
| continue; |
| auto &ValueInfo = Mapping.second; |
| |
| assert(ValueInfo.getNumLevels() > 0); |
| SetBuilder.add(InstantiatedValue{Val, 0}); |
| SetBuilder.noteAttributes(InstantiatedValue{Val, 0}, |
| ValueInfo.getNodeInfoAtLevel(0).Attr); |
| for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) { |
| SetBuilder.add(InstantiatedValue{Val, I + 1}); |
| SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1}, |
| ValueInfo.getNodeInfoAtLevel(I + 1).Attr); |
| SetBuilder.addBelow(InstantiatedValue{Val, I}, |
| InstantiatedValue{Val, I + 1}); |
| } |
| } |
| |
| // Add all assign edges to StratifiedSets |
| for (const auto &Mapping : Graph.value_mappings()) { |
| auto Val = Mapping.first; |
| if (canSkipAddingToSets(Val)) |
| continue; |
| auto &ValueInfo = Mapping.second; |
| |
| for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { |
| auto Src = InstantiatedValue{Val, I}; |
| for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) |
| SetBuilder.addWith(Src, Edge.Other); |
| } |
| } |
| |
| return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build()); |
| } |
| |
| void CFLSteensAAResult::scan(Function *Fn) { |
| auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>())); |
| (void)InsertPair; |
| assert(InsertPair.second && |
| "Trying to scan a function that has already been cached"); |
| |
| // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call |
| // may get evaluated after operator[], potentially triggering a DenseMap |
| // resize and invalidating the reference returned by operator[] |
| auto FunInfo = buildSetsFrom(Fn); |
| Cache[Fn] = std::move(FunInfo); |
| |
| Handles.emplace_front(Fn, this); |
| } |
| |
| void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); } |
| |
| /// Ensures that the given function is available in the cache, and returns the |
| /// entry. |
| const Optional<CFLSteensAAResult::FunctionInfo> & |
| CFLSteensAAResult::ensureCached(Function *Fn) { |
| auto Iter = Cache.find(Fn); |
| if (Iter == Cache.end()) { |
| scan(Fn); |
| Iter = Cache.find(Fn); |
| assert(Iter != Cache.end()); |
| assert(Iter->second.hasValue()); |
| } |
| return Iter->second; |
| } |
| |
| const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) { |
| auto &FunInfo = ensureCached(&Fn); |
| if (FunInfo.hasValue()) |
| return &FunInfo->getAliasSummary(); |
| else |
| return nullptr; |
| } |
| |
| AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, |
| const MemoryLocation &LocB) { |
| auto *ValA = const_cast<Value *>(LocA.Ptr); |
| auto *ValB = const_cast<Value *>(LocB.Ptr); |
| |
| if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) |
| return NoAlias; |
| |
| Function *Fn = nullptr; |
| Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA)); |
| Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB)); |
| if (!MaybeFnA && !MaybeFnB) { |
| // The only times this is known to happen are when globals + InlineAsm are |
| // involved |
| LLVM_DEBUG( |
| dbgs() |
| << "CFLSteensAA: could not extract parent function information.\n"); |
| return MayAlias; |
| } |
| |
| if (MaybeFnA) { |
| Fn = MaybeFnA; |
| assert((!MaybeFnB || MaybeFnB == MaybeFnA) && |
| "Interprocedural queries not supported"); |
| } else { |
| Fn = MaybeFnB; |
| } |
| |
| assert(Fn != nullptr); |
| auto &MaybeInfo = ensureCached(Fn); |
| assert(MaybeInfo.hasValue()); |
| |
| auto &Sets = MaybeInfo->getStratifiedSets(); |
| auto MaybeA = Sets.find(InstantiatedValue{ValA, 0}); |
| if (!MaybeA.hasValue()) |
| return MayAlias; |
| |
| auto MaybeB = Sets.find(InstantiatedValue{ValB, 0}); |
| if (!MaybeB.hasValue()) |
| return MayAlias; |
| |
| auto SetA = *MaybeA; |
| auto SetB = *MaybeB; |
| auto AttrsA = Sets.getLink(SetA.Index).Attrs; |
| auto AttrsB = Sets.getLink(SetB.Index).Attrs; |
| |
| // If both values are local (meaning the corresponding set has attribute |
| // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them: |
| // they may-alias each other if and only if they are in the same set. |
| // If at least one value is non-local (meaning it either is global/argument or |
| // it comes from unknown sources like integer cast), the situation becomes a |
| // bit more interesting. We follow three general rules described below: |
| // - Non-local values may alias each other |
| // - AttrNone values do not alias any non-local values |
| // - AttrEscaped do not alias globals/arguments, but they may alias |
| // AttrUnknown values |
| if (SetA.Index == SetB.Index) |
| return MayAlias; |
| if (AttrsA.none() || AttrsB.none()) |
| return NoAlias; |
| if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB)) |
| return MayAlias; |
| if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB)) |
| return MayAlias; |
| return NoAlias; |
| } |
| |
| AnalysisKey CFLSteensAA::Key; |
| |
| CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { |
| auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { |
| return AM.getResult<TargetLibraryAnalysis>(F); |
| }; |
| return CFLSteensAAResult(GetTLI); |
| } |
| |
| char CFLSteensAAWrapperPass::ID = 0; |
| INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa", |
| "Unification-Based CFL Alias Analysis", false, true) |
| |
| ImmutablePass *llvm::createCFLSteensAAWrapperPass() { |
| return new CFLSteensAAWrapperPass(); |
| } |
| |
| CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { |
| initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry()); |
| } |
| |
| void CFLSteensAAWrapperPass::initializePass() { |
| auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { |
| return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
| }; |
| Result.reset(new CFLSteensAAResult(GetTLI)); |
| } |
| |
| void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
| AU.setPreservesAll(); |
| AU.addRequired<TargetLibraryInfoWrapperPass>(); |
| } |