| //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file is a part of SanitizerBinaryMetadata. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/Triple.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/IR/GlobalVariable.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/LLVMContext.h" |
| #include "llvm/IR/MDBuilder.h" |
| #include "llvm/IR/Metadata.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/InitializePasses.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Transforms/Instrumentation.h" |
| #include "llvm/Transforms/Utils/ModuleUtils.h" |
| |
| #include <array> |
| #include <cstdint> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "sanmd" |
| |
| namespace { |
| |
| //===--- Constants --------------------------------------------------------===// |
| |
| constexpr uint32_t kVersionBase = 1; // occupies lower 16 bits |
| constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized |
| constexpr int kCtorDtorPriority = 2; |
| |
| // Describes one kind of metadata: the name prefix of its initialization |
| // callbacks, the section that holds the metadata, and the feature mask that |
| // represents it. |
| class MetadataInfo { |
|   // Private on purpose: the static constants declared below are meant to be |
|   // the only instances of this class. |
|   explicit constexpr MetadataInfo(StringRef Prefix, StringRef Suffix, |
|                                   uint32_t Mask) |
|       : FunctionPrefix(Prefix), SectionSuffix(Suffix), FeatureMask(Mask) {} |
| |
| public: |
|   // Prefix shared by the callback and constructor/destructor names. |
|   const StringRef FunctionPrefix; |
|   // Suffix from which the metadata section name is derived. |
|   const StringRef SectionSuffix; |
|   // Feature bit(s) this kind contributes to a function's feature mask. |
|   const uint32_t FeatureMask; |
| |
|   static const MetadataInfo Covered; |
|   static const MetadataInfo Atomics; |
| }; |
| const MetadataInfo MetadataInfo::Covered( |
|     "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection, |
|     kSanitizerBinaryMetadataNone); |
| const MetadataInfo MetadataInfo::Atomics( |
|     "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection, |
|     kSanitizerBinaryMetadataAtomics); |
| |
| // Every MetadataInfo is one of the static constants above, so a set of them |
| // can store plain pointers. SetVector is used because its iteration order is |
| // stable, which keeps the generated code deterministic. |
| using MetadataInfoSet = SetVector<const MetadataInfo *>; |
| |
| //===--- Command-line options ---------------------------------------------===// |
| |
| // Controls whether the runtime callbacks are declared weak (default: on). |
| cl::opt<bool> ClWeakCallbacks( |
|     "sanitizer-metadata-weak-callbacks", cl::Hidden, cl::init(true), |
|     cl::desc("Declare callbacks extern weak, and only call if non-null.")); |
| |
| // The flags below can only switch a feature on in addition to whatever the |
| // pass options already request (see transformOptionsFromCl); all default off. |
| cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered", cl::Hidden, |
|                             cl::init(false), |
|                             cl::desc("Emit PCs for covered functions.")); |
| cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics", cl::Hidden, |
|                             cl::init(false), |
|                             cl::desc("Emit PCs for atomic operations.")); |
| cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar", cl::Hidden, cl::init(false), |
|                         cl::desc("Emit PCs for start of functions that are " |
|                                  "subject for use-after-return checking")); |
| |
| //===--- Statistics -------------------------------------------------------===// |
| |
| // Counters bumped each time the corresponding kind of metadata is attached |
| // (listed alphabetically). |
| STATISTIC(NumMetadataAtomics, "Metadata attached to atomics"); |
| STATISTIC(NumMetadataCovered, "Metadata attached to covered functions"); |
| STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions"); |
| |
| //===----------------------------------------------------------------------===// |
| |
| // Folds the command-line flags into Opts. A flag can only enable a feature; |
| // a feature already requested through Opts stays enabled. The very same |
| // object is handed back as an rvalue so it can be consumed in place. |
| SanitizerBinaryMetadataOptions && |
| transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { |
|   Opts.Covered |= ClEmitCovered.getValue(); |
|   Opts.Atomics |= ClEmitAtomics.getValue(); |
|   Opts.UAR |= ClEmitUAR.getValue(); |
|   return std::move(Opts); |
| } |
| |
| // Implements the pass for one module: visits all functions, attaches |
| // !pcsections metadata to functions and instructions, and emits module |
| // constructors/destructors that pass the section markers to the callbacks. |
| class SanitizerBinaryMetadata { |
| public: |
|   // Binds the instance to module M. Features enabled through command-line |
|   // flags are merged into Opts before the options are stored. |
|   SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts) |
|       : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), |
|         TargetTriple(M.getTargetTriple()), IRB(M.getContext()) { |
|     // FIXME: Make it work with other formats. |
|     assert(TargetTriple.isOSBinFormatELF() && "ELF only"); |
|   } |
| |
|   // Processes the whole module. Returns true if any metadata kind was used, |
|   // i.e. if the module was modified. |
|   bool run(); |
| |
| private: |
|   // Mask of the enabled features that are tracked per instruction. |
|   uint32_t getEnabledPerInstructionFeature() const { |
|     return Options.Atomics ? MetadataInfo::Atomics.FeatureMask : uint32_t{0}; |
|   } |
| |
|   // Version word passed to the callbacks: the base version, plus the |
|   // pointer-sized-offsets flag for the medium and large code models. |
|   uint32_t getVersion() const { |
|     const auto CM = Mod.getCodeModel(); |
|     const bool PtrSizedOffsets = |
|         CM.has_value() && |
|         (*CM == CodeModel::Medium || *CM == CodeModel::Large); |
|     return PtrSizedOffsets ? (kVersionBase | kVersionPtrSizeRel) |
|                            : kVersionBase; |
|   } |
| |
|   // Handles a single function; every metadata kind that gets used is added |
|   // to MIS. |
|   void runOn(Function &F, MetadataInfoSet &MIS); |
| |
|   // Decides which metadata to collect for instruction I. |
|   // |
|   // The result is true when covered metadata is needed to interpret other |
|   // metadata unambiguously. Example: with atomics metadata enabled, every |
|   // function containing memory operations (atomic or not) needs covered |
|   // metadata, so that a tool can tell whether a memory operation is atomic |
|   // in modules compiled with SanitizerBinaryMetadata. |
|   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, |
|              uint32_t &FeatureMask); |
| |
|   // Creates the global that marks the start or end of a section. |
|   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); |
| |
|   // Target-dependent name of the section for the given suffix. |
|   StringRef getSectionName(StringRef SectionSuffix); |
| |
|   // Name of the symbol marking the start of the section. |
|   Twine getSectionStart(StringRef SectionSuffix); |
| |
|   // Name of the symbol marking the end of the section. |
|   Twine getSectionEnd(StringRef SectionSuffix); |
| |
|   Module &Mod; |
|   const SanitizerBinaryMetadataOptions Options; |
|   const Triple TargetTriple; |
|   IRBuilder<> IRB; |
| }; |
| |
| // Instruments every function of the module. If at least one metadata kind |
| // was used, emits for each used kind a module constructor and a module |
| // destructor that call the kind's "_add" / "_del" callback, and returns true. |
| // Returns false, leaving the module without constructors, otherwise. |
| bool SanitizerBinaryMetadata::run() { |
|   MetadataInfoSet UsedKinds; |
|   for (Function &F : Mod) |
|     runOn(F, UsedKinds); |
|   if (UsedKinds.empty()) |
|     return false; |
| |
|   // Every callback receives (version, &section start marker, &section end |
|   // marker). |
|   auto *BytePtrTy = IRB.getInt8PtrTy(); |
|   auto *BytePtrPtrTy = PointerType::getUnqual(BytePtrTy); |
|   auto *I32Ty = IRB.getInt32Ty(); |
|   const std::array<Type *, 3> InitTypes = {I32Ty, BytePtrPtrTy, BytePtrPtrTy}; |
|   auto *VersionArg = ConstantInt::get(I32Ty, getVersion()); |
| |
|   for (const MetadataInfo *Kind : UsedKinds) { |
|     const std::array<Value *, InitTypes.size()> InitArgs = { |
|         VersionArg, |
|         getSectionMarker(getSectionStart(Kind->SectionSuffix), BytePtrTy), |
|         getSectionMarker(getSectionEnd(Kind->SectionSuffix), BytePtrTy), |
|     }; |
| |
|     // Creates the function <prefix><FnSuffix> that calls the callback |
|     // <prefix><CallbackSuffix> with InitArgs. With ClWeakCallbacks set, the |
|     // callback is declared weak and only called if a definition is linked |
|     // in. That allows building binaries with semantic metadata but without |
|     // callbacks; a tool that wants the metadata provides them when linked. |
|     const auto MakeCallbackCaller = [&](const char *FnSuffix, |
|                                         const char *CallbackSuffix) { |
|       return createSanitizerCtorAndInitFunctions( |
|                  Mod, (Kind->FunctionPrefix + FnSuffix).str(), |
|                  (Kind->FunctionPrefix + CallbackSuffix).str(), InitTypes, |
|                  InitArgs, /*VersionCheckName=*/StringRef(), |
|                  /*Weak=*/ClWeakCallbacks) |
|           .first; |
|     }; |
|     Function *const Ctor = MakeCallbackCaller(".module_ctor", "_add"); |
|     Function *const Dtor = MakeCallbackCaller(".module_dtor", "_del"); |
| |
|     // Where COMDATs are supported, put each function into a COMDAT named |
|     // after itself to deduplicate it, and pass the function as the Data |
|     // argument of its registration. Otherwise no Data is passed. |
|     const bool UseComdat = TargetTriple.supportsCOMDAT(); |
|     if (UseComdat) { |
|       Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); |
|       Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); |
|     } |
|     appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, |
|                         UseComdat ? Ctor : nullptr); |
|     appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, |
|                         UseComdat ? Dtor : nullptr); |
|   } |
| |
|   return true; |
| } |
| |
| // Processes one function: lets the per-instruction runOn() annotate its |
| // instructions and, when required, attaches covered metadata that carries |
| // the function's feature mask. Every metadata kind used is added to MIS. |
| void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { |
|   // Skip functions without a body, functions that disabled sanitizer |
|   // instrumentation, and available_externally functions (their actual body |
|   // is elsewhere). |
|   if (F.empty() || |
|       F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation) || |
|       F.hasAvailableExternallyLinkage()) |
|     return; |
| |
|   MDBuilder MDB(F.getContext()); |
| |
|   // Features enabled for this function; stored alongside the covered |
|   // metadata if that gets emitted. |
|   uint32_t Features = getEnabledPerInstructionFeature(); |
|   // Covered metadata is not emitted for all functions, to save space. |
|   bool NeedsCovered = false; |
| |
|   // Whether the UAR feature applies is only known after inspecting the |
|   // instructions, so they are scanned even if Features is still empty. |
|   const bool ScanInstructions = Features != 0 || Options.UAR; |
|   if (ScanInstructions) { |
|     for (BasicBlock &BB : F) { |
|       for (Instruction &I : BB) { |
|         if (runOn(I, MIS, MDB, Features)) |
|           NeedsCovered = true; |
|       } |
|     } |
|   } |
| |
|   // The UAR feature is always cleared for variadic functions. |
|   if (F.isVarArg()) |
|     Features &= ~kSanitizerBinaryMetadataUAR; |
|   const bool HasUAR = (Features & kSanitizerBinaryMetadataUAR) != 0; |
|   if (HasUAR) { |
|     NeedsCovered = true; |
|     ++NumMetadataUAR; |
|   } |
| |
|   // Covered metadata is emitted whenever it is requested explicitly, and |
|   // otherwise only if some other metadata needs it to be interpreted |
|   // unambiguously in modules compiled with SanitizerBinaryMetadata. |
|   const bool EmitCovered = Options.Covered || (Features != 0 && NeedsCovered); |
|   if (!EmitCovered) |
|     return; |
| |
|   ++NumMetadataCovered; |
|   const MetadataInfo *CoveredInfo = &MetadataInfo::Covered; |
|   MIS.insert(CoveredInfo); |
|   const StringRef Section = getSectionName(CoveredInfo->SectionSuffix); |
|   // The feature mask follows the 32-bit function size, so one covered entry |
|   // takes `sizeof(void*) + 4 + 4` in total. |
|   Constant *CFM = IRB.getInt32(Features); |
|   F.setMetadata(LLVMContext::MD_pcsections, |
|                 MDB.createPCSections({{Section, {CFM}}})); |
| } |
| |
| // Returns true if the call CI cannot contribute to a use-after-return: it is |
| // safe both to pass pointers to local variables to the callee and to |
| // tail-call it. Indirect calls (no statically known callee) are never |
| // considered safe. |
| bool isUARSafeCall(CallInst *CI) { |
|   const Function *Callee = CI->getCalledFunction(); |
|   if (!Callee) |
|     return false; |
|   // There are no intrinsic functions that leak arguments. If the called |
|   // function does not return, the current function does not return either, |
|   // so there is no possibility of use-after-return. |
|   if (Callee->isIntrinsic() || Callee->doesNotReturn()) |
|     return true; |
|   // Sanitizer runtime functions also either don't leak or don't return. |
|   // Note that the HWASan runtime prefix is "__hwasan_". |
|   static constexpr const char *SanitizerPrefixes[] = { |
|       "__asan_", "__hwasan_", "__ubsan_", "__msan_", "__tsan_"}; |
|   const StringRef Name = Callee->getName(); |
|   for (const char *Prefix : SanitizerPrefixes) |
|     if (Name.startswith(Prefix)) |
|       return true; |
|   return false; |
| } |
| |
| // Returns true if any user of V might let the address held by V escape, so |
| // that it could be used after the function returned. V is expected to be an |
| // alloca or a GEP/bitcast derived from one (see useAfterReturnUnsafe). The |
| // check is conservative: every user that is not recognised as harmless |
| // counts as unsafe. |
| bool hasUseAfterReturnUnsafeUses(Value &V) { |
|   for (User *U : V.users()) { |
|     auto *I = dyn_cast<Instruction>(U); |
|     // Users that are not instructions are not analysed. |
|     if (!I) |
|       return true; |
|     if (I->isLifetimeStartOrEnd() || I->isDroppable()) |
|       continue; |
|     if (auto *CI = dyn_cast<CallInst>(I)) { |
|       if (isUARSafeCall(CI)) |
|         continue; |
|     } |
|     // Reading through the pointer does not take its address. |
|     if (isa<LoadInst>(I)) |
|       continue; |
|     if (auto *SI = dyn_cast<StoreInst>(I)) { |
|       // Storing TO the alloca does not take its address, unless the value |
|       // being stored is the address itself (e.g. `void *p = &p;`). In that |
|       // case the address lands in memory and can escape through later |
|       // loads, which are not tracked here. |
|       if (SI->getPointerOperand() == &V && SI->getValueOperand() != &V) |
|         continue; |
|     } |
|     // GEPs and bitcasts yield derived pointers; they are harmless only if |
|     // all of their own uses are. |
|     if (isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) { |
|       if (!hasUseAfterReturnUnsafeUses(*I)) |
|         continue; |
|     } |
|     return true; |
|   } |
|   return false; |
| } |
| |
| // Returns true if instruction I makes its function subject to |
| // use-after-return checking. |
| bool useAfterReturnUnsafe(Instruction &I) { |
|   // An alloca matters if any of its uses is unsafe. |
|   if (isa<AllocaInst>(I)) |
|     return hasUseAfterReturnUnsafeUses(I); |
| |
|   // Tail-called functions are not necessarily intercepted at runtime because |
|   // there is no call instruction, so conservatively mark the caller as |
|   // requiring checking. |
|   auto *Call = dyn_cast<CallInst>(&I); |
|   if (!Call || !Call->isTailCall()) |
|     return false; |
|   return !isUARSafeCall(Call); |
| } |
| |
| // Collects metadata for a single instruction. |
| // |
| // May set the UAR bit in FeatureMask, and attaches atomics metadata to I |
| // (recording the kind in MIS) when I is an atomic operation whose sync scope |
| // is not single-thread. Returns true if covered metadata is required for the |
| // enclosing function, which is the case for every memory operation while |
| // atomics metadata is enabled. |
| bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, |
|                                     MDBuilder &MDB, uint32_t &FeatureMask) { |
|   // Once the UAR bit is set for the function, no further checks are needed. |
|   const bool UARUndecided = |
|       Options.UAR && (FeatureMask & kSanitizerBinaryMetadataUAR) == 0; |
|   if (UARUndecided && useAfterReturnUnsafe(I)) |
|     FeatureMask |= kSanitizerBinaryMetadataUAR; |
| |
|   // Everything below concerns atomics metadata on memory operations only. |
|   if (!Options.Atomics || !I.mayReadOrWriteMemory()) |
|     return false; |
| |
|   const auto SSID = getAtomicSyncScopeID(&I); |
|   const bool IsCrossThreadAtomic = |
|       SSID.has_value() && *SSID != SyncScope::SingleThread; |
|   if (IsCrossThreadAtomic) { |
|     ++NumMetadataAtomics; |
|     const MetadataInfo *Info = &MetadataInfo::Atomics; |
|     MIS.insert(Info); |
|     // Attach MD_pcsections to the instruction. |
|     SmallVector<MDBuilder::PCSection, 1> Sections; |
|     Sections.push_back({getSectionName(Info->SectionSuffix), {}}); |
|     I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); |
|   } |
| |
|   return true; |
| } |
| |
| // Creates, in Mod, a hidden extern-weak global of type Ty named MarkerName |
| // and returns it. |
| GlobalVariable * |
| SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { |
|   constexpr bool IsConstant = false; |
|   Constant *const NoInitializer = nullptr; |
|   // ExternalWeak linkage keeps the linker from reporting undefined symbol |
|   // errors if all sections are discarded by section garbage collection. |
|   auto *Marker = |
|       new GlobalVariable(Mod, Ty, IsConstant, |
|                          GlobalVariable::ExternalWeakLinkage, NoInitializer, |
|                          MarkerName); |
|   Marker->setVisibility(GlobalValue::HiddenVisibility); |
|   return Marker; |
| } |
| |
| // Maps a section suffix to the name of the section to use. The suffix is |
| // currently returned unchanged. |
| StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { |
| // FIXME: Other TargetTriple (req. string pool) |
| return SectionSuffix; |
| } |
| |
| // Name of the section start marker: "__start_" followed by SectionSuffix. |
| // NOTE(review): the result is a Twine over the suffix's characters and is |
| // presumably meant to be consumed immediately by the caller - confirm. |
| Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { |
|   return Twine("__start_", SectionSuffix); |
| } |
| |
| // Name of the section end marker: "__stop_" followed by SectionSuffix. |
| // NOTE(review): the result is a Twine over the suffix's characters and is |
| // presumably meant to be consumed immediately by the caller - confirm. |
| Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { |
|   return Twine("__stop_", SectionSuffix); |
| } |
| |
| } // namespace |
| |
| // Stores the options that this pass instance uses when it is run on a module. |
| SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( |
| SanitizerBinaryMetadataOptions Opts) |
| : Options(std::move(Opts)) {} |
| |
| // Runs the instrumentation on module M with the stored options. If the |
| // module was changed no analyses are preserved; otherwise all of them are. |
| PreservedAnalyses |
| SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { |
|   SanitizerBinaryMetadata Impl(M, Options); |
|   const bool Changed = Impl.run(); |
|   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); |
| } |