| //===- GsymCreator.cpp ----------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/DebugInfo/GSYM/GsymCreator.h" |
| #include "llvm/DebugInfo/GSYM/FileWriter.h" |
| #include "llvm/DebugInfo/GSYM/Header.h" |
| #include "llvm/DebugInfo/GSYM/LineTable.h" |
| #include "llvm/MC/StringTableBuilder.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #include <algorithm> |
| #include <cassert> |
| #include <functional> |
| #include <vector> |
| |
| using namespace llvm; |
| using namespace gsym; |
| |
| GsymCreator::GsymCreator(bool Quiet) |
| : StrTab(StringTableBuilder::ELF), Quiet(Quiet) { |
| insertFile(StringRef()); |
| } |
| |
| uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { |
| llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); |
| llvm::StringRef filename = llvm::sys::path::filename(Path, Style); |
| // We must insert the strings first, then call the FileEntry constructor. |
| // If we inline the insertString() function call into the constructor, the |
| // call order is undefined due to parameter lists not having any ordering |
| // requirements. |
| const uint32_t Dir = insertString(directory); |
| const uint32_t Base = insertString(filename); |
| FileEntry FE(Dir, Base); |
| |
| std::lock_guard<std::mutex> Guard(Mutex); |
| const auto NextIndex = Files.size(); |
| // Find FE in hash map and insert if not present. |
| auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); |
| if (R.second) |
| Files.emplace_back(FE); |
| return R.first->second; |
| } |
| |
| llvm::Error GsymCreator::save(StringRef Path, |
| llvm::support::endianness ByteOrder) const { |
| std::error_code EC; |
| raw_fd_ostream OutStrm(Path, EC); |
| if (EC) |
| return llvm::errorCodeToError(EC); |
| FileWriter O(OutStrm, ByteOrder); |
| return encode(O); |
| } |
| |
| llvm::Error GsymCreator::encode(FileWriter &O) const { |
| std::lock_guard<std::mutex> Guard(Mutex); |
| if (Funcs.empty()) |
| return createStringError(std::errc::invalid_argument, |
| "no functions to encode"); |
| if (!Finalized) |
| return createStringError(std::errc::invalid_argument, |
| "GsymCreator wasn't finalized prior to encoding"); |
| |
| if (Funcs.size() > UINT32_MAX) |
| return createStringError(std::errc::invalid_argument, |
| "too many FunctionInfos"); |
| |
| const uint64_t MinAddr = |
| BaseAddress ? *BaseAddress : Funcs.front().startAddress(); |
| const uint64_t MaxAddr = Funcs.back().startAddress(); |
| const uint64_t AddrDelta = MaxAddr - MinAddr; |
| Header Hdr; |
| Hdr.Magic = GSYM_MAGIC; |
| Hdr.Version = GSYM_VERSION; |
| Hdr.AddrOffSize = 0; |
| Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); |
| Hdr.BaseAddress = MinAddr; |
| Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); |
| Hdr.StrtabOffset = 0; // We will fix this up later. |
| Hdr.StrtabSize = 0; // We will fix this up later. |
| memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); |
| if (UUID.size() > sizeof(Hdr.UUID)) |
| return createStringError(std::errc::invalid_argument, |
| "invalid UUID size %u", (uint32_t)UUID.size()); |
| // Set the address offset size correctly in the GSYM header. |
| if (AddrDelta <= UINT8_MAX) |
| Hdr.AddrOffSize = 1; |
| else if (AddrDelta <= UINT16_MAX) |
| Hdr.AddrOffSize = 2; |
| else if (AddrDelta <= UINT32_MAX) |
| Hdr.AddrOffSize = 4; |
| else |
| Hdr.AddrOffSize = 8; |
| // Copy the UUID value if we have one. |
| if (UUID.size() > 0) |
| memcpy(Hdr.UUID, UUID.data(), UUID.size()); |
| // Write out the header. |
| llvm::Error Err = Hdr.encode(O); |
| if (Err) |
| return Err; |
| |
| // Write out the address offsets. |
| O.alignTo(Hdr.AddrOffSize); |
| for (const auto &FuncInfo : Funcs) { |
| uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; |
| switch (Hdr.AddrOffSize) { |
| case 1: |
| O.writeU8(static_cast<uint8_t>(AddrOffset)); |
| break; |
| case 2: |
| O.writeU16(static_cast<uint16_t>(AddrOffset)); |
| break; |
| case 4: |
| O.writeU32(static_cast<uint32_t>(AddrOffset)); |
| break; |
| case 8: |
| O.writeU64(AddrOffset); |
| break; |
| } |
| } |
| |
| // Write out all zeros for the AddrInfoOffsets. |
| O.alignTo(4); |
| const off_t AddrInfoOffsetsOffset = O.tell(); |
| for (size_t i = 0, n = Funcs.size(); i < n; ++i) |
| O.writeU32(0); |
| |
| // Write out the file table |
| O.alignTo(4); |
| assert(!Files.empty()); |
| assert(Files[0].Dir == 0); |
| assert(Files[0].Base == 0); |
| size_t NumFiles = Files.size(); |
| if (NumFiles > UINT32_MAX) |
| return createStringError(std::errc::invalid_argument, "too many files"); |
| O.writeU32(static_cast<uint32_t>(NumFiles)); |
| for (auto File : Files) { |
| O.writeU32(File.Dir); |
| O.writeU32(File.Base); |
| } |
| |
| // Write out the sting table. |
| const off_t StrtabOffset = O.tell(); |
| StrTab.write(O.get_stream()); |
| const off_t StrtabSize = O.tell() - StrtabOffset; |
| std::vector<uint32_t> AddrInfoOffsets; |
| |
| // Write out the address infos for each function info. |
| for (const auto &FuncInfo : Funcs) { |
| if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) |
| AddrInfoOffsets.push_back(OffsetOrErr.get()); |
| else |
| return OffsetOrErr.takeError(); |
| } |
| // Fixup the string table offset and size in the header |
| O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); |
| O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); |
| |
| // Fixup all address info offsets |
| uint64_t Offset = 0; |
| for (auto AddrInfoOffset : AddrInfoOffsets) { |
| O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); |
| Offset += 4; |
| } |
| return ErrorSuccess(); |
| } |
| |
// Similar to std::remove_if, but the predicate is binary: it is called as
// Pred(Kept, Curr), where Kept is the most recently retained element and Curr
// is the element being considered. Curr is removed when the predicate returns
// true. The first element of a non-empty range is always retained.
//
// Retained elements are moved towards the front of the range in their original
// order. The returned iterator is the new logical end; elements from there to
// LastIt are left in a valid but unspecified state, so callers normally erase
// them.
//
// The predicate is always given the last retained element as its first
// argument, including directly after the first removal, so an element that was
// removed never takes part in a later comparison.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return LastIt;

  // KeptIt always points at the last retained element.
  ForwardIt KeptIt = FirstIt;
  for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;) {
    if (Pred(*KeptIt, *CurrIt))
      continue;
    // Retain *CurrIt. Skip the move while nothing has been removed yet, since
    // source and destination are then the same element.
    if (++KeptIt != CurrIt)
      *KeptIt = std::move(*CurrIt);
  }
  return ++KeptIt;
}
| |
| llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { |
| std::lock_guard<std::mutex> Guard(Mutex); |
| if (Finalized) |
| return createStringError(std::errc::invalid_argument, "already finalized"); |
| Finalized = true; |
| |
| // Sort function infos so we can emit sorted functions. |
| llvm::sort(Funcs); |
| |
| // Don't let the string table indexes change by finalizing in order. |
| StrTab.finalizeInOrder(); |
| |
| // Remove duplicates function infos that have both entries from debug info |
| // (DWARF or Breakpad) and entries from the SymbolTable. |
| // |
| // Also handle overlapping function. Usually there shouldn't be any, but they |
| // can and do happen in some rare cases. |
| // |
| // (a) (b) (c) |
| // ^ ^ ^ ^ |
| // |X |Y |X ^ |X |
| // | | | |Y | ^ |
| // | | | v v |Y |
| // v v v v |
| // |
| // In (a) and (b), Y is ignored and X will be reported for the full range. |
| // In (c), both functions will be included in the result and lookups for an |
| // address in the intersection will return Y because of binary search. |
| // |
| // Note that in case of (b), we cannot include Y in the result because then |
| // we wouldn't find any function for range (end of Y, end of X) |
| // with binary search |
| auto NumBefore = Funcs.size(); |
| Funcs.erase( |
| removeIfBinary(Funcs.begin(), Funcs.end(), |
| [&](const auto &Prev, const auto &Curr) { |
| // Empty ranges won't intersect, but we still need to |
| // catch the case where we have multiple symbols at the |
| // same address and coalesce them. |
| const bool ranges_equal = Prev.Range == Curr.Range; |
| if (ranges_equal || Prev.Range.intersects(Curr.Range)) { |
| // Overlapping ranges or empty identical ranges. |
| if (ranges_equal) { |
| // Same address range. Check if one is from debug |
| // info and the other is from a symbol table. If |
| // so, then keep the one with debug info. Our |
| // sorting guarantees that entries with matching |
| // address ranges that have debug info are last in |
| // the sort. |
| if (Prev == Curr) { |
| // FunctionInfo entries match exactly (range, |
| // lines, inlines) |
| |
| // We used to output a warning here, but this was |
| // so frequent on some binaries, in particular |
| // when those were built with GCC, that it slowed |
| // down processing extremely. |
| return true; |
| } else { |
| if (!Prev.hasRichInfo() && Curr.hasRichInfo()) { |
| // Same address range, one with no debug info |
| // (symbol) and the next with debug info. Keep |
| // the latter. |
| return true; |
| } else { |
| if (!Quiet) { |
| OS << "warning: same address range contains " |
| "different debug " |
| << "info. Removing:\n" |
| << Prev << "\nIn favor of this one:\n" |
| << Curr << "\n"; |
| } |
| return true; |
| } |
| } |
| } else { |
| if (!Quiet) { // print warnings about overlaps |
| OS << "warning: function ranges overlap:\n" |
| << Prev << "\n" |
| << Curr << "\n"; |
| } |
| } |
| } else if (Prev.Range.size() == 0 && |
| Curr.Range.contains(Prev.Range.start())) { |
| if (!Quiet) { |
| OS << "warning: removing symbol:\n" |
| << Prev << "\nKeeping:\n" |
| << Curr << "\n"; |
| } |
| return true; |
| } |
| |
| return false; |
| }), |
| Funcs.end()); |
| |
| // If our last function info entry doesn't have a size and if we have valid |
| // text ranges, we should set the size of the last entry since any search for |
| // a high address might match our last entry. By fixing up this size, we can |
| // help ensure we don't cause lookups to always return the last symbol that |
| // has no size when doing lookups. |
| if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { |
| if (auto Range = |
| ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) { |
| Funcs.back().Range = {Funcs.back().Range.start(), Range->end()}; |
| } |
| } |
| OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " |
| << Funcs.size() << " total\n"; |
| return Error::success(); |
| } |
| |
| uint32_t GsymCreator::insertString(StringRef S, bool Copy) { |
| if (S.empty()) |
| return 0; |
| |
| // The hash can be calculated outside the lock. |
| CachedHashStringRef CHStr(S); |
| std::lock_guard<std::mutex> Guard(Mutex); |
| if (Copy) { |
| // We need to provide backing storage for the string if requested |
| // since StringTableBuilder stores references to strings. Any string |
| // that comes from a section in an object file doesn't need to be |
| // copied, but any string created by code will need to be copied. |
| // This allows GsymCreator to be really fast when parsing DWARF and |
| // other object files as most strings don't need to be copied. |
| if (!StrTab.contains(CHStr)) |
| CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), |
| CHStr.hash()}; |
| } |
| return StrTab.add(CHStr); |
| } |
| |
| void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { |
| std::lock_guard<std::mutex> Guard(Mutex); |
| Ranges.insert(FI.Range); |
| Funcs.emplace_back(std::move(FI)); |
| } |
| |
| void GsymCreator::forEachFunctionInfo( |
| std::function<bool(FunctionInfo &)> const &Callback) { |
| std::lock_guard<std::mutex> Guard(Mutex); |
| for (auto &FI : Funcs) { |
| if (!Callback(FI)) |
| break; |
| } |
| } |
| |
| void GsymCreator::forEachFunctionInfo( |
| std::function<bool(const FunctionInfo &)> const &Callback) const { |
| std::lock_guard<std::mutex> Guard(Mutex); |
| for (const auto &FI : Funcs) { |
| if (!Callback(FI)) |
| break; |
| } |
| } |
| |
| size_t GsymCreator::getNumFunctionInfos() const { |
| std::lock_guard<std::mutex> Guard(Mutex); |
| return Funcs.size(); |
| } |
| |
| bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { |
| if (ValidTextRanges) |
| return ValidTextRanges->contains(Addr); |
| return true; // No valid text ranges has been set, so accept all ranges. |
| } |
| |
| bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { |
| std::lock_guard<std::mutex> Guard(Mutex); |
| return Ranges.contains(Addr); |
| } |