| //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the writeArchive function. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Object/ArchiveWriter.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/BinaryFormat/Magic.h" |
| #include "llvm/IR/LLVMContext.h" |
| #include "llvm/Object/Archive.h" |
| #include "llvm/Object/ObjectFile.h" |
| #include "llvm/Object/SymbolicFile.h" |
| #include "llvm/Support/EndianStream.h" |
| #include "llvm/Support/Errc.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/Format.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Support/ToolOutputFile.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #if !defined(_MSC_VER) && !defined(__MINGW32__) |
| #include <unistd.h> |
| #else |
| #include <io.h> |
| #endif |
| |
| using namespace llvm; |
| |
| NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) |
| : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), |
| MemberName(BufRef.getBufferIdentifier()) {} |
| |
| Expected<NewArchiveMember> |
| NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, |
| bool Deterministic) { |
| Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); |
| if (!BufOrErr) |
| return BufOrErr.takeError(); |
| |
| NewArchiveMember M; |
| assert(M.IsNew == false); |
| M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); |
| M.MemberName = M.Buf->getBufferIdentifier(); |
| if (!Deterministic) { |
| auto ModTimeOrErr = OldMember.getLastModified(); |
| if (!ModTimeOrErr) |
| return ModTimeOrErr.takeError(); |
| M.ModTime = ModTimeOrErr.get(); |
| Expected<unsigned> UIDOrErr = OldMember.getUID(); |
| if (!UIDOrErr) |
| return UIDOrErr.takeError(); |
| M.UID = UIDOrErr.get(); |
| Expected<unsigned> GIDOrErr = OldMember.getGID(); |
| if (!GIDOrErr) |
| return GIDOrErr.takeError(); |
| M.GID = GIDOrErr.get(); |
| Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); |
| if (!AccessModeOrErr) |
| return AccessModeOrErr.takeError(); |
| M.Perms = AccessModeOrErr.get(); |
| } |
| return std::move(M); |
| } |
| |
| Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, |
| bool Deterministic) { |
| sys::fs::file_status Status; |
| int FD; |
| if (auto EC = sys::fs::openFileForRead(FileName, FD)) |
| return errorCodeToError(EC); |
| assert(FD != -1); |
| |
| if (auto EC = sys::fs::status(FD, Status)) |
| return errorCodeToError(EC); |
| |
| // Opening a directory doesn't make sense. Let it fail. |
| // Linux cannot open directories with open(2), although |
| // cygwin and *bsd can. |
| if (Status.type() == sys::fs::file_type::directory_file) |
| return errorCodeToError(make_error_code(errc::is_a_directory)); |
| |
| ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = |
| MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); |
| if (!MemberBufferOrErr) |
| return errorCodeToError(MemberBufferOrErr.getError()); |
| |
| if (close(FD) != 0) |
| return errorCodeToError(std::error_code(errno, std::generic_category())); |
| |
| NewArchiveMember M; |
| M.IsNew = true; |
| M.Buf = std::move(*MemberBufferOrErr); |
| M.MemberName = M.Buf->getBufferIdentifier(); |
| if (!Deterministic) { |
| M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( |
| Status.getLastModificationTime()); |
| M.UID = Status.getUser(); |
| M.GID = Status.getGroup(); |
| M.Perms = Status.permissions(); |
| } |
| return std::move(M); |
| } |
| |
| template <typename T> |
| static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { |
| uint64_t OldPos = OS.tell(); |
| OS << Data; |
| unsigned SizeSoFar = OS.tell() - OldPos; |
| assert(SizeSoFar <= Size && "Data doesn't fit in Size"); |
| OS.indent(Size - SizeSoFar); |
| } |
| |
| static bool isBSDLike(object::Archive::Kind Kind) { |
| switch (Kind) { |
| case object::Archive::K_GNU: |
| case object::Archive::K_GNU64: |
| return false; |
| case object::Archive::K_BSD: |
| case object::Archive::K_DARWIN: |
| return true; |
| case object::Archive::K_DARWIN64: |
| case object::Archive::K_COFF: |
| break; |
| } |
| llvm_unreachable("not supported for writting"); |
| } |
| |
| template <class T> |
| static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { |
| support::endian::write(Out, Val, |
| isBSDLike(Kind) ? support::little : support::big); |
| } |
| |
| static void printRestOfMemberHeader( |
| raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, |
| unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { |
| printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); |
| |
| // The format has only 6 chars for uid and gid. Truncate if the provided |
| // values don't fit. |
| printWithSpacePadding(Out, UID % 1000000, 6); |
| printWithSpacePadding(Out, GID % 1000000, 6); |
| |
| printWithSpacePadding(Out, format("%o", Perms), 8); |
| printWithSpacePadding(Out, Size, 10); |
| Out << "`\n"; |
| } |
| |
| static void |
| printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, |
| const sys::TimePoint<std::chrono::seconds> &ModTime, |
| unsigned UID, unsigned GID, unsigned Perms, |
| unsigned Size) { |
| printWithSpacePadding(Out, Twine(Name) + "/", 16); |
| printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); |
| } |
| |
| static void |
| printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, |
| const sys::TimePoint<std::chrono::seconds> &ModTime, |
| unsigned UID, unsigned GID, unsigned Perms, |
| unsigned Size) { |
| uint64_t PosAfterHeader = Pos + 60 + Name.size(); |
| // Pad so that even 64 bit object files are aligned. |
| unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); |
| unsigned NameWithPadding = Name.size() + Pad; |
| printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); |
| printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, |
| NameWithPadding + Size); |
| Out << Name; |
| while (Pad--) |
| Out.write(uint8_t(0)); |
| } |
| |
| static bool useStringTable(bool Thin, StringRef Name) { |
| return Thin || Name.size() >= 16 || Name.contains('/'); |
| } |
| |
| // Compute the relative path from From to To. |
| static std::string computeRelativePath(StringRef From, StringRef To) { |
| if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) |
| return To; |
| |
| StringRef DirFrom = sys::path::parent_path(From); |
| auto FromI = sys::path::begin(DirFrom); |
| auto ToI = sys::path::begin(To); |
| while (*FromI == *ToI) { |
| ++FromI; |
| ++ToI; |
| } |
| |
| SmallString<128> Relative; |
| for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) |
| sys::path::append(Relative, ".."); |
| |
| for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) |
| sys::path::append(Relative, *ToI); |
| |
| #ifdef _WIN32 |
| // Replace backslashes with slashes so that the path is portable between *nix |
| // and Windows. |
| std::replace(Relative.begin(), Relative.end(), '\\', '/'); |
| #endif |
| |
| return Relative.str(); |
| } |
| |
| static bool is64BitKind(object::Archive::Kind Kind) { |
| switch (Kind) { |
| case object::Archive::K_GNU: |
| case object::Archive::K_BSD: |
| case object::Archive::K_DARWIN: |
| case object::Archive::K_COFF: |
| return false; |
| case object::Archive::K_DARWIN64: |
| case object::Archive::K_GNU64: |
| return true; |
| } |
| llvm_unreachable("not supported for writting"); |
| } |
| |
| static void addToStringTable(raw_ostream &Out, StringRef ArcName, |
| const NewArchiveMember &M, bool Thin) { |
| StringRef ID = M.Buf->getBufferIdentifier(); |
| if (Thin) { |
| if (M.IsNew) |
| Out << computeRelativePath(ArcName, ID); |
| else |
| Out << ID; |
| } else |
| Out << M.MemberName; |
| Out << "/\n"; |
| } |
| |
| static void printMemberHeader(raw_ostream &Out, uint64_t Pos, |
| raw_ostream &StringTable, |
| object::Archive::Kind Kind, bool Thin, |
| StringRef ArcName, const NewArchiveMember &M, |
| unsigned Size) { |
| if (isBSDLike(Kind)) |
| return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID, |
| M.Perms, Size); |
| if (!useStringTable(Thin, M.MemberName)) |
| return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID, |
| M.Perms, Size); |
| Out << '/'; |
| uint64_t NamePos = StringTable.tell(); |
| addToStringTable(StringTable, ArcName, M, Thin); |
| printWithSpacePadding(Out, NamePos, 15); |
| printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size); |
| } |
| |
| namespace { |
| struct MemberData { |
| std::vector<unsigned> Symbols; |
| std::string Header; |
| StringRef Data; |
| StringRef Padding; |
| }; |
| } // namespace |
| |
| static MemberData computeStringTable(StringRef Names) { |
| unsigned Size = Names.size(); |
| unsigned Pad = OffsetToAlignment(Size, 2); |
| std::string Header; |
| raw_string_ostream Out(Header); |
| printWithSpacePadding(Out, "//", 48); |
| printWithSpacePadding(Out, Size + Pad, 10); |
| Out << "`\n"; |
| Out.flush(); |
| return {{}, std::move(Header), Names, Pad ? "\n" : ""}; |
| } |
| |
| static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { |
| using namespace std::chrono; |
| |
| if (!Deterministic) |
| return time_point_cast<seconds>(system_clock::now()); |
| return sys::TimePoint<seconds>(); |
| } |
| |
| static bool isArchiveSymbol(const object::BasicSymbolRef &S) { |
| uint32_t Symflags = S.getFlags(); |
| if (Symflags & object::SymbolRef::SF_FormatSpecific) |
| return false; |
| if (!(Symflags & object::SymbolRef::SF_Global)) |
| return false; |
| if (Symflags & object::SymbolRef::SF_Undefined) |
| return false; |
| return true; |
| } |
| |
| static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, |
| uint64_t Val) { |
| if (is64BitKind(Kind)) |
| print<uint64_t>(Out, Kind, Val); |
| else |
| print<uint32_t>(Out, Kind, Val); |
| } |
| |
| static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, |
| bool Deterministic, ArrayRef<MemberData> Members, |
| StringRef StringTable) { |
| if (StringTable.empty()) |
| return; |
| |
| unsigned NumSyms = 0; |
| for (const MemberData &M : Members) |
| NumSyms += M.Symbols.size(); |
| |
| unsigned Size = 0; |
| Size += is64BitKind(Kind) ? 8 : 4; // Number of entries |
| if (isBSDLike(Kind)) |
| Size += NumSyms * 8; // Table |
| else if (is64BitKind(Kind)) |
| Size += NumSyms * 8; // Table |
| else |
| Size += NumSyms * 4; // Table |
| if (isBSDLike(Kind)) |
| Size += 4; // byte count |
| Size += StringTable.size(); |
| // ld64 expects the members to be 8-byte aligned for 64-bit content and at |
| // least 4-byte aligned for 32-bit content. Opt for the larger encoding |
| // uniformly. |
| // We do this for all bsd formats because it simplifies aligning members. |
| unsigned Alignment = isBSDLike(Kind) ? 8 : 2; |
| unsigned Pad = OffsetToAlignment(Size, Alignment); |
| Size += Pad; |
| |
| if (isBSDLike(Kind)) |
| printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0, |
| 0, Size); |
| else if (is64BitKind(Kind)) |
| printGNUSmallMemberHeader(Out, "/SYM64", now(Deterministic), 0, 0, 0, Size); |
| else |
| printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size); |
| |
| uint64_t Pos = Out.tell() + Size; |
| |
| if (isBSDLike(Kind)) |
| print<uint32_t>(Out, Kind, NumSyms * 8); |
| else |
| printNBits(Out, Kind, NumSyms); |
| |
| for (const MemberData &M : Members) { |
| for (unsigned StringOffset : M.Symbols) { |
| if (isBSDLike(Kind)) |
| print<uint32_t>(Out, Kind, StringOffset); |
| printNBits(Out, Kind, Pos); // member offset |
| } |
| Pos += M.Header.size() + M.Data.size() + M.Padding.size(); |
| } |
| |
| if (isBSDLike(Kind)) |
| // byte count of the string table |
| print<uint32_t>(Out, Kind, StringTable.size()); |
| Out << StringTable; |
| |
| while (Pad--) |
| Out.write(uint8_t(0)); |
| } |
| |
| static Expected<std::vector<unsigned>> |
| getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { |
| std::vector<unsigned> Ret; |
| LLVMContext Context; |
| |
| Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = |
| object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown, |
| &Context); |
| if (!ObjOrErr) { |
| // FIXME: check only for "not an object file" errors. |
| consumeError(ObjOrErr.takeError()); |
| return Ret; |
| } |
| |
| HasObject = true; |
| object::SymbolicFile &Obj = *ObjOrErr.get(); |
| for (const object::BasicSymbolRef &S : Obj.symbols()) { |
| if (!isArchiveSymbol(S)) |
| continue; |
| Ret.push_back(SymNames.tell()); |
| if (auto EC = S.printName(SymNames)) |
| return errorCodeToError(EC); |
| SymNames << '\0'; |
| } |
| return Ret; |
| } |
| |
| static Expected<std::vector<MemberData>> |
| computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, |
| object::Archive::Kind Kind, bool Thin, StringRef ArcName, |
| ArrayRef<NewArchiveMember> NewMembers) { |
| static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; |
| |
| // This ignores the symbol table, but we only need the value mod 8 and the |
| // symbol table is aligned to be a multiple of 8 bytes |
| uint64_t Pos = 0; |
| |
| std::vector<MemberData> Ret; |
| bool HasObject = false; |
| for (const NewArchiveMember &M : NewMembers) { |
| std::string Header; |
| raw_string_ostream Out(Header); |
| |
| MemoryBufferRef Buf = M.Buf->getMemBufferRef(); |
| StringRef Data = Thin ? "" : Buf.getBuffer(); |
| |
| // ld64 expects the members to be 8-byte aligned for 64-bit content and at |
| // least 4-byte aligned for 32-bit content. Opt for the larger encoding |
| // uniformly. This matches the behaviour with cctools and ensures that ld64 |
| // is happy with archives that we generate. |
| unsigned MemberPadding = Kind == object::Archive::K_DARWIN |
| ? OffsetToAlignment(Data.size(), 8) |
| : 0; |
| unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2); |
| StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); |
| |
| printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M, |
| Buf.getBufferSize() + MemberPadding); |
| Out.flush(); |
| |
| Expected<std::vector<unsigned>> Symbols = |
| getSymbols(Buf, SymNames, HasObject); |
| if (auto E = Symbols.takeError()) |
| return std::move(E); |
| |
| Pos += Header.size() + Data.size() + Padding.size(); |
| Ret.push_back({std::move(*Symbols), std::move(Header), Data, Padding}); |
| } |
| // If there are no symbols, emit an empty symbol table, to satisfy Solaris |
| // tools, older versions of which expect a symbol table in a non-empty |
| // archive, regardless of whether there are any symbols in it. |
| if (HasObject && SymNames.tell() == 0) |
| SymNames << '\0' << '\0' << '\0'; |
| return Ret; |
| } |
| |
| Error llvm::writeArchive(StringRef ArcName, |
| ArrayRef<NewArchiveMember> NewMembers, |
| bool WriteSymtab, object::Archive::Kind Kind, |
| bool Deterministic, bool Thin, |
| std::unique_ptr<MemoryBuffer> OldArchiveBuf) { |
| assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); |
| |
| SmallString<0> SymNamesBuf; |
| raw_svector_ostream SymNames(SymNamesBuf); |
| SmallString<0> StringTableBuf; |
| raw_svector_ostream StringTable(StringTableBuf); |
| |
| Expected<std::vector<MemberData>> DataOrErr = |
| computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers); |
| if (Error E = DataOrErr.takeError()) |
| return E; |
| std::vector<MemberData> &Data = *DataOrErr; |
| |
| if (!StringTableBuf.empty()) |
| Data.insert(Data.begin(), computeStringTable(StringTableBuf)); |
| |
| // We would like to detect if we need to switch to a 64-bit symbol table. |
| if (WriteSymtab) { |
| uint64_t MaxOffset = 0; |
| uint64_t LastOffset = MaxOffset; |
| for (const auto& M : Data) { |
| // Record the start of the member's offset |
| LastOffset = MaxOffset; |
| // Account for the size of each part associated with the member. |
| MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size(); |
| // We assume 32-bit symbols to see if 32-bit symbols are possible or not. |
| MaxOffset += M.Symbols.size() * 4; |
| } |
| |
| // The SYM64 format is used when an archive's member offsets are larger than |
| // 32-bits can hold. The need for this shift in format is detected by |
| // writeArchive. To test this we need to generate a file with a member that |
| // has an offset larger than 32-bits but this demands a very slow test. To |
| // speed the test up we use this environment variable to pretend like the |
| // cutoff happens before 32-bits and instead happens at some much smaller |
| // value. |
| const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); |
| int Sym64Threshold = 32; |
| if (Sym64Env) |
| StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); |
| |
| // If LastOffset isn't going to fit in a 32-bit varible we need to switch |
| // to 64-bit. Note that the file can be larger than 4GB as long as the last |
| // member starts before the 4GB offset. |
| if (LastOffset >= (1ULL << Sym64Threshold)) |
| Kind = object::Archive::K_GNU64; |
| } |
| |
| Expected<sys::fs::TempFile> Temp = |
| sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); |
| if (!Temp) |
| return Temp.takeError(); |
| |
| raw_fd_ostream Out(Temp->FD, false); |
| if (Thin) |
| Out << "!<thin>\n"; |
| else |
| Out << "!<arch>\n"; |
| |
| if (WriteSymtab) |
| writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); |
| |
| for (const MemberData &M : Data) |
| Out << M.Header << M.Data << M.Padding; |
| |
| Out.flush(); |
| |
| // At this point, we no longer need whatever backing memory |
| // was used to generate the NewMembers. On Windows, this buffer |
| // could be a mapped view of the file we want to replace (if |
| // we're updating an existing archive, say). In that case, the |
| // rename would still succeed, but it would leave behind a |
| // temporary file (actually the original file renamed) because |
| // a file cannot be deleted while there's a handle open on it, |
| // only renamed. So by freeing this buffer, this ensures that |
| // the last open handle on the destination file, if any, is |
| // closed before we attempt to rename. |
| OldArchiveBuf.reset(); |
| |
| return Temp->keep(ArcName); |
| } |