Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// |
| 2 | // |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains data definitions and a reader and builder for a symbol |
| 10 | // table for LLVM IR. Its purpose is to allow linkers and other consumers of |
| 11 | // bitcode files to efficiently read the symbol table for symbol resolution |
| 12 | // purposes without needing to construct a module in memory. |
| 13 | // |
| 14 | // As with most object files the symbol table has two parts: the symbol table |
| 15 | // itself and a string table which is referenced by the symbol table. |
| 16 | // |
| 17 | // A symbol table corresponds to a single bitcode file, which may consist of |
| 18 | // multiple modules, so symbol tables may likewise contain symbols for multiple |
| 19 | // modules. |
| 20 | // |
| 21 | //===----------------------------------------------------------------------===// |
| 22 | |
| 23 | #ifndef LLVM_OBJECT_IRSYMTAB_H |
| 24 | #define LLVM_OBJECT_IRSYMTAB_H |
| 25 | |
| 26 | #include "llvm/ADT/ArrayRef.h" |
| 27 | #include "llvm/ADT/StringRef.h" |
| 28 | #include "llvm/ADT/iterator_range.h" |
| 29 | #include "llvm/IR/GlobalValue.h" |
| 30 | #include "llvm/Object/SymbolicFile.h" |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 31 | #include "llvm/Support/Allocator.h" |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 32 | #include "llvm/Support/Endian.h" |
| 33 | #include "llvm/Support/Error.h" |
| 34 | #include <cassert> |
| 35 | #include <cstdint> |
| 36 | #include <vector> |
| 37 | |
| 38 | namespace llvm { |
| 39 | |
| 40 | struct BitcodeFileContents; |
| 41 | class StringTableBuilder; |
| 42 | |
| 43 | namespace irsymtab { |
| 44 | |
| 45 | namespace storage { |
| 46 | |
| 47 | // The data structures in this namespace define the low-level serialization |
| 48 | // format. Clients that just want to read a symbol table should use the |
| 49 | // irsymtab::Reader class. |
| 50 | |
| 51 | using Word = support::ulittle32_t; |
| 52 | |
| 53 | /// A reference to a string in the string table. |
| 54 | struct Str { |
| 55 | Word Offset, Size; |
| 56 | |
| 57 | StringRef get(StringRef Strtab) const { |
| 58 | return {Strtab.data() + Offset, Size}; |
| 59 | } |
| 60 | }; |
| 61 | |
| 62 | /// A reference to a range of objects in the symbol table. |
| 63 | template <typename T> struct Range { |
| 64 | Word Offset, Size; |
| 65 | |
| 66 | ArrayRef<T> get(StringRef Symtab) const { |
| 67 | return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size}; |
| 68 | } |
| 69 | }; |
| 70 | |
| 71 | /// Describes the range of a particular module's symbols within the symbol |
| 72 | /// table. |
| 73 | struct Module { |
| 74 | Word Begin, End; |
| 75 | |
| 76 | /// The index of the first Uncommon for this Module. |
| 77 | Word UncBegin; |
| 78 | }; |
| 79 | |
| 80 | /// This is equivalent to an IR comdat. |
| 81 | struct Comdat { |
| 82 | Str Name; |
| 83 | }; |
| 84 | |
| 85 | /// Contains the information needed by linkers for symbol resolution, as well as |
| 86 | /// by the LTO implementation itself. |
| 87 | struct Symbol { |
| 88 | /// The mangled symbol name. |
| 89 | Str Name; |
| 90 | |
| 91 | /// The unmangled symbol name, or the empty string if this is not an IR |
| 92 | /// symbol. |
| 93 | Str IRName; |
| 94 | |
| 95 | /// The index into Header::Comdats, or -1 if not a comdat member. |
| 96 | Word ComdatIndex; |
| 97 | |
| 98 | Word Flags; |
| 99 | enum FlagBits { |
| 100 | FB_visibility, // 2 bits |
| 101 | FB_has_uncommon = FB_visibility + 2, |
| 102 | FB_undefined, |
| 103 | FB_weak, |
| 104 | FB_common, |
| 105 | FB_indirect, |
| 106 | FB_used, |
| 107 | FB_tls, |
| 108 | FB_may_omit, |
| 109 | FB_global, |
| 110 | FB_format_specific, |
| 111 | FB_unnamed_addr, |
| 112 | FB_executable, |
| 113 | }; |
| 114 | }; |
| 115 | |
| 116 | /// This data structure contains rarely used symbol fields and is optionally |
| 117 | /// referenced by a Symbol. |
| 118 | struct Uncommon { |
| 119 | Word CommonSize, CommonAlign; |
| 120 | |
| 121 | /// COFF-specific: the name of the symbol that a weak external resolves to |
| 122 | /// if not defined. |
| 123 | Str COFFWeakExternFallbackName; |
| 124 | |
| 125 | /// Specified section name, if any. |
| 126 | Str SectionName; |
| 127 | }; |
| 128 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 129 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 130 | struct Header { |
| 131 | /// Version number of the symtab format. This number should be incremented |
| 132 | /// when the format changes, but it does not need to be incremented if a |
| 133 | /// change to LLVM would cause it to create a different symbol table. |
| 134 | Word Version; |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 135 | enum { kCurrentVersion = 2 }; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 136 | |
| 137 | /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). |
| 138 | /// Consumers should rebuild the symbol table from IR if the producer's |
| 139 | /// version does not match the consumer's version due to potential differences |
| 140 | /// in symbol table format, symbol enumeration order and so on. |
| 141 | Str Producer; |
| 142 | |
| 143 | Range<Module> Modules; |
| 144 | Range<Comdat> Comdats; |
| 145 | Range<Symbol> Symbols; |
| 146 | Range<Uncommon> Uncommons; |
| 147 | |
| 148 | Str TargetTriple, SourceFileName; |
| 149 | |
| 150 | /// COFF-specific: linker directives. |
| 151 | Str COFFLinkerOpts; |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 152 | |
| 153 | /// Dependent Library Specifiers |
| 154 | Range<Str> DependentLibraries; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 155 | }; |
| 156 | |
| 157 | } // end namespace storage |
| 158 | |
| 159 | /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for |
| 160 | /// Mods. |
| 161 | Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, |
| 162 | StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); |
| 163 | |
| 164 | /// This represents a symbol that has been read from a storage::Symbol and |
| 165 | /// possibly a storage::Uncommon. |
| 166 | struct Symbol { |
| 167 | // Copied from storage::Symbol. |
| 168 | StringRef Name, IRName; |
| 169 | int ComdatIndex; |
| 170 | uint32_t Flags; |
| 171 | |
| 172 | // Copied from storage::Uncommon. |
| 173 | uint32_t CommonSize, CommonAlign; |
| 174 | StringRef COFFWeakExternFallbackName; |
| 175 | StringRef SectionName; |
| 176 | |
| 177 | /// Returns the mangled symbol name. |
| 178 | StringRef getName() const { return Name; } |
| 179 | |
| 180 | /// Returns the unmangled symbol name, or the empty string if this is not an |
| 181 | /// IR symbol. |
| 182 | StringRef getIRName() const { return IRName; } |
| 183 | |
| 184 | /// Returns the index into the comdat table (see Reader::getComdatTable()), or |
| 185 | /// -1 if not a comdat member. |
| 186 | int getComdatIndex() const { return ComdatIndex; } |
| 187 | |
| 188 | using S = storage::Symbol; |
| 189 | |
| 190 | GlobalValue::VisibilityTypes getVisibility() const { |
| 191 | return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); |
| 192 | } |
| 193 | |
| 194 | bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } |
| 195 | bool isWeak() const { return (Flags >> S::FB_weak) & 1; } |
| 196 | bool isCommon() const { return (Flags >> S::FB_common) & 1; } |
| 197 | bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } |
| 198 | bool isUsed() const { return (Flags >> S::FB_used) & 1; } |
| 199 | bool isTLS() const { return (Flags >> S::FB_tls) & 1; } |
| 200 | |
| 201 | bool canBeOmittedFromSymbolTable() const { |
| 202 | return (Flags >> S::FB_may_omit) & 1; |
| 203 | } |
| 204 | |
| 205 | bool isGlobal() const { return (Flags >> S::FB_global) & 1; } |
| 206 | bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } |
| 207 | bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } |
| 208 | bool isExecutable() const { return (Flags >> S::FB_executable) & 1; } |
| 209 | |
| 210 | uint64_t getCommonSize() const { |
| 211 | assert(isCommon()); |
| 212 | return CommonSize; |
| 213 | } |
| 214 | |
| 215 | uint32_t getCommonAlignment() const { |
| 216 | assert(isCommon()); |
| 217 | return CommonAlign; |
| 218 | } |
| 219 | |
| 220 | /// COFF-specific: for weak externals, returns the name of the symbol that is |
| 221 | /// used as a fallback if the weak external remains undefined. |
| 222 | StringRef getCOFFWeakExternalFallback() const { |
| 223 | assert(isWeak() && isIndirect()); |
| 224 | return COFFWeakExternFallbackName; |
| 225 | } |
| 226 | |
| 227 | StringRef getSectionName() const { return SectionName; } |
| 228 | }; |
| 229 | |
| 230 | /// This class can be used to read a Symtab and Strtab produced by |
| 231 | /// irsymtab::build. |
| 232 | class Reader { |
| 233 | StringRef Symtab, Strtab; |
| 234 | |
| 235 | ArrayRef<storage::Module> Modules; |
| 236 | ArrayRef<storage::Comdat> Comdats; |
| 237 | ArrayRef<storage::Symbol> Symbols; |
| 238 | ArrayRef<storage::Uncommon> Uncommons; |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 239 | ArrayRef<storage::Str> DependentLibraries; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 240 | |
| 241 | StringRef str(storage::Str S) const { return S.get(Strtab); } |
| 242 | |
| 243 | template <typename T> ArrayRef<T> range(storage::Range<T> R) const { |
| 244 | return R.get(Symtab); |
| 245 | } |
| 246 | |
| 247 | const storage::Header &header() const { |
| 248 | return *reinterpret_cast<const storage::Header *>(Symtab.data()); |
| 249 | } |
| 250 | |
| 251 | public: |
| 252 | class SymbolRef; |
| 253 | |
| 254 | Reader() = default; |
| 255 | Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { |
| 256 | Modules = range(header().Modules); |
| 257 | Comdats = range(header().Comdats); |
| 258 | Symbols = range(header().Symbols); |
| 259 | Uncommons = range(header().Uncommons); |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 260 | DependentLibraries = range(header().DependentLibraries); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 261 | } |
| 262 | |
| 263 | using symbol_range = iterator_range<object::content_iterator<SymbolRef>>; |
| 264 | |
| 265 | /// Returns the symbol table for the entire bitcode file. |
| 266 | /// The symbols enumerated by this method are ephemeral, but they can be |
| 267 | /// copied into an irsymtab::Symbol object. |
| 268 | symbol_range symbols() const; |
| 269 | |
| 270 | size_t getNumModules() const { return Modules.size(); } |
| 271 | |
| 272 | /// Returns a slice of the symbol table for the I'th module in the file. |
| 273 | /// The symbols enumerated by this method are ephemeral, but they can be |
| 274 | /// copied into an irsymtab::Symbol object. |
| 275 | symbol_range module_symbols(unsigned I) const; |
| 276 | |
| 277 | StringRef getTargetTriple() const { return str(header().TargetTriple); } |
| 278 | |
| 279 | /// Returns the source file path specified at compile time. |
| 280 | StringRef getSourceFileName() const { return str(header().SourceFileName); } |
| 281 | |
| 282 | /// Returns a table with all the comdats used by this file. |
| 283 | std::vector<StringRef> getComdatTable() const { |
| 284 | std::vector<StringRef> ComdatTable; |
| 285 | ComdatTable.reserve(Comdats.size()); |
| 286 | for (auto C : Comdats) |
| 287 | ComdatTable.push_back(str(C.Name)); |
| 288 | return ComdatTable; |
| 289 | } |
| 290 | |
| 291 | /// COFF-specific: returns linker options specified in the input file. |
| 292 | StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); } |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 293 | |
| 294 | /// Returns dependent library specifiers |
| 295 | std::vector<StringRef> getDependentLibraries() const { |
| 296 | std::vector<StringRef> Specifiers; |
| 297 | Specifiers.reserve(DependentLibraries.size()); |
| 298 | for (auto S : DependentLibraries) { |
| 299 | Specifiers.push_back(str(S)); |
| 300 | } |
| 301 | return Specifiers; |
| 302 | } |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 303 | }; |
| 304 | |
| 305 | /// Ephemeral symbols produced by Reader::symbols() and |
| 306 | /// Reader::module_symbols(). |
| 307 | class Reader::SymbolRef : public Symbol { |
| 308 | const storage::Symbol *SymI, *SymE; |
| 309 | const storage::Uncommon *UncI; |
| 310 | const Reader *R; |
| 311 | |
| 312 | void read() { |
| 313 | if (SymI == SymE) |
| 314 | return; |
| 315 | |
| 316 | Name = R->str(SymI->Name); |
| 317 | IRName = R->str(SymI->IRName); |
| 318 | ComdatIndex = SymI->ComdatIndex; |
| 319 | Flags = SymI->Flags; |
| 320 | |
| 321 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { |
| 322 | CommonSize = UncI->CommonSize; |
| 323 | CommonAlign = UncI->CommonAlign; |
| 324 | COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); |
| 325 | SectionName = R->str(UncI->SectionName); |
| 326 | } else |
| 327 | // Reset this field so it can be queried unconditionally for all symbols. |
| 328 | SectionName = ""; |
| 329 | } |
| 330 | |
| 331 | public: |
| 332 | SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, |
| 333 | const storage::Uncommon *UncI, const Reader *R) |
| 334 | : SymI(SymI), SymE(SymE), UncI(UncI), R(R) { |
| 335 | read(); |
| 336 | } |
| 337 | |
| 338 | void moveNext() { |
| 339 | ++SymI; |
| 340 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) |
| 341 | ++UncI; |
| 342 | read(); |
| 343 | } |
| 344 | |
| 345 | bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } |
| 346 | }; |
| 347 | |
| 348 | inline Reader::symbol_range Reader::symbols() const { |
| 349 | return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this), |
| 350 | SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)}; |
| 351 | } |
| 352 | |
| 353 | inline Reader::symbol_range Reader::module_symbols(unsigned I) const { |
| 354 | const storage::Module &M = Modules[I]; |
| 355 | const storage::Symbol *MBegin = Symbols.begin() + M.Begin, |
| 356 | *MEnd = Symbols.begin() + M.End; |
| 357 | return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this), |
| 358 | SymbolRef(MEnd, MEnd, nullptr, this)}; |
| 359 | } |
| 360 | |
| 361 | /// The contents of the irsymtab in a bitcode file. Any underlying data for the |
| 362 | /// irsymtab are owned by Symtab and Strtab. |
| 363 | struct FileContents { |
| 364 | SmallVector<char, 0> Symtab, Strtab; |
| 365 | Reader TheReader; |
| 366 | }; |
| 367 | |
| 368 | /// Reads the contents of a bitcode file, creating its irsymtab if necessary. |
| 369 | Expected<FileContents> readBitcode(const BitcodeFileContents &BFC); |
| 370 | |
| 371 | } // end namespace irsymtab |
| 372 | } // end namespace llvm |
| 373 | |
| 374 | #endif // LLVM_OBJECT_IRSYMTAB_H |