Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// |
| 2 | // |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This header defines interfaces to read LLVM bitcode files/streams. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_BITCODE_BITCODEREADER_H |
| 14 | #define LLVM_BITCODE_BITCODEREADER_H |
| 15 | |
| 16 | #include "llvm/ADT/ArrayRef.h" |
| 17 | #include "llvm/ADT/StringRef.h" |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 18 | #include "llvm/Bitstream/BitCodes.h" |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 19 | #include "llvm/IR/ModuleSummaryIndex.h" |
| 20 | #include "llvm/Support/Endian.h" |
| 21 | #include "llvm/Support/Error.h" |
| 22 | #include "llvm/Support/ErrorOr.h" |
| 23 | #include "llvm/Support/MemoryBuffer.h" |
| 24 | #include <cstdint> |
| 25 | #include <memory> |
| 26 | #include <string> |
| 27 | #include <system_error> |
| 28 | #include <vector> |
| 29 | namespace llvm { |
| 30 | |
| 31 | class LLVMContext; |
| 32 | class Module; |
| 33 | |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame^] | 34 | typedef llvm::function_ref<Optional<std::string>(StringRef)> |
| 35 | DataLayoutCallbackTy; |
| 36 | |
// These functions are for converting Expected/Error values to
// ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
// Remove these functions once no longer needed by the C and libLTO APIs.

/// Converts \p Err into a std::error_code. The Error is taken by value, so
/// the caller hands it over to this function.
/// NOTE(review): going by the name, the errors are also reported through
/// \p Ctx; the definition is not in this header — confirm.
std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
| 42 | |
| 43 | template <typename T> |
| 44 | ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { |
| 45 | if (!Val) |
| 46 | return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); |
| 47 | return std::move(*Val); |
| 48 | } |
| 49 | |
| 50 | struct BitcodeFileContents; |
| 51 | |
/// Basic information extracted from a bitcode module to be used for LTO.
struct BitcodeLTOInfo {
  /// Whether to compile with ThinLTO.
  bool IsThinLTO;
  /// Whether the module has a summary.
  bool HasSummary;
  /// NOTE(review): meaning is not visible in this header; presumably mirrors
  /// a per-module "enable split LTO unit" setting — confirm.
  bool EnableSplitLTOUnit;
};
| 58 | |
| 59 | /// Represents a module in a bitcode file. |
| 60 | class BitcodeModule { |
| 61 | // This covers the identification (if present) and module blocks. |
| 62 | ArrayRef<uint8_t> Buffer; |
| 63 | StringRef ModuleIdentifier; |
| 64 | |
| 65 | // The string table used to interpret this module. |
| 66 | StringRef Strtab; |
| 67 | |
| 68 | // The bitstream location of the IDENTIFICATION_BLOCK. |
| 69 | uint64_t IdentificationBit; |
| 70 | |
| 71 | // The bitstream location of this module's MODULE_BLOCK. |
| 72 | uint64_t ModuleBit; |
| 73 | |
| 74 | BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, |
| 75 | uint64_t IdentificationBit, uint64_t ModuleBit) |
| 76 | : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), |
| 77 | IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} |
| 78 | |
| 79 | // Calls the ctor. |
| 80 | friend Expected<BitcodeFileContents> |
| 81 | getBitcodeFileContents(MemoryBufferRef Buffer); |
| 82 | |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame^] | 83 | Expected<std::unique_ptr<Module>> |
| 84 | getModuleImpl(LLVMContext &Context, bool MaterializeAll, |
| 85 | bool ShouldLazyLoadMetadata, bool IsImporting, |
| 86 | DataLayoutCallbackTy DataLayoutCallback); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 87 | |
| 88 | public: |
| 89 | StringRef getBuffer() const { |
| 90 | return StringRef((const char *)Buffer.begin(), Buffer.size()); |
| 91 | } |
| 92 | |
| 93 | StringRef getStrtab() const { return Strtab; } |
| 94 | |
| 95 | StringRef getModuleIdentifier() const { return ModuleIdentifier; } |
| 96 | |
| 97 | /// Read the bitcode module and prepare for lazy deserialization of function |
| 98 | /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. |
| 99 | /// If IsImporting is true, this module is being parsed for ThinLTO |
| 100 | /// importing into another module. |
| 101 | Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, |
| 102 | bool ShouldLazyLoadMetadata, |
| 103 | bool IsImporting); |
| 104 | |
| 105 | /// Read the entire bitcode module and return it. |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame^] | 106 | Expected<std::unique_ptr<Module>> parseModule( |
| 107 | LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = |
| 108 | [](StringRef) { return None; }); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 109 | |
| 110 | /// Returns information about the module to be used for LTO: whether to |
| 111 | /// compile with ThinLTO, and whether it has a summary. |
| 112 | Expected<BitcodeLTOInfo> getLTOInfo(); |
| 113 | |
| 114 | /// Parse the specified bitcode buffer, returning the module summary index. |
| 115 | Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); |
| 116 | |
| 117 | /// Parse the specified bitcode buffer and merge its module summary index |
| 118 | /// into CombinedIndex. |
| 119 | Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, |
| 120 | uint64_t ModuleId); |
| 121 | }; |
| 122 | |
| 123 | struct BitcodeFileContents { |
| 124 | std::vector<BitcodeModule> Mods; |
| 125 | StringRef Symtab, StrtabForSymtab; |
| 126 | }; |
| 127 | |
/// Returns the contents of a bitcode file. This includes the raw contents of
/// the symbol table embedded in the bitcode file. Clients which require a
/// symbol table should prefer to use irsymtab::read instead of this function
/// because it creates a reader for the irsymtab and handles upgrading bitcode
/// files without a symbol table or with an old symbol table.
///
/// \param Buffer the bitcode file to read.
/// \returns the file contents wrapped in an Expected, which carries an Error
/// instead when reading fails.
Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
| 134 | |
/// Returns a list of modules in the specified bitcode buffer.
///
/// \param Buffer the bitcode buffer to inspect.
/// \returns the modules wrapped in an Expected, which carries an Error
/// instead on failure.
Expected<std::vector<BitcodeModule>>
getBitcodeModuleList(MemoryBufferRef Buffer);
| 138 | |
/// Read the header of the specified bitcode buffer and prepare for lazy
/// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
/// lazily load metadata as well. If IsImporting is true, this module is
/// being parsed for ThinLTO importing into another module.
///
/// Both flags default to false. The Module is returned wrapped in an
/// Expected, which carries an Error instead on failure.
Expected<std::unique_ptr<Module>>
getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
                     bool ShouldLazyLoadMetadata = false,
                     bool IsImporting = false);
| 147 | |
/// Like getLazyBitcodeModule, except that the module takes ownership of
/// the memory buffer if successful. If successful, this moves Buffer. On
/// error, this *does not* move Buffer. If IsImporting is true, this module is
/// being parsed for ThinLTO importing into another module.
///
/// \p Buffer is taken by rvalue reference; ShouldLazyLoadMetadata and
/// IsImporting both default to false.
Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
    std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
    bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
| 155 | |
/// Read the header of the specified bitcode buffer and extract just the
/// triple information. If successful, this returns the triple as a string.
/// NOTE(review): the return type is Expected<std::string>, so failures are
/// reported through the Expected. The earlier wording that errors yield ""
/// does not match that type — confirm whether "" is still what is returned
/// when the buffer carries no triple.
Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
| 160 | |
/// Return true if \p Buffer contains a bitcode file with ObjC code (category
/// or class) in it. The boolean is wrapped in an Expected, which carries an
/// Error instead on failure.
Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
| 164 | |
/// Read the header of the specified bitcode buffer and extract just the
/// producer string information. If successful, this returns the producer
/// string.
/// NOTE(review): the return type is Expected<std::string>, so failures are
/// reported through the Expected. The earlier wording that errors yield ""
/// does not match that type — confirm whether "" is still what is returned
/// when the buffer carries no producer string.
Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
| 169 | |
/// Read the specified bitcode file, returning the module.
///
/// \param Buffer the bitcode to parse.
/// \param Context the LLVMContext passed through to the reader.
/// \param DataLayoutCallback defaults to a callback that returns None for
/// every input.
/// \returns the Module wrapped in an Expected, which carries an Error instead
/// on failure.
Expected<std::unique_ptr<Module>> parseBitcodeFile(
    MemoryBufferRef Buffer, LLVMContext &Context,
    DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
      return None;
    });
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 176 | |
/// Returns LTO information for the specified bitcode file. The
/// BitcodeLTOInfo is wrapped in an Expected, which carries an Error instead
/// on failure.
Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
| 179 | |
/// Parse the specified bitcode buffer, returning the module summary index.
/// The index is handed back as a std::unique_ptr wrapped in an Expected,
/// which carries an Error instead on failure.
Expected<std::unique_ptr<ModuleSummaryIndex>>
getModuleSummaryIndex(MemoryBufferRef Buffer);
| 183 | |
/// Parse the specified bitcode buffer and merge the index into CombinedIndex.
///
/// \param ModuleId NOTE(review): presumably the identifier recorded for this
/// module inside \p CombinedIndex — confirm against the definition.
/// \returns an Error describing the outcome.
Error readModuleSummaryIndex(MemoryBufferRef Buffer,
                             ModuleSummaryIndex &CombinedIndex,
                             uint64_t ModuleId);
| 188 | |
/// Parse the module summary index out of an IR file and return the module
/// summary index object if found, or an empty summary if not. If Path refers
/// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
/// this function will return nullptr.
///
/// IgnoreEmptyThinLTOIndexFile defaults to false.
Expected<std::unique_ptr<ModuleSummaryIndex>>
getModuleSummaryIndexForFile(StringRef Path,
                             bool IgnoreEmptyThinLTOIndexFile = false);
| 196 | |
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr start of the candidate buffer.
/// \param BufEnd one past the last readable byte of the buffer.
/// \returns true only when at least four bytes are available and they are
/// 0xDE 0xC0 0x17 0x0B, in that order; false otherwise.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes may be inspected, so require four readable bytes.
  // A mere non-empty check would permit reads past BufEnd on 1-3 byte inputs.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
| 209 | |
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr start of the candidate buffer.
/// \param BufEnd one past the last readable byte of the buffer.
/// \returns true only when at least four bytes are available and they are
/// 'B' 'C' 0xC0 0xDE, in that order; false otherwise.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes may be inspected, so require four readable bytes.
  // A mere non-empty check would permit reads past BufEnd on 1-3 byte inputs.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
| 222 | |
| 223 | /// isBitcode - Return true if the given bytes are the magic bytes for |
| 224 | /// LLVM IR bitcode, either with or without a wrapper. |
| 225 | inline bool isBitcode(const unsigned char *BufPtr, |
| 226 | const unsigned char *BufEnd) { |
| 227 | return isBitcodeWrapper(BufPtr, BufEnd) || |
| 228 | isRawBitcode(BufPtr, BufEnd); |
| 229 | } |
| 230 | |
| 231 | /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special |
| 232 | /// header for padding or other reasons. The format of this header is: |
| 233 | /// |
| 234 | /// struct bc_header { |
| 235 | /// uint32_t Magic; // 0x0B17C0DE |
| 236 | /// uint32_t Version; // Version, currently always 0. |
| 237 | /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. |
| 238 | /// uint32_t BitcodeSize; // Size of traditional bitcode file. |
| 239 | /// ... potentially other gunk ... |
| 240 | /// }; |
| 241 | /// |
| 242 | /// This function is called when we find a file with a matching magic number. |
| 243 | /// In this case, skip down to the subsection of the file that is actually a |
| 244 | /// BC file. |
| 245 | /// If 'VerifyBufferSize' is true, check that the buffer is large enough to |
| 246 | /// contain the whole bitcode file. |
| 247 | inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, |
| 248 | const unsigned char *&BufEnd, |
| 249 | bool VerifyBufferSize) { |
| 250 | // Must contain the offset and size field! |
| 251 | if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) |
| 252 | return true; |
| 253 | |
| 254 | unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); |
| 255 | unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); |
| 256 | uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; |
| 257 | |
| 258 | // Verify that Offset+Size fits in the file. |
| 259 | if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) |
| 260 | return true; |
| 261 | BufPtr += Offset; |
| 262 | BufEnd = BufPtr+Size; |
| 263 | return false; |
| 264 | } |
| 265 | |
/// NOTE(review): not documented in this header and defined elsewhere.
/// Presumably builds an APInt of \p TypeBits bits from the 64-bit words in
/// \p Vals — confirm against the definition.
APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
| 267 | |
/// Returns the std::error_category that make_error_code pairs with
/// BitcodeError values. Defined out of line.
const std::error_category &BitcodeErrorCategory();
/// Error codes for bitcode reading; the single enumerator has value 1.
enum class BitcodeError { CorruptedBitcode = 1 };
| 270 | inline std::error_code make_error_code(BitcodeError E) { |
| 271 | return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); |
| 272 | } |
| 273 | |
| 274 | } // end namespace llvm |
| 275 | |
namespace std {

// Marks llvm::BitcodeError as an error-code enum by deriving the
// specialization from std::true_type.
template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};

} // end namespace std
| 281 | |
| 282 | #endif // LLVM_BITCODE_BITCODEREADER_H |