blob: a82791c8720ba80d707d193bb15ddc23e4922815 [file] [log] [blame]
//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header defines interfaces to read LLVM bitcode files/streams.
//
//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_BITCODE_BITCODEREADER_H
14#define LLVM_BITCODE_BITCODEREADER_H
15
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringRef.h"
Andrew Walbran3d2c1972020-04-07 12:24:26 +010018#include "llvm/Bitstream/BitCodes.h"
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010019#include "llvm/IR/ModuleSummaryIndex.h"
20#include "llvm/Support/Endian.h"
21#include "llvm/Support/Error.h"
22#include "llvm/Support/ErrorOr.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include <cstdint>
25#include <memory>
26#include <string>
27#include <system_error>
28#include <vector>
29namespace llvm {
30
31class LLVMContext;
32class Module;
33
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020034typedef llvm::function_ref<Optional<std::string>(StringRef)>
35 DataLayoutCallbackTy;
36
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010037 // These functions are for converting Expected/Error values to
38 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
39 // Remove these functions once no longer needed by the C and libLTO APIs.
40
41 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
42
43 template <typename T>
44 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
45 if (!Val)
46 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
47 return std::move(*Val);
48 }
49
struct BitcodeFileContents;

/// Basic information extracted from a bitcode module to be used for LTO.
struct BitcodeLTOInfo {
  /// Whether the module is to be compiled with ThinLTO.
  bool IsThinLTO;
  /// Whether the module has a summary.
  bool HasSummary;
  /// NOTE(review): presumably mirrors a module-level "split LTO unit" setting
  /// -- confirm against the code that fills this in.
  bool EnableSplitLTOUnit;
};
58
59 /// Represents a module in a bitcode file.
60 class BitcodeModule {
61 // This covers the identification (if present) and module blocks.
62 ArrayRef<uint8_t> Buffer;
63 StringRef ModuleIdentifier;
64
65 // The string table used to interpret this module.
66 StringRef Strtab;
67
68 // The bitstream location of the IDENTIFICATION_BLOCK.
69 uint64_t IdentificationBit;
70
71 // The bitstream location of this module's MODULE_BLOCK.
72 uint64_t ModuleBit;
73
74 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
75 uint64_t IdentificationBit, uint64_t ModuleBit)
76 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
77 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
78
79 // Calls the ctor.
80 friend Expected<BitcodeFileContents>
81 getBitcodeFileContents(MemoryBufferRef Buffer);
82
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020083 Expected<std::unique_ptr<Module>>
84 getModuleImpl(LLVMContext &Context, bool MaterializeAll,
85 bool ShouldLazyLoadMetadata, bool IsImporting,
86 DataLayoutCallbackTy DataLayoutCallback);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010087
88 public:
89 StringRef getBuffer() const {
90 return StringRef((const char *)Buffer.begin(), Buffer.size());
91 }
92
93 StringRef getStrtab() const { return Strtab; }
94
95 StringRef getModuleIdentifier() const { return ModuleIdentifier; }
96
97 /// Read the bitcode module and prepare for lazy deserialization of function
98 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
99 /// If IsImporting is true, this module is being parsed for ThinLTO
100 /// importing into another module.
101 Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
102 bool ShouldLazyLoadMetadata,
103 bool IsImporting);
104
105 /// Read the entire bitcode module and return it.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200106 Expected<std::unique_ptr<Module>> parseModule(
107 LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback =
108 [](StringRef) { return None; });
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100109
110 /// Returns information about the module to be used for LTO: whether to
111 /// compile with ThinLTO, and whether it has a summary.
112 Expected<BitcodeLTOInfo> getLTOInfo();
113
114 /// Parse the specified bitcode buffer, returning the module summary index.
115 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
116
117 /// Parse the specified bitcode buffer and merge its module summary index
118 /// into CombinedIndex.
119 Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
120 uint64_t ModuleId);
121 };
122
123 struct BitcodeFileContents {
124 std::vector<BitcodeModule> Mods;
125 StringRef Symtab, StrtabForSymtab;
126 };
127
128 /// Returns the contents of a bitcode file. This includes the raw contents of
129 /// the symbol table embedded in the bitcode file. Clients which require a
130 /// symbol table should prefer to use irsymtab::read instead of this function
131 /// because it creates a reader for the irsymtab and handles upgrading bitcode
132 /// files without a symbol table or with an old symbol table.
133 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
134
135 /// Returns a list of modules in the specified bitcode buffer.
136 Expected<std::vector<BitcodeModule>>
137 getBitcodeModuleList(MemoryBufferRef Buffer);
138
139 /// Read the header of the specified bitcode buffer and prepare for lazy
140 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
141 /// lazily load metadata as well. If IsImporting is true, this module is
142 /// being parsed for ThinLTO importing into another module.
143 Expected<std::unique_ptr<Module>>
144 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
145 bool ShouldLazyLoadMetadata = false,
146 bool IsImporting = false);
147
148 /// Like getLazyBitcodeModule, except that the module takes ownership of
149 /// the memory buffer if successful. If successful, this moves Buffer. On
150 /// error, this *does not* move Buffer. If IsImporting is true, this module is
151 /// being parsed for ThinLTO importing into another module.
152 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
153 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
154 bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
155
156 /// Read the header of the specified bitcode buffer and extract just the
157 /// triple information. If successful, this returns a string. On error, this
158 /// returns "".
159 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
160
161 /// Return true if \p Buffer contains a bitcode file with ObjC code (category
162 /// or class) in it.
163 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
164
165 /// Read the header of the specified bitcode buffer and extract just the
166 /// producer string information. If successful, this returns a string. On
167 /// error, this returns "".
168 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
169
170 /// Read the specified bitcode file, returning the module.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200171 Expected<std::unique_ptr<Module>> parseBitcodeFile(
172 MemoryBufferRef Buffer, LLVMContext &Context,
173 DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
174 return None;
175 });
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100176
177 /// Returns LTO information for the specified bitcode file.
178 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
179
180 /// Parse the specified bitcode buffer, returning the module summary index.
181 Expected<std::unique_ptr<ModuleSummaryIndex>>
182 getModuleSummaryIndex(MemoryBufferRef Buffer);
183
184 /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
185 Error readModuleSummaryIndex(MemoryBufferRef Buffer,
186 ModuleSummaryIndex &CombinedIndex,
187 uint64_t ModuleId);
188
189 /// Parse the module summary index out of an IR file and return the module
190 /// summary index object if found, or an empty summary if not. If Path refers
191 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
192 /// this function will return nullptr.
193 Expected<std::unique_ptr<ModuleSummaryIndex>>
194 getModuleSummaryIndexForFile(StringRef Path,
195 bool IgnoreEmptyThinLTOIndexFile = false);
196
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr first byte of the buffer to inspect.
/// \param BufEnd one past the last byte of the buffer.
/// \returns true only if the buffer holds at least four bytes and the first
///          four are DE C0 17 0B; false for shorter buffers.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  // All four magic bytes must lie inside [BufPtr, BufEnd) before any of them
  // is read; a mere non-empty check would allow reads past the end of a
  // 1-3 byte buffer.
  return BufEnd - BufPtr >= 4 &&
         BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
209
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr first byte of the buffer to inspect.
/// \param BufEnd one past the last byte of the buffer.
/// \returns true only if the buffer holds at least four bytes and the first
///          four are 'B' 'C' C0 DE; false for shorter buffers.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  // All four magic bytes must lie inside [BufPtr, BufEnd) before any of them
  // is read; a mere non-empty check would allow reads past the end of a
  // 1-3 byte buffer.
  return BufEnd - BufPtr >= 4 &&
         BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
222
223 /// isBitcode - Return true if the given bytes are the magic bytes for
224 /// LLVM IR bitcode, either with or without a wrapper.
225 inline bool isBitcode(const unsigned char *BufPtr,
226 const unsigned char *BufEnd) {
227 return isBitcodeWrapper(BufPtr, BufEnd) ||
228 isRawBitcode(BufPtr, BufEnd);
229 }
230
231 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
232 /// header for padding or other reasons. The format of this header is:
233 ///
234 /// struct bc_header {
235 /// uint32_t Magic; // 0x0B17C0DE
236 /// uint32_t Version; // Version, currently always 0.
237 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
238 /// uint32_t BitcodeSize; // Size of traditional bitcode file.
239 /// ... potentially other gunk ...
240 /// };
241 ///
242 /// This function is called when we find a file with a matching magic number.
243 /// In this case, skip down to the subsection of the file that is actually a
244 /// BC file.
245 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
246 /// contain the whole bitcode file.
247 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
248 const unsigned char *&BufEnd,
249 bool VerifyBufferSize) {
250 // Must contain the offset and size field!
251 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
252 return true;
253
254 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
255 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
256 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
257
258 // Verify that Offset+Size fits in the file.
259 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
260 return true;
261 BufPtr += Offset;
262 BufEnd = BufPtr+Size;
263 return false;
264 }
265
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200266 APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
267
/// Returns the std::error_category used for BitcodeError codes.
const std::error_category &BitcodeErrorCategory();

/// Error codes of the bitcode reader. Values start at 1.
enum class BitcodeError { CorruptedBitcode = 1 };

/// Builds a std::error_code from \p E: the enumerator's integer value paired
/// with BitcodeErrorCategory().
inline std::error_code make_error_code(BitcodeError E) {
  return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
}
273
274} // end namespace llvm
275
276namespace std {
277
278template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
279
280} // end namespace std
281
282#endif // LLVM_BITCODE_BITCODEREADER_H