blob: 2a4eac77d94652a27837f761d9b93f3d3aac4f32 [file] [log] [blame]
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11
12
13#include "llvm/ADT/ArrayRef.h"
14#include "llvm/DebugInfo/GSYM/FileEntry.h"
15#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
16#include "llvm/DebugInfo/GSYM/Header.h"
17#include "llvm/DebugInfo/GSYM/LineEntry.h"
18#include "llvm/DebugInfo/GSYM/StringTable.h"
19#include "llvm/Support/DataExtractor.h"
20#include "llvm/Support/Endian.h"
21#include "llvm/Support/ErrorOr.h"
22
23#include <inttypes.h>
24#include <memory>
25#include <stdint.h>
26#include <string>
27#include <vector>
28
29namespace llvm {
30class MemoryBuffer;
31class raw_ostream;
32
33namespace gsym {
34
35/// GsymReader is used to read GSYM data from a file or buffer.
36///
37/// This class is optimized for very quick lookups when the endianness matches
38/// the host system. The Header, address table, address info offsets, and file
39/// table is designed to be mmap'ed as read only into memory and used without
40/// any parsing needed. If the endianness doesn't match, we swap these objects
41/// and tables into GsymReader::SwappedData and then point our header and
42/// ArrayRefs to this swapped internal data.
43///
44/// GsymReader objects must use one of the static functions to create an
45/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
46
47class GsymReader {
48 GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
49 llvm::Error parse();
50
51 std::unique_ptr<MemoryBuffer> MemBuffer;
52 StringRef GsymBytes;
53 llvm::support::endianness Endian;
54 const Header *Hdr = nullptr;
55 ArrayRef<uint8_t> AddrOffsets;
56 ArrayRef<uint32_t> AddrInfoOffsets;
57 ArrayRef<FileEntry> Files;
58 StringTable StrTab;
59 /// When the GSYM file's endianness doesn't match the host system then
60 /// we must decode all data structures that need to be swapped into
61 /// local storage and set point the ArrayRef objects above to these swapped
62 /// copies.
63 struct SwappedData {
64 Header Hdr;
65 std::vector<uint8_t> AddrOffsets;
66 std::vector<uint32_t> AddrInfoOffsets;
67 std::vector<FileEntry> Files;
68 };
69 std::unique_ptr<SwappedData> Swap;
70
71public:
72 GsymReader(GsymReader &&RHS);
73 ~GsymReader();
74
75 /// Construct a GsymReader from a file on disk.
76 ///
77 /// \param Path The file path the GSYM file to read.
78 /// \returns An expected GsymReader that contains the object or an error
79 /// object that indicates reason for failing to read the GSYM.
80 static llvm::Expected<GsymReader> openFile(StringRef Path);
81
82 /// Construct a GsymReader from a buffer.
83 ///
84 /// \param Bytes A set of bytes that will be copied and owned by the
85 /// returned object on success.
86 /// \returns An expected GsymReader that contains the object or an error
87 /// object that indicates reason for failing to read the GSYM.
88 static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
89
90 /// Access the GSYM header.
91 /// \returns A native endian version of the GSYM header.
92 const Header &getHeader() const;
93
94 /// Get the full function info for an address.
95 ///
96 /// This should be called when a client will store a copy of the complete
97 /// FunctionInfo for a given address. For one off lookups, use the lookup()
98 /// function below.
99 ///
100 /// Symbolication server processes might want to parse the entire function
101 /// info for a given address and cache it if the process stays around to
102 /// service many symbolication addresses, like for parsing profiling
103 /// information.
104 ///
105 /// \param Addr A virtual address from the orignal object file to lookup.
106 ///
107 /// \returns An expected FunctionInfo that contains the function info object
108 /// or an error object that indicates reason for failing to lookup the
109 /// address.
110 llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
111
112 /// Lookup an address in the a GSYM.
113 ///
114 /// Lookup just the information needed for a specific address \a Addr. This
115 /// function is faster that calling getFunctionInfo() as it will only return
116 /// information that pertains to \a Addr and allows the parsing to skip any
117 /// extra information encoded for other addresses. For example the line table
118 /// parsing can stop when a matching LineEntry has been fouhnd, and the
119 /// InlineInfo can stop parsing early once a match has been found and also
120 /// skip information that doesn't match. This avoids memory allocations and
121 /// is much faster for lookups.
122 ///
123 /// \param Addr A virtual address from the orignal object file to lookup.
124 /// \returns An expected LookupResult that contains only the information
125 /// needed for the current address, or an error object that indicates reason
126 /// for failing to lookup the address.
127 llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
128
129 /// Get a string from the string table.
130 ///
131 /// \param Offset The string table offset for the string to retrieve.
132 /// \returns The string from the strin table.
133 StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
134
135 /// Get the a file entry for the suppplied file index.
136 ///
137 /// Used to convert any file indexes in the FunctionInfo data back into
138 /// files. This function can be used for iteration, but is more commonly used
139 /// for random access when doing lookups.
140 ///
141 /// \param Index An index into the file table.
142 /// \returns An optional FileInfo that will be valid if the file index is
143 /// valid, or llvm::None if the file index is out of bounds,
144 Optional<FileEntry> getFile(uint32_t Index) const {
145 if (Index < Files.size())
146 return Files[Index];
147 return llvm::None;
148 }
149
150 /// Dump the entire Gsym data contained in this object.
151 ///
152 /// \param OS The output stream to dump to.
153 void dump(raw_ostream &OS);
154
155 /// Dump a FunctionInfo object.
156 ///
157 /// This function will convert any string table indexes and file indexes
158 /// into human readable format.
159 ///
160 /// \param OS The output stream to dump to.
161 ///
162 /// \param FI The object to dump.
163 void dump(raw_ostream &OS, const FunctionInfo &FI);
164
165 /// Dump a LineTable object.
166 ///
167 /// This function will convert any string table indexes and file indexes
168 /// into human readable format.
169 ///
170 ///
171 /// \param OS The output stream to dump to.
172 ///
173 /// \param LT The object to dump.
174 void dump(raw_ostream &OS, const LineTable &LT);
175
176 /// Dump a InlineInfo object.
177 ///
178 /// This function will convert any string table indexes and file indexes
179 /// into human readable format.
180 ///
181 /// \param OS The output stream to dump to.
182 ///
183 /// \param II The object to dump.
184 ///
185 /// \param Indent The indentation as number of spaces. Used for recurive
186 /// dumping.
187 void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
188
189 /// Dump a FileEntry object.
190 ///
191 /// This function will convert any string table indexes into human readable
192 /// format.
193 ///
194 /// \param OS The output stream to dump to.
195 ///
196 /// \param FE The object to dump.
197 void dump(raw_ostream &OS, Optional<FileEntry> FE);
198
199 /// Get the number of addresses in this Gsym file.
200 uint32_t getNumAddresses() const {
201 return Hdr->NumAddresses;
202 }
203
204 /// Gets an address from the address table.
205 ///
206 /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
207 ///
208 /// \param Index A index into the address table.
209 /// \returns A resolved virtual address for adddress in the address table
210 /// or llvm::None if Index is out of bounds.
211 Optional<uint64_t> getAddress(size_t Index) const;
212
213protected:
214
215 /// Get an appropriate address info offsets array.
216 ///
217 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
218 /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
219 /// internally as a array of bytes that are in the correct endianness. When
220 /// we access this table we must get an array that matches those sizes. This
221 /// templatized helper function is used when accessing address offsets in the
222 /// AddrOffsets member variable.
223 ///
224 /// \returns An ArrayRef of an appropriate address offset size.
225 template <class T> ArrayRef<T>
226 getAddrOffsets() const {
227 return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
228 AddrOffsets.size()/sizeof(T));
229 }
230
231 /// Get an appropriate address from the address table.
232 ///
233 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
234 /// byte address offsets from the The gsym::Header::BaseAddress. The table is
235 /// stored internally as a array of bytes that are in the correct endianness.
236 /// In order to extract an address from the address table we must access the
237 /// address offset using the correct size and then add it to the BaseAddress
238 /// in the header.
239 ///
240 /// \param Index An index into the AddrOffsets array.
241 /// \returns An virtual address that matches the original object file for the
242 /// address as the specified index, or llvm::None if Index is out of bounds.
243 template <class T> Optional<uint64_t>
244 addressForIndex(size_t Index) const {
245 ArrayRef<T> AIO = getAddrOffsets<T>();
246 if (Index < AIO.size())
247 return AIO[Index] + Hdr->BaseAddress;
248 return llvm::None;
249 }
250 /// Lookup an address offset in the AddrOffsets table.
251 ///
252 /// Given an address offset, look it up using a binary search of the
253 /// AddrOffsets table.
254 ///
255 /// \param AddrOffset An address offset, that has already been computed by
256 /// subtracting the gsym::Header::BaseAddress.
257 /// \returns The matching address offset index. This index will be used to
258 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
259 template <class T>
260 llvm::Optional<uint64_t> getAddressOffsetIndex(const uint64_t AddrOffset) const {
261 ArrayRef<T> AIO = getAddrOffsets<T>();
262 const auto Begin = AIO.begin();
263 const auto End = AIO.end();
264 auto Iter = std::lower_bound(Begin, End, AddrOffset);
265 // Watch for addresses that fall between the gsym::Header::BaseAddress and
266 // the first address offset.
267 if (Iter == Begin && AddrOffset < *Begin)
268 return llvm::None;
269 if (Iter == End || AddrOffset < *Iter)
270 --Iter;
271 return std::distance(Begin, Iter);
272 }
273
274 /// Create a GSYM from a memory buffer.
275 ///
276 /// Called by both openFile() and copyBuffer(), this function does all of the
277 /// work of parsing the GSYM file and returning an error.
278 ///
279 /// \param MemBuffer A memory buffer that will transfer ownership into the
280 /// GsymReader.
281 /// \returns An expected GsymReader that contains the object or an error
282 /// object that indicates reason for failing to read the GSYM.
283 static llvm::Expected<llvm::gsym::GsymReader>
284 create(std::unique_ptr<MemoryBuffer> &MemBuffer);
285
286
287 /// Given an address, find the address index.
288 ///
289 /// Binary search the address table and find the matching address index.
290 ///
291 /// \param Addr A virtual address that matches the original object file
292 /// to lookup.
293 /// \returns An index into the address table. This index can be used to
294 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
295 /// Returns an error if the address isn't in the GSYM with details of why.
296 Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
297
298 /// Given an address index, get the offset for the FunctionInfo.
299 ///
300 /// Looking up an address is done by finding the corresponding address
301 /// index for the address. This index is then used to get the offset of the
302 /// FunctionInfo data that we will decode using this function.
303 ///
304 /// \param Index An index into the address table.
305 /// \returns An optional GSYM data offset for the offset of the FunctionInfo
306 /// that needs to be decoded.
307 Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
308};
309
310} // namespace gsym
311} // namespace llvm
312
313#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H