blob: d465420f3d6b3eb7c585f5e96c7cb327041bb9af [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2//
Andrew Walbran16937d02019-10-22 13:54:20 +01003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01006//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading profiling data for instrumentation
10// based PGO and coverage.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15#define LLVM_PROFILEDATA_INSTRPROFREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/ProfileSummary.h"
20#include "llvm/ProfileData/InstrProf.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/LineIterator.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/OnDiskHashTable.h"
26#include "llvm/Support/SwapByteOrder.h"
27#include <algorithm>
28#include <cassert>
29#include <cstddef>
30#include <cstdint>
31#include <iterator>
32#include <memory>
33#include <utility>
34#include <vector>
35
36namespace llvm {
37
38class InstrProfReader;
39
40/// A file format agnostic iterator over profiling data.
41class InstrProfIterator : public std::iterator<std::input_iterator_tag,
42 NamedInstrProfRecord> {
43 InstrProfReader *Reader = nullptr;
44 value_type Record;
45
46 void Increment();
47
48public:
49 InstrProfIterator() = default;
50 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
51
52 InstrProfIterator &operator++() { Increment(); return *this; }
53 bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
54 bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
55 value_type &operator*() { return Record; }
56 value_type *operator->() { return &Record; }
57};
58
59/// Base class and interface for reading profiling data of any known instrprof
60/// format. Provides an iterator over NamedInstrProfRecords.
61class InstrProfReader {
62 instrprof_error LastError = instrprof_error::success;
63
64public:
65 InstrProfReader() = default;
66 virtual ~InstrProfReader() = default;
67
68 /// Read the header. Required before reading first record.
69 virtual Error readHeader() = 0;
70
71 /// Read a single record.
72 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
73
74 /// Iterator over profile data.
75 InstrProfIterator begin() { return InstrProfIterator(this); }
76 InstrProfIterator end() { return InstrProfIterator(); }
77
78 virtual bool isIRLevelProfile() const = 0;
79
80 /// Return the PGO symtab. There are three different readers:
81 /// Raw, Text, and Indexed profile readers. The first two types
82 /// of readers are used only by llvm-profdata tool, while the indexed
83 /// profile reader is also used by llvm-cov tool and the compiler (
84 /// backend or frontend). Since creating PGO symtab can create
85 /// significant runtime and memory overhead (as it touches data
86 /// for the whole program), InstrProfSymtab for the indexed profile
87 /// reader should be created on demand and it is recommended to be
88 /// only used for dumping purpose with llvm-proftool, not with the
89 /// compiler.
90 virtual InstrProfSymtab &getSymtab() = 0;
91
92protected:
93 std::unique_ptr<InstrProfSymtab> Symtab;
94
95 /// Set the current error and return same.
96 Error error(instrprof_error Err) {
97 LastError = Err;
98 if (Err == instrprof_error::success)
99 return Error::success();
100 return make_error<InstrProfError>(Err);
101 }
102
103 Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
104
105 /// Clear the current error and return a successful one.
106 Error success() { return error(instrprof_error::success); }
107
108public:
109 /// Return true if the reader has finished reading the profile data.
110 bool isEOF() { return LastError == instrprof_error::eof; }
111
112 /// Return true if the reader encountered an error reading profiling data.
113 bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
114
115 /// Get the current error.
116 Error getError() {
117 if (hasError())
118 return make_error<InstrProfError>(LastError);
119 return Error::success();
120 }
121
122 /// Factory method to create an appropriately typed reader for the given
123 /// instrprof file.
124 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
125
126 static Expected<std::unique_ptr<InstrProfReader>>
127 create(std::unique_ptr<MemoryBuffer> Buffer);
128};
129
130/// Reader for the simple text based instrprof format.
131///
132/// This format is a simple text format that's suitable for test data. Records
133/// are separated by one or more blank lines, and record fields are separated by
134/// new lines.
135///
136/// Each record consists of a function name, a function hash, a number of
137/// counters, and then each counter value, in that order.
138class TextInstrProfReader : public InstrProfReader {
139private:
140 /// The profile data file contents.
141 std::unique_ptr<MemoryBuffer> DataBuffer;
142 /// Iterator over the profile data.
143 line_iterator Line;
144 bool IsIRLevelProfile = false;
145
146 Error readValueProfileData(InstrProfRecord &Record);
147
148public:
149 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
150 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
151 TextInstrProfReader(const TextInstrProfReader &) = delete;
152 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
153
154 /// Return true if the given buffer is in text instrprof format.
155 static bool hasFormat(const MemoryBuffer &Buffer);
156
157 bool isIRLevelProfile() const override { return IsIRLevelProfile; }
158
159 /// Read the header.
160 Error readHeader() override;
161
162 /// Read a single record.
163 Error readNextRecord(NamedInstrProfRecord &Record) override;
164
165 InstrProfSymtab &getSymtab() override {
166 assert(Symtab.get());
167 return *Symtab.get();
168 }
169};
170
171/// Reader for the raw instrprof binary format from runtime.
172///
173/// This format is a raw memory dump of the instrumentation-baed profiling data
174/// from the runtime. It has no index.
175///
176/// Templated on the unsigned type whose size matches pointers on the platform
177/// that wrote the profile.
178template <class IntPtrT>
179class RawInstrProfReader : public InstrProfReader {
180private:
181 /// The profile data file contents.
182 std::unique_ptr<MemoryBuffer> DataBuffer;
183 bool ShouldSwapBytes;
184 // The value of the version field of the raw profile data header. The lower 56
185 // bits specifies the format version and the most significant 8 bits specify
186 // the variant types of the profile.
187 uint64_t Version;
188 uint64_t CountersDelta;
189 uint64_t NamesDelta;
190 const RawInstrProf::ProfileData<IntPtrT> *Data;
191 const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
192 const uint64_t *CountersStart;
193 const char *NamesStart;
194 uint64_t NamesSize;
195 // After value profile is all read, this pointer points to
196 // the header of next profile data (if exists)
197 const uint8_t *ValueDataStart;
198 uint32_t ValueKindLast;
199 uint32_t CurValueDataSize;
200
201public:
202 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
203 : DataBuffer(std::move(DataBuffer)) {}
204 RawInstrProfReader(const RawInstrProfReader &) = delete;
205 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
206
207 static bool hasFormat(const MemoryBuffer &DataBuffer);
208 Error readHeader() override;
209 Error readNextRecord(NamedInstrProfRecord &Record) override;
210
211 bool isIRLevelProfile() const override {
212 return (Version & VARIANT_MASK_IR_PROF) != 0;
213 }
214
215 InstrProfSymtab &getSymtab() override {
216 assert(Symtab.get());
217 return *Symtab.get();
218 }
219
220private:
221 Error createSymtab(InstrProfSymtab &Symtab);
222 Error readNextHeader(const char *CurrentPos);
223 Error readHeader(const RawInstrProf::Header &Header);
224
225 template <class IntT> IntT swap(IntT Int) const {
226 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
227 }
228
229 support::endianness getDataEndianness() const {
230 support::endianness HostEndian = getHostEndianness();
231 if (!ShouldSwapBytes)
232 return HostEndian;
233 if (HostEndian == support::little)
234 return support::big;
235 else
236 return support::little;
237 }
238
239 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
240 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
241 }
242
243 Error readName(NamedInstrProfRecord &Record);
244 Error readFuncHash(NamedInstrProfRecord &Record);
245 Error readRawCounts(InstrProfRecord &Record);
246 Error readValueProfilingData(InstrProfRecord &Record);
247 bool atEnd() const { return Data == DataEnd; }
248
249 void advanceData() {
250 Data++;
251 ValueDataStart += CurValueDataSize;
252 }
253
254 const char *getNextHeaderPos() const {
255 assert(atEnd());
256 return (const char *)ValueDataStart;
257 }
258
259 const uint64_t *getCounter(IntPtrT CounterPtr) const {
260 ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
261 return CountersStart + Offset;
262 }
263
264 StringRef getName(uint64_t NameRef) const {
265 return Symtab->getFuncName(swap(NameRef));
266 }
267};
268
269using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
270using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
271
272namespace IndexedInstrProf {
273
274enum class HashT : uint32_t;
275
276} // end namespace IndexedInstrProf
277
278/// Trait for lookups into the on-disk hash table for the binary instrprof
279/// format.
280class InstrProfLookupTrait {
281 std::vector<NamedInstrProfRecord> DataBuffer;
282 IndexedInstrProf::HashT HashType;
283 unsigned FormatVersion;
284 // Endianness of the input value profile data.
285 // It should be LE by default, but can be changed
286 // for testing purpose.
287 support::endianness ValueProfDataEndianness = support::little;
288
289public:
290 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
291 : HashType(HashType), FormatVersion(FormatVersion) {}
292
293 using data_type = ArrayRef<NamedInstrProfRecord>;
294
295 using internal_key_type = StringRef;
296 using external_key_type = StringRef;
297 using hash_value_type = uint64_t;
298 using offset_type = uint64_t;
299
300 static bool EqualKey(StringRef A, StringRef B) { return A == B; }
301 static StringRef GetInternalKey(StringRef K) { return K; }
302 static StringRef GetExternalKey(StringRef K) { return K; }
303
304 hash_value_type ComputeHash(StringRef K);
305
306 static std::pair<offset_type, offset_type>
307 ReadKeyDataLength(const unsigned char *&D) {
308 using namespace support;
309
310 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
311 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
312 return std::make_pair(KeyLen, DataLen);
313 }
314
315 StringRef ReadKey(const unsigned char *D, offset_type N) {
316 return StringRef((const char *)D, N);
317 }
318
319 bool readValueProfilingData(const unsigned char *&D,
320 const unsigned char *const End);
321 data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
322
323 // Used for testing purpose only.
324 void setValueProfDataEndianness(support::endianness Endianness) {
325 ValueProfDataEndianness = Endianness;
326 }
327};
328
329struct InstrProfReaderIndexBase {
330 virtual ~InstrProfReaderIndexBase() = default;
331
332 // Read all the profile records with the same key pointed to the current
333 // iterator.
334 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
335
336 // Read all the profile records with the key equal to FuncName
337 virtual Error getRecords(StringRef FuncName,
338 ArrayRef<NamedInstrProfRecord> &Data) = 0;
339 virtual void advanceToNextKey() = 0;
340 virtual bool atEnd() const = 0;
341 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
342 virtual uint64_t getVersion() const = 0;
343 virtual bool isIRLevelProfile() const = 0;
344 virtual Error populateSymtab(InstrProfSymtab &) = 0;
345};
346
347using OnDiskHashTableImplV3 =
348 OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
349
350template <typename HashTableImpl>
Andrew Walbran16937d02019-10-22 13:54:20 +0100351class InstrProfReaderItaniumRemapper;
352
353template <typename HashTableImpl>
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100354class InstrProfReaderIndex : public InstrProfReaderIndexBase {
355private:
356 std::unique_ptr<HashTableImpl> HashTable;
357 typename HashTableImpl::data_iterator RecordIterator;
358 uint64_t FormatVersion;
359
Andrew Walbran16937d02019-10-22 13:54:20 +0100360 friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
361
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100362public:
363 InstrProfReaderIndex(const unsigned char *Buckets,
364 const unsigned char *const Payload,
365 const unsigned char *const Base,
366 IndexedInstrProf::HashT HashType, uint64_t Version);
367 ~InstrProfReaderIndex() override = default;
368
369 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
370 Error getRecords(StringRef FuncName,
371 ArrayRef<NamedInstrProfRecord> &Data) override;
372 void advanceToNextKey() override { RecordIterator++; }
373
374 bool atEnd() const override {
375 return RecordIterator == HashTable->data_end();
376 }
377
378 void setValueProfDataEndianness(support::endianness Endianness) override {
379 HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
380 }
381
382 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
383
384 bool isIRLevelProfile() const override {
385 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
386 }
387
388 Error populateSymtab(InstrProfSymtab &Symtab) override {
389 return Symtab.create(HashTable->keys());
390 }
391};
392
Andrew Walbran16937d02019-10-22 13:54:20 +0100393/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
394class InstrProfReaderRemapper {
395public:
396 virtual ~InstrProfReaderRemapper() {}
397 virtual Error populateRemappings() { return Error::success(); }
398 virtual Error getRecords(StringRef FuncName,
399 ArrayRef<NamedInstrProfRecord> &Data) = 0;
400};
401
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100402/// Reader for the indexed binary instrprof format.
403class IndexedInstrProfReader : public InstrProfReader {
404private:
405 /// The profile data file contents.
406 std::unique_ptr<MemoryBuffer> DataBuffer;
Andrew Walbran16937d02019-10-22 13:54:20 +0100407 /// The profile remapping file contents.
408 std::unique_ptr<MemoryBuffer> RemappingBuffer;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100409 /// The index into the profile data.
410 std::unique_ptr<InstrProfReaderIndexBase> Index;
Andrew Walbran16937d02019-10-22 13:54:20 +0100411 /// The profile remapping file contents.
412 std::unique_ptr<InstrProfReaderRemapper> Remapper;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100413 /// Profile summary data.
414 std::unique_ptr<ProfileSummary> Summary;
415 // Index to the current record in the record array.
416 unsigned RecordIndex;
417
418 // Read the profile summary. Return a pointer pointing to one byte past the
419 // end of the summary data if it exists or the input \c Cur.
420 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
421 const unsigned char *Cur);
422
423public:
Andrew Walbran16937d02019-10-22 13:54:20 +0100424 IndexedInstrProfReader(
425 std::unique_ptr<MemoryBuffer> DataBuffer,
426 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
427 : DataBuffer(std::move(DataBuffer)),
428 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100429 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
430 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
431
432 /// Return the profile version.
433 uint64_t getVersion() const { return Index->getVersion(); }
434 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
435
436 /// Return true if the given buffer is in an indexed instrprof format.
437 static bool hasFormat(const MemoryBuffer &DataBuffer);
438
439 /// Read the file header.
440 Error readHeader() override;
441 /// Read a single record.
442 Error readNextRecord(NamedInstrProfRecord &Record) override;
443
444 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
445 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
446 uint64_t FuncHash);
447
448 /// Fill Counts with the profile data for the given function name.
449 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
450 std::vector<uint64_t> &Counts);
451
452 /// Return the maximum of all known function counts.
453 uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
454
455 /// Factory method to create an indexed reader.
456 static Expected<std::unique_ptr<IndexedInstrProfReader>>
Andrew Walbran16937d02019-10-22 13:54:20 +0100457 create(const Twine &Path, const Twine &RemappingPath = "");
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100458
459 static Expected<std::unique_ptr<IndexedInstrProfReader>>
Andrew Walbran16937d02019-10-22 13:54:20 +0100460 create(std::unique_ptr<MemoryBuffer> Buffer,
461 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100462
463 // Used for testing purpose only.
464 void setValueProfDataEndianness(support::endianness Endianness) {
465 Index->setValueProfDataEndianness(Endianness);
466 }
467
468 // See description in the base class. This interface is designed
469 // to be used by llvm-profdata (for dumping). Avoid using this when
470 // the client is the compiler.
471 InstrProfSymtab &getSymtab() override;
472 ProfileSummary &getSummary() { return *(Summary.get()); }
473};
474
475} // end namespace llvm
476
477#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H