blob: 73751faab88e6813b2e011678bae89fecf3df30b [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2//
Andrew Walbran16937d02019-10-22 13:54:20 +01003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01006//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading profiling data for instrumentation
10// based PGO and coverage.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15#define LLVM_PROFILEDATA_INSTRPROFREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/ProfileSummary.h"
20#include "llvm/ProfileData/InstrProf.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/LineIterator.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/OnDiskHashTable.h"
26#include "llvm/Support/SwapByteOrder.h"
27#include <algorithm>
28#include <cassert>
29#include <cstddef>
30#include <cstdint>
31#include <iterator>
32#include <memory>
33#include <utility>
34#include <vector>
35
36namespace llvm {
37
38class InstrProfReader;
39
40/// A file format agnostic iterator over profiling data.
41class InstrProfIterator : public std::iterator<std::input_iterator_tag,
42 NamedInstrProfRecord> {
43 InstrProfReader *Reader = nullptr;
44 value_type Record;
45
46 void Increment();
47
48public:
49 InstrProfIterator() = default;
50 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
51
52 InstrProfIterator &operator++() { Increment(); return *this; }
53 bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
54 bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
55 value_type &operator*() { return Record; }
56 value_type *operator->() { return &Record; }
57};
58
59/// Base class and interface for reading profiling data of any known instrprof
60/// format. Provides an iterator over NamedInstrProfRecords.
61class InstrProfReader {
62 instrprof_error LastError = instrprof_error::success;
63
64public:
65 InstrProfReader() = default;
66 virtual ~InstrProfReader() = default;
67
68 /// Read the header. Required before reading first record.
69 virtual Error readHeader() = 0;
70
71 /// Read a single record.
72 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
73
74 /// Iterator over profile data.
75 InstrProfIterator begin() { return InstrProfIterator(this); }
76 InstrProfIterator end() { return InstrProfIterator(); }
77
78 virtual bool isIRLevelProfile() const = 0;
79
Andrew Walbran3d2c1972020-04-07 12:24:26 +010080 virtual bool hasCSIRLevelProfile() const = 0;
81
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010082 /// Return the PGO symtab. There are three different readers:
83 /// Raw, Text, and Indexed profile readers. The first two types
84 /// of readers are used only by llvm-profdata tool, while the indexed
85 /// profile reader is also used by llvm-cov tool and the compiler (
86 /// backend or frontend). Since creating PGO symtab can create
87 /// significant runtime and memory overhead (as it touches data
88 /// for the whole program), InstrProfSymtab for the indexed profile
89 /// reader should be created on demand and it is recommended to be
90 /// only used for dumping purpose with llvm-proftool, not with the
91 /// compiler.
92 virtual InstrProfSymtab &getSymtab() = 0;
93
Andrew Walbran3d2c1972020-04-07 12:24:26 +010094 /// Compute the sum of counts and return in Sum.
95 void accumuateCounts(CountSumOrPercent &Sum, bool IsCS);
96
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010097protected:
98 std::unique_ptr<InstrProfSymtab> Symtab;
99
100 /// Set the current error and return same.
101 Error error(instrprof_error Err) {
102 LastError = Err;
103 if (Err == instrprof_error::success)
104 return Error::success();
105 return make_error<InstrProfError>(Err);
106 }
107
108 Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
109
110 /// Clear the current error and return a successful one.
111 Error success() { return error(instrprof_error::success); }
112
113public:
114 /// Return true if the reader has finished reading the profile data.
115 bool isEOF() { return LastError == instrprof_error::eof; }
116
117 /// Return true if the reader encountered an error reading profiling data.
118 bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
119
120 /// Get the current error.
121 Error getError() {
122 if (hasError())
123 return make_error<InstrProfError>(LastError);
124 return Error::success();
125 }
126
127 /// Factory method to create an appropriately typed reader for the given
128 /// instrprof file.
129 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
130
131 static Expected<std::unique_ptr<InstrProfReader>>
132 create(std::unique_ptr<MemoryBuffer> Buffer);
133};
134
135/// Reader for the simple text based instrprof format.
136///
137/// This format is a simple text format that's suitable for test data. Records
138/// are separated by one or more blank lines, and record fields are separated by
139/// new lines.
140///
141/// Each record consists of a function name, a function hash, a number of
142/// counters, and then each counter value, in that order.
143class TextInstrProfReader : public InstrProfReader {
144private:
145 /// The profile data file contents.
146 std::unique_ptr<MemoryBuffer> DataBuffer;
147 /// Iterator over the profile data.
148 line_iterator Line;
149 bool IsIRLevelProfile = false;
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100150 bool HasCSIRLevelProfile = false;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100151
152 Error readValueProfileData(InstrProfRecord &Record);
153
154public:
155 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
156 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
157 TextInstrProfReader(const TextInstrProfReader &) = delete;
158 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
159
160 /// Return true if the given buffer is in text instrprof format.
161 static bool hasFormat(const MemoryBuffer &Buffer);
162
163 bool isIRLevelProfile() const override { return IsIRLevelProfile; }
164
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100165 bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
166
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100167 /// Read the header.
168 Error readHeader() override;
169
170 /// Read a single record.
171 Error readNextRecord(NamedInstrProfRecord &Record) override;
172
173 InstrProfSymtab &getSymtab() override {
174 assert(Symtab.get());
175 return *Symtab.get();
176 }
177};
178
179/// Reader for the raw instrprof binary format from runtime.
180///
/// This format is a raw memory dump of the instrumentation-based profiling data
182/// from the runtime. It has no index.
183///
184/// Templated on the unsigned type whose size matches pointers on the platform
185/// that wrote the profile.
186template <class IntPtrT>
187class RawInstrProfReader : public InstrProfReader {
188private:
189 /// The profile data file contents.
190 std::unique_ptr<MemoryBuffer> DataBuffer;
191 bool ShouldSwapBytes;
192 // The value of the version field of the raw profile data header. The lower 56
193 // bits specifies the format version and the most significant 8 bits specify
194 // the variant types of the profile.
195 uint64_t Version;
196 uint64_t CountersDelta;
197 uint64_t NamesDelta;
198 const RawInstrProf::ProfileData<IntPtrT> *Data;
199 const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
200 const uint64_t *CountersStart;
201 const char *NamesStart;
202 uint64_t NamesSize;
203 // After value profile is all read, this pointer points to
204 // the header of next profile data (if exists)
205 const uint8_t *ValueDataStart;
206 uint32_t ValueKindLast;
207 uint32_t CurValueDataSize;
208
209public:
210 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
211 : DataBuffer(std::move(DataBuffer)) {}
212 RawInstrProfReader(const RawInstrProfReader &) = delete;
213 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
214
215 static bool hasFormat(const MemoryBuffer &DataBuffer);
216 Error readHeader() override;
217 Error readNextRecord(NamedInstrProfRecord &Record) override;
218
219 bool isIRLevelProfile() const override {
220 return (Version & VARIANT_MASK_IR_PROF) != 0;
221 }
222
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100223 bool hasCSIRLevelProfile() const override {
224 return (Version & VARIANT_MASK_CSIR_PROF) != 0;
225 }
226
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100227 InstrProfSymtab &getSymtab() override {
228 assert(Symtab.get());
229 return *Symtab.get();
230 }
231
232private:
233 Error createSymtab(InstrProfSymtab &Symtab);
234 Error readNextHeader(const char *CurrentPos);
235 Error readHeader(const RawInstrProf::Header &Header);
236
237 template <class IntT> IntT swap(IntT Int) const {
238 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
239 }
240
241 support::endianness getDataEndianness() const {
242 support::endianness HostEndian = getHostEndianness();
243 if (!ShouldSwapBytes)
244 return HostEndian;
245 if (HostEndian == support::little)
246 return support::big;
247 else
248 return support::little;
249 }
250
251 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
252 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
253 }
254
255 Error readName(NamedInstrProfRecord &Record);
256 Error readFuncHash(NamedInstrProfRecord &Record);
257 Error readRawCounts(InstrProfRecord &Record);
258 Error readValueProfilingData(InstrProfRecord &Record);
259 bool atEnd() const { return Data == DataEnd; }
260
261 void advanceData() {
262 Data++;
263 ValueDataStart += CurValueDataSize;
264 }
265
266 const char *getNextHeaderPos() const {
267 assert(atEnd());
268 return (const char *)ValueDataStart;
269 }
270
271 const uint64_t *getCounter(IntPtrT CounterPtr) const {
272 ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
273 return CountersStart + Offset;
274 }
275
276 StringRef getName(uint64_t NameRef) const {
277 return Symtab->getFuncName(swap(NameRef));
278 }
279};
280
// Instantiations of the raw reader for the two supported widths of IntPtrT,
// the unsigned type matching the pointer size of the platform that wrote the
// profile.
using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;

namespace IndexedInstrProf {

// Opaque declaration only: the enumerators are not listed here, just the
// underlying type, which is enough to name HashT in the declarations below.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
289
290/// Trait for lookups into the on-disk hash table for the binary instrprof
291/// format.
292class InstrProfLookupTrait {
293 std::vector<NamedInstrProfRecord> DataBuffer;
294 IndexedInstrProf::HashT HashType;
295 unsigned FormatVersion;
296 // Endianness of the input value profile data.
297 // It should be LE by default, but can be changed
298 // for testing purpose.
299 support::endianness ValueProfDataEndianness = support::little;
300
301public:
302 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
303 : HashType(HashType), FormatVersion(FormatVersion) {}
304
305 using data_type = ArrayRef<NamedInstrProfRecord>;
306
307 using internal_key_type = StringRef;
308 using external_key_type = StringRef;
309 using hash_value_type = uint64_t;
310 using offset_type = uint64_t;
311
312 static bool EqualKey(StringRef A, StringRef B) { return A == B; }
313 static StringRef GetInternalKey(StringRef K) { return K; }
314 static StringRef GetExternalKey(StringRef K) { return K; }
315
316 hash_value_type ComputeHash(StringRef K);
317
318 static std::pair<offset_type, offset_type>
319 ReadKeyDataLength(const unsigned char *&D) {
320 using namespace support;
321
322 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
323 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
324 return std::make_pair(KeyLen, DataLen);
325 }
326
327 StringRef ReadKey(const unsigned char *D, offset_type N) {
328 return StringRef((const char *)D, N);
329 }
330
331 bool readValueProfilingData(const unsigned char *&D,
332 const unsigned char *const End);
333 data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
334
335 // Used for testing purpose only.
336 void setValueProfDataEndianness(support::endianness Endianness) {
337 ValueProfDataEndianness = Endianness;
338 }
339};
340
// Abstract interface to the index of an indexed profile. It is implemented in
// this file by the InstrProfReaderIndex template.
struct InstrProfReaderIndexBase {
  virtual ~InstrProfReaderIndexBase() = default;

  // Read all the profile records with the same key pointed to the current
  // iterator.
  virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;

  // Read all the profile records with the key equal to FuncName
  virtual Error getRecords(StringRef FuncName,
                           ArrayRef<NamedInstrProfRecord> &Data) = 0;
  // Move the current iterator on to the next key.
  virtual void advanceToNextKey() = 0;
  // Return true once the current iterator has run past the last key.
  virtual bool atEnd() const = 0;
  // Override the byte order assumed for value profile data.
  virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
  // Return the version of the profile format.
  virtual uint64_t getVersion() const = 0;
  // Profile variant queries.
  virtual bool isIRLevelProfile() const = 0;
  virtual bool hasCSIRLevelProfile() const = 0;
  // Fill the given symtab from the index.
  virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
359
// On-disk hash table type for the indexed profile, using InstrProfLookupTrait
// to decode its keys and records.
using OnDiskHashTableImplV3 =
    OnDiskIterableChainedHashTable<InstrProfLookupTrait>;

// Forward declaration; InstrProfReaderIndex names this template as a friend.
template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
365
366template <typename HashTableImpl>
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100367class InstrProfReaderIndex : public InstrProfReaderIndexBase {
368private:
369 std::unique_ptr<HashTableImpl> HashTable;
370 typename HashTableImpl::data_iterator RecordIterator;
371 uint64_t FormatVersion;
372
Andrew Walbran16937d02019-10-22 13:54:20 +0100373 friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
374
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100375public:
376 InstrProfReaderIndex(const unsigned char *Buckets,
377 const unsigned char *const Payload,
378 const unsigned char *const Base,
379 IndexedInstrProf::HashT HashType, uint64_t Version);
380 ~InstrProfReaderIndex() override = default;
381
382 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
383 Error getRecords(StringRef FuncName,
384 ArrayRef<NamedInstrProfRecord> &Data) override;
385 void advanceToNextKey() override { RecordIterator++; }
386
387 bool atEnd() const override {
388 return RecordIterator == HashTable->data_end();
389 }
390
391 void setValueProfDataEndianness(support::endianness Endianness) override {
392 HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
393 }
394
395 uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
396
397 bool isIRLevelProfile() const override {
398 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
399 }
400
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100401 bool hasCSIRLevelProfile() const override {
402 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
403 }
404
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100405 Error populateSymtab(InstrProfSymtab &Symtab) override {
406 return Symtab.create(HashTable->keys());
407 }
408};
409
Andrew Walbran16937d02019-10-22 13:54:20 +0100410/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
411class InstrProfReaderRemapper {
412public:
413 virtual ~InstrProfReaderRemapper() {}
414 virtual Error populateRemappings() { return Error::success(); }
415 virtual Error getRecords(StringRef FuncName,
416 ArrayRef<NamedInstrProfRecord> &Data) = 0;
417};
418
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100419/// Reader for the indexed binary instrprof format.
420class IndexedInstrProfReader : public InstrProfReader {
421private:
422 /// The profile data file contents.
423 std::unique_ptr<MemoryBuffer> DataBuffer;
Andrew Walbran16937d02019-10-22 13:54:20 +0100424 /// The profile remapping file contents.
425 std::unique_ptr<MemoryBuffer> RemappingBuffer;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100426 /// The index into the profile data.
427 std::unique_ptr<InstrProfReaderIndexBase> Index;
Andrew Walbran16937d02019-10-22 13:54:20 +0100428 /// The profile remapping file contents.
429 std::unique_ptr<InstrProfReaderRemapper> Remapper;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100430 /// Profile summary data.
431 std::unique_ptr<ProfileSummary> Summary;
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100432 /// Context sensitive profile summary data.
433 std::unique_ptr<ProfileSummary> CS_Summary;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100434 // Index to the current record in the record array.
435 unsigned RecordIndex;
436
437 // Read the profile summary. Return a pointer pointing to one byte past the
438 // end of the summary data if it exists or the input \c Cur.
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100439 // \c UseCS indicates whether to use the context-sensitive profile summary.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100440 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100441 const unsigned char *Cur, bool UseCS);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100442
443public:
Andrew Walbran16937d02019-10-22 13:54:20 +0100444 IndexedInstrProfReader(
445 std::unique_ptr<MemoryBuffer> DataBuffer,
446 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
447 : DataBuffer(std::move(DataBuffer)),
448 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100449 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
450 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
451
452 /// Return the profile version.
453 uint64_t getVersion() const { return Index->getVersion(); }
454 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100455 bool hasCSIRLevelProfile() const override {
456 return Index->hasCSIRLevelProfile();
457 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100458
459 /// Return true if the given buffer is in an indexed instrprof format.
460 static bool hasFormat(const MemoryBuffer &DataBuffer);
461
462 /// Read the file header.
463 Error readHeader() override;
464 /// Read a single record.
465 Error readNextRecord(NamedInstrProfRecord &Record) override;
466
467 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
468 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
469 uint64_t FuncHash);
470
471 /// Fill Counts with the profile data for the given function name.
472 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
473 std::vector<uint64_t> &Counts);
474
475 /// Return the maximum of all known function counts.
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100476 /// \c UseCS indicates whether to use the context-sensitive count.
477 uint64_t getMaximumFunctionCount(bool UseCS) {
478 if (UseCS) {
479 assert(CS_Summary && "No context sensitive profile summary");
480 return CS_Summary->getMaxFunctionCount();
481 } else {
482 assert(Summary && "No profile summary");
483 return Summary->getMaxFunctionCount();
484 }
485 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100486
487 /// Factory method to create an indexed reader.
488 static Expected<std::unique_ptr<IndexedInstrProfReader>>
Andrew Walbran16937d02019-10-22 13:54:20 +0100489 create(const Twine &Path, const Twine &RemappingPath = "");
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100490
491 static Expected<std::unique_ptr<IndexedInstrProfReader>>
Andrew Walbran16937d02019-10-22 13:54:20 +0100492 create(std::unique_ptr<MemoryBuffer> Buffer,
493 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100494
495 // Used for testing purpose only.
496 void setValueProfDataEndianness(support::endianness Endianness) {
497 Index->setValueProfDataEndianness(Endianness);
498 }
499
500 // See description in the base class. This interface is designed
501 // to be used by llvm-profdata (for dumping). Avoid using this when
502 // the client is the compiler.
503 InstrProfSymtab &getSymtab() override;
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100504
505 /// Return the profile summary.
506 /// \c UseCS indicates whether to use the context-sensitive summary.
507 ProfileSummary &getSummary(bool UseCS) {
508 if (UseCS) {
509 assert(CS_Summary && "No context sensitive summary");
510 return *(CS_Summary.get());
511 } else {
512 assert(Summary && "No profile summary");
513 return *(Summary.get());
514 }
515 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100516};
517
518} // end namespace llvm
519
520#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H