blob: 92fe825beefc80d81a1bb7a78dc0d1b6370a8ad3 [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001//===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===//
2//
Andrew Walbran16937d02019-10-22 13:54:20 +01003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01006//
7//===----------------------------------------------------------------------===//
8//
9// This file contains definitions needed for reading sample profiles.
10//
11// NOTE: If you are making changes to this file format, please remember
12// to document them in the Clang documentation at
13// tools/clang/docs/UsersManual.rst.
14//
15// Text format
16// -----------
17//
18// Sample profiles are written as ASCII text. The file is divided into
19// sections, which correspond to each of the functions executed at runtime.
20// Each section has the following format
21//
22// function1:total_samples:total_head_samples
23// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
24// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
25// ...
26// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
27// offsetA[.discriminator]: fnA:num_of_total_samples
28// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
29// ...
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020030// !CFGChecksum: num
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010031//
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020032// This is a nested tree in which the indentation represents the nesting level
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010033// of the inline stack. There are no blank lines in the file. And the spacing
34// within a single line is fixed. Additional spaces will result in an error
35// while reading the file.
36//
37// Any line starting with the '#' character is completely ignored.
38//
39// Inlined calls are represented with indentation. The Inline stack is a
40// stack of source locations in which the top of the stack represents the
41// leaf function, and the bottom of the stack represents the actual
42// symbol to which the instruction belongs.
43//
44// Function names must be mangled in order for the profile loader to
45// match them in the current translation unit. The two numbers in the
46// function header specify how many total samples were accumulated in the
47// function (first number), and the total number of samples accumulated
48// in the prologue of the function (second number). This head sample
49// count provides an indicator of how frequently the function is invoked.
50//
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020051// There are three types of lines in the function body.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010052//
53// * Sampled line represents the profile information of a source location.
54// * Callsite line represents the profile information of a callsite.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +020055// * Metadata line represents extra metadata of the function.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +010056//
57// Each sampled line may contain several items. Some are optional (marked
58// below):
59//
60// a. Source line offset. This number represents the line number
61// in the function where the sample was collected. The line number is
62// always relative to the line where symbol of the function is
63// defined. So, if the function has its header at line 280, the offset
64// 13 is at line 293 in the file.
65//
66// Note that this offset should never be a negative number. This could
67// happen in cases like macros. The debug machinery will register the
68// line number at the point of macro expansion. So, if the macro was
69// expanded in a line before the start of the function, the profile
70// converter should emit a 0 as the offset (this means that the optimizers
71// will not be able to associate a meaningful weight to the instructions
72// in the macro).
73//
74// b. [OPTIONAL] Discriminator. This is used if the sampled program
75// was compiled with DWARF discriminator support
76// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
77// DWARF discriminators are unsigned integer values that allow the
78// compiler to distinguish between multiple execution paths on the
79// same source line location.
80//
81// For example, consider the line of code ``if (cond) foo(); else bar();``.
82// If the predicate ``cond`` is true 80% of the time, then the edge
83// into function ``foo`` should be considered to be taken most of the
84// time. But both calls to ``foo`` and ``bar`` are at the same source
85// line, so a sample count at that line is not sufficient. The
86// compiler needs to know which part of that line is taken more
87// frequently.
88//
89// This is what discriminators provide. In this case, the calls to
90// ``foo`` and ``bar`` will be at the same line, but will have
91// different discriminator values. This allows the compiler to correctly
92// set edge weights into ``foo`` and ``bar``.
93//
94// c. Number of samples. This is an integer quantity representing the
95// number of samples collected by the profiler at this source
96// location.
97//
98// d. [OPTIONAL] Potential call targets and samples. If present, this
// line contains a call instruction. This models both direct and
// indirect calls. Each called target is listed together with the
// number of samples. For example,
101//
102// 130: 7 foo:3 bar:2 baz:7
103//
104// The above means that at relative line offset 130 there is a call
105// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
106// with ``baz()`` being the relatively more frequently called target.
107//
108// Each callsite line may contain several items. Some are optional.
109//
110// a. Source line offset. This number represents the line number of the
111// callsite that is inlined in the profiled binary.
112//
113// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
114//
115// c. Number of samples. This is an integer quantity representing the
116// total number of samples collected for the inlined instance at this
117// callsite
118//
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200119// Metadata line can occur in lines with one indent only, containing extra
120// information for the top-level function. Furthermore, metadata can only
121// occur after all the body samples and callsite samples.
122// Each metadata line may contain a particular type of metadata, marked by
123// the starting characters annotated with !. We process each metadata line
124// independently, hence each metadata line has to form an independent piece
125// of information that does not require cross-line reference.
126// We support the following types of metadata:
127//
128// a. CFG Checksum (a.k.a. function hash):
129// !CFGChecksum: 12345
130//
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100131//
132// Binary format
133// -------------
134//
135// This is a more compact encoding. Numbers are encoded as ULEB128 values
136// and all strings are encoded in a name table. The file is organized in
137// the following sections:
138//
139// MAGIC (uint64_t)
140// File identifier computed by function SPMagic() (0x5350524f463432ff)
141//
142// VERSION (uint32_t)
143// File format version number computed by SPVersion()
144//
145// SUMMARY
146// TOTAL_COUNT (uint64_t)
147// Total number of samples in the profile.
148// MAX_COUNT (uint64_t)
149// Maximum value of samples on a line.
150// MAX_FUNCTION_COUNT (uint64_t)
151// Maximum number of samples at function entry (head samples).
152// NUM_COUNTS (uint64_t)
153// Number of lines with samples.
154// NUM_FUNCTIONS (uint64_t)
155// Number of functions with samples.
156// NUM_DETAILED_SUMMARY_ENTRIES (size_t)
157// Number of entries in detailed summary
158// DETAILED_SUMMARY
159// A list of detailed summary entry. Each entry consists of
160// CUTOFF (uint32_t)
161// Required percentile of total sample count expressed as a fraction
162// multiplied by 1000000.
163// MIN_COUNT (uint64_t)
164// The minimum number of samples required to reach the target
165// CUTOFF.
166// NUM_COUNTS (uint64_t)
// Number of samples to get to the desired percentile.
168//
169// NAME TABLE
170// SIZE (uint32_t)
171// Number of entries in the name table.
172// NAMES
173// A NUL-separated list of SIZE strings.
174//
175// FUNCTION BODY (one for each uninlined function body present in the profile)
176// HEAD_SAMPLES (uint64_t) [only for top-level functions]
177// Total number of samples collected at the head (prologue) of the
178// function.
179// NOTE: This field should only be present for top-level functions
180// (i.e., not inlined into any caller). Inlined function calls
181// have no prologue, so they don't need this.
182// NAME_IDX (uint32_t)
183// Index into the name table indicating the function name.
184// SAMPLES (uint64_t)
185// Total number of samples collected in this function.
186// NRECS (uint32_t)
// Total number of sampling records in this function's profile.
188// BODY RECORDS
189// A list of NRECS entries. Each entry contains:
190// OFFSET (uint32_t)
191// Line offset from the start of the function.
192// DISCRIMINATOR (uint32_t)
193// Discriminator value (see description of discriminators
194// in the text format documentation above).
195// SAMPLES (uint64_t)
196// Number of samples collected at this location.
197// NUM_CALLS (uint32_t)
198// Number of non-inlined function calls made at this location. In the
199// case of direct calls, this number will always be 1. For indirect
200// calls (virtual functions and function pointers) this will
201// represent all the actual functions called at runtime.
202// CALL_TARGETS
203// A list of NUM_CALLS entries for each called function:
204// NAME_IDX (uint32_t)
205// Index into the name table with the callee name.
206// SAMPLES (uint64_t)
207// Number of samples collected at the call site.
208// NUM_INLINED_FUNCTIONS (uint32_t)
209// Number of callees inlined into this function.
210// INLINED FUNCTION RECORDS
211// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
212// callees.
213// OFFSET (uint32_t)
214// Line offset from the start of the function.
215// DISCRIMINATOR (uint32_t)
216// Discriminator value (see description of discriminators
217// in the text format documentation above).
218// FUNCTION BODY
219// A FUNCTION BODY entry describing the inlined function.
220//===----------------------------------------------------------------------===//
221
222#ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H
223#define LLVM_PROFILEDATA_SAMPLEPROFREADER_H
224
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200225#include "llvm/ADT/Optional.h"
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100226#include "llvm/ADT/SmallVector.h"
227#include "llvm/ADT/StringMap.h"
228#include "llvm/ADT/StringRef.h"
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100229#include "llvm/IR/DiagnosticInfo.h"
230#include "llvm/IR/Function.h"
231#include "llvm/IR/LLVMContext.h"
232#include "llvm/IR/ProfileSummary.h"
233#include "llvm/ProfileData/GCOV.h"
234#include "llvm/ProfileData/SampleProf.h"
235#include "llvm/Support/Debug.h"
236#include "llvm/Support/ErrorOr.h"
237#include "llvm/Support/MemoryBuffer.h"
Andrew Walbran16937d02019-10-22 13:54:20 +0100238#include "llvm/Support/SymbolRemappingReader.h"
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100239#include <algorithm>
240#include <cstdint>
241#include <memory>
242#include <string>
243#include <system_error>
244#include <vector>
245
246namespace llvm {
247
248class raw_ostream;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200249class Twine;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100250
251namespace sampleprof {
252
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200253class SampleProfileReader;
254
255/// SampleProfileReaderItaniumRemapper remaps the profile data from a
256/// sample profile data reader, by applying a provided set of equivalences
257/// between components of the symbol names in the profile.
258class SampleProfileReaderItaniumRemapper {
259public:
260 SampleProfileReaderItaniumRemapper(std::unique_ptr<MemoryBuffer> B,
261 std::unique_ptr<SymbolRemappingReader> SRR,
262 SampleProfileReader &R)
263 : Buffer(std::move(B)), Remappings(std::move(SRR)), Reader(R) {
264 assert(Remappings && "Remappings cannot be nullptr");
265 }
266
267 /// Create a remapper from the given remapping file. The remapper will
268 /// be used for profile read in by Reader.
269 static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
270 create(const std::string Filename, SampleProfileReader &Reader,
271 LLVMContext &C);
272
273 /// Create a remapper from the given Buffer. The remapper will
274 /// be used for profile read in by Reader.
275 static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
276 create(std::unique_ptr<MemoryBuffer> &B, SampleProfileReader &Reader,
277 LLVMContext &C);
278
279 /// Apply remappings to the profile read by Reader.
280 void applyRemapping(LLVMContext &Ctx);
281
282 bool hasApplied() { return RemappingApplied; }
283
284 /// Insert function name into remapper.
285 void insert(StringRef FunctionName) { Remappings->insert(FunctionName); }
286
287 /// Query whether there is equivalent in the remapper which has been
288 /// inserted.
289 bool exist(StringRef FunctionName) {
290 return Remappings->lookup(FunctionName);
291 }
292
293 /// Return the equivalent name in the profile for \p FunctionName if
294 /// it exists.
295 Optional<StringRef> lookUpNameInProfile(StringRef FunctionName);
296
297private:
298 // The buffer holding the content read from remapping file.
299 std::unique_ptr<MemoryBuffer> Buffer;
300 std::unique_ptr<SymbolRemappingReader> Remappings;
301 // Map remapping key to the name in the profile. By looking up the
302 // key in the remapper, a given new name can be mapped to the
303 // cannonical name using the NameMap.
304 DenseMap<SymbolRemappingReader::Key, StringRef> NameMap;
305 // The Reader the remapper is servicing.
306 SampleProfileReader &Reader;
307 // Indicate whether remapping has been applied to the profile read
308 // by Reader -- by calling applyRemapping.
309 bool RemappingApplied = false;
310};
311
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100312/// Sample-based profile reader.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100313///
314/// Each profile contains sample counts for all the functions
315/// executed. Inside each function, statements are annotated with the
316/// collected samples on all the instructions associated with that
317/// statement.
318///
319/// For this to produce meaningful data, the program needs to be
320/// compiled with some debug information (at minimum, line numbers:
321/// -gline-tables-only). Otherwise, it will be impossible to match IR
322/// instructions to the line numbers collected by the profiler.
323///
324/// From the profile file, we are interested in collecting the
325/// following information:
326///
327/// * A list of functions included in the profile (mangled names).
328///
329/// * For each function F:
330/// 1. The total number of samples collected in F.
331///
332/// 2. The samples collected at each line in F. To provide some
333/// protection against source code shuffling, line numbers should
334/// be relative to the start of the function.
335///
336/// The reader supports two file formats: text and binary. The text format
337/// is useful for debugging and testing, while the binary format is more
338/// compact and I/O efficient. They can both be used interchangeably.
339class SampleProfileReader {
340public:
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100341 SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
342 SampleProfileFormat Format = SPF_None)
343 : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100344
345 virtual ~SampleProfileReader() = default;
346
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100347 /// Read and validate the file header.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100348 virtual std::error_code readHeader() = 0;
349
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200350 /// The interface to read sample profiles from the associated file.
351 std::error_code read() {
352 if (std::error_code EC = readImpl())
353 return EC;
354 if (Remapper)
355 Remapper->applyRemapping(Ctx);
356 FunctionSamples::UseMD5 = useMD5();
357 return sampleprof_error::success;
358 }
359
360 /// The implementaion to read sample profiles from the associated file.
361 virtual std::error_code readImpl() = 0;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100362
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100363 /// Print the profile for \p FName on stream \p OS.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100364 void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
365
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200366 virtual void collectFuncsFrom(const Module &M) {}
Andrew Scull0372a572018-11-16 15:47:06 +0000367
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100368 /// Print all the profiles on stream \p OS.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100369 void dump(raw_ostream &OS = dbgs());
370
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100371 /// Return the samples collected for function \p F.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100372 FunctionSamples *getSamplesFor(const Function &F) {
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100373 // The function name may have been updated by adding suffix. Call
374 // a helper to (optionally) strip off suffixes so that we can
375 // match against the original function name in the profile.
376 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
377 return getSamplesFor(CanonName);
Andrew Walbran16937d02019-10-22 13:54:20 +0100378 }
379
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200380 /// Return the samples collected for function \p F, create empty
381 /// FunctionSamples if it doesn't exist.
382 FunctionSamples *getOrCreateSamplesFor(const Function &F) {
383 std::string FGUID;
384 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
385 CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
386 return &Profiles[CanonName];
387 }
388
Andrew Walbran16937d02019-10-22 13:54:20 +0100389 /// Return the samples collected for function \p F.
390 virtual FunctionSamples *getSamplesFor(StringRef Fname) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100391 std::string FGUID;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200392 Fname = getRepInFormat(Fname, useMD5(), FGUID);
Andrew Walbran16937d02019-10-22 13:54:20 +0100393 auto It = Profiles.find(Fname);
394 if (It != Profiles.end())
395 return &It->second;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200396
397 if (Remapper) {
398 if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
399 auto It = Profiles.find(*NameInProfile);
400 if (It != Profiles.end())
401 return &It->second;
402 }
403 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100404 return nullptr;
405 }
406
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100407 /// Return all the profiles.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100408 StringMap<FunctionSamples> &getProfiles() { return Profiles; }
409
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100410 /// Report a parse error message.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200411 void reportError(int64_t LineNumber, const Twine &Msg) const {
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100412 Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
413 LineNumber, Msg));
414 }
415
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100416 /// Create a sample profile reader appropriate to the file format.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200417 /// Create a remapper underlying if RemapFilename is not empty.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100418 static ErrorOr<std::unique_ptr<SampleProfileReader>>
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200419 create(const std::string Filename, LLVMContext &C,
420 const std::string RemapFilename = "");
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100421
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100422 /// Create a sample profile reader from the supplied memory buffer.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200423 /// Create a remapper underlying if RemapFilename is not empty.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100424 static ErrorOr<std::unique_ptr<SampleProfileReader>>
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200425 create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
426 const std::string RemapFilename = "");
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100427
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100428 /// Return the profile summary.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200429 ProfileSummary &getSummary() const { return *(Summary.get()); }
430
431 MemoryBuffer *getBuffer() const { return Buffer.get(); }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100432
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100433 /// \brief Return the profile format.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200434 SampleProfileFormat getFormat() const { return Format; }
435
436 /// Whether input profile is based on pseudo probes.
437 bool profileIsProbeBased() const { return ProfileIsProbeBased; }
438
439 /// Whether input profile is fully context-sensitive
440 bool profileIsCS() const { return ProfileIsCS; }
441
442 virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
443 return nullptr;
444 };
445
446 /// It includes all the names that have samples either in outline instance
447 /// or inline instance.
448 virtual std::vector<StringRef> *getNameTable() { return nullptr; }
449 virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; };
450
451 /// Return whether names in the profile are all MD5 numbers.
452 virtual bool useMD5() { return false; }
453
454 SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); }
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100455
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100456protected:
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100457 /// Map every function to its associated profile.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100458 ///
459 /// The profile of every function executed at runtime is collected
460 /// in the structure FunctionSamples. This maps function objects
461 /// to their corresponding profiles.
462 StringMap<FunctionSamples> Profiles;
463
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100464 /// LLVM context used to emit diagnostics.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100465 LLVMContext &Ctx;
466
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100467 /// Memory buffer holding the profile file.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100468 std::unique_ptr<MemoryBuffer> Buffer;
469
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100470 /// Profile summary information.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100471 std::unique_ptr<ProfileSummary> Summary;
472
Andrew Walbran16937d02019-10-22 13:54:20 +0100473 /// Take ownership of the summary of this reader.
474 static std::unique_ptr<ProfileSummary>
475 takeSummary(SampleProfileReader &Reader) {
476 return std::move(Reader.Summary);
477 }
478
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100479 /// Compute summary for this profile.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100480 void computeSummary();
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100481
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200482 std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
483
484 /// \brief Whether samples are collected based on pseudo probes.
485 bool ProfileIsProbeBased = false;
486
487 bool ProfileIsCS = false;
488
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100489 /// \brief The format of sample.
490 SampleProfileFormat Format = SPF_None;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100491};
492
493class SampleProfileReaderText : public SampleProfileReader {
494public:
495 SampleProfileReaderText(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100496 : SampleProfileReader(std::move(B), C, SPF_Text) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100497
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100498 /// Read and validate the file header.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100499 std::error_code readHeader() override { return sampleprof_error::success; }
500
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100501 /// Read sample profiles from the associated file.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200502 std::error_code readImpl() override;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100503
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100504 /// Return true if \p Buffer is in the format supported by this class.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100505 static bool hasFormat(const MemoryBuffer &Buffer);
506};
507
508class SampleProfileReaderBinary : public SampleProfileReader {
509public:
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100510 SampleProfileReaderBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
511 SampleProfileFormat Format = SPF_None)
512 : SampleProfileReader(std::move(B), C, Format) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100513
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100514 /// Read and validate the file header.
Andrew Scull0372a572018-11-16 15:47:06 +0000515 virtual std::error_code readHeader() override;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100516
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100517 /// Read sample profiles from the associated file.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200518 std::error_code readImpl() override;
519
520 /// It includes all the names that have samples either in outline instance
521 /// or inline instance.
522 virtual std::vector<StringRef> *getNameTable() override { return &NameTable; }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100523
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100524protected:
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100525 /// Read a numeric value of type T from the profile.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100526 ///
527 /// If an error occurs during decoding, a diagnostic message is emitted and
528 /// EC is set.
529 ///
530 /// \returns the read value.
531 template <typename T> ErrorOr<T> readNumber();
532
Andrew Scull0372a572018-11-16 15:47:06 +0000533 /// Read a numeric value of type T from the profile. The value is saved
534 /// without encoded.
535 template <typename T> ErrorOr<T> readUnencodedNumber();
536
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100537 /// Read a string from the profile.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100538 ///
539 /// If an error occurs during decoding, a diagnostic message is emitted and
540 /// EC is set.
541 ///
542 /// \returns the read value.
543 ErrorOr<StringRef> readString();
544
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100545 /// Read the string index and check whether it overflows the table.
546 template <typename T> inline ErrorOr<uint32_t> readStringIndex(T &Table);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100547
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100548 /// Return true if we've reached the end of file.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100549 bool at_eof() const { return Data >= End; }
550
Andrew Scull0372a572018-11-16 15:47:06 +0000551 /// Read the next function profile instance.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200552 std::error_code readFuncProfile(const uint8_t *Start);
Andrew Scull0372a572018-11-16 15:47:06 +0000553
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100554 /// Read the contents of the given profile instance.
555 std::error_code readProfile(FunctionSamples &FProfile);
556
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200557 /// Read the contents of Magic number and Version number.
558 std::error_code readMagicIdent();
559
560 /// Read profile summary.
561 std::error_code readSummary();
562
563 /// Read the whole name table.
564 virtual std::error_code readNameTable();
565
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100566 /// Points to the current location in the buffer.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100567 const uint8_t *Data = nullptr;
568
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100569 /// Points to the end of the buffer.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100570 const uint8_t *End = nullptr;
571
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200572 /// Function name table.
573 std::vector<StringRef> NameTable;
574
575 /// Read a string indirectly via the name table.
576 virtual ErrorOr<StringRef> readStringFromTable();
577
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100578private:
579 std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100580 virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100581};
582
583class SampleProfileReaderRawBinary : public SampleProfileReaderBinary {
584private:
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100585 virtual std::error_code verifySPMagic(uint64_t Magic) override;
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100586
587public:
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200588 SampleProfileReaderRawBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
589 SampleProfileFormat Format = SPF_Binary)
590 : SampleProfileReaderBinary(std::move(B), C, Format) {}
591
592 /// \brief Return true if \p Buffer is in the format supported by this class.
593 static bool hasFormat(const MemoryBuffer &Buffer);
594};
595
596/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase defines
597/// the basic structure of the extensible binary format.
598/// The format is organized in sections except the magic and version number
599/// at the beginning. There is a section table before all the sections, and
600/// each entry in the table describes the entry type, start, size and
601/// attributes. The format in each section is defined by the section itself.
602///
603/// It is easy to add a new section while maintaining the backward
604/// compatibility of the profile. Nothing extra needs to be done. If we want
605/// to extend an existing section, like add cache misses information in
606/// addition to the sample count in the profile body, we can add a new section
607/// with the extension and retire the existing section, and we could choose
608/// to keep the parser of the old section if we want the reader to be able
609/// to read both new and old format profile.
610///
611/// SampleProfileReaderExtBinary/SampleProfileWriterExtBinary define the
612/// commonly used sections of a profile in extensible binary format. It is
613/// possible to define other types of profile inherited from
614/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase.
615class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
616private:
617 std::error_code decompressSection(const uint8_t *SecStart,
618 const uint64_t SecSize,
619 const uint8_t *&DecompressBuf,
620 uint64_t &DecompressBufSize);
621
622 BumpPtrAllocator Allocator;
623
624protected:
625 std::vector<SecHdrTableEntry> SecHdrTable;
626 std::error_code readSecHdrTableEntry(uint32_t Idx);
627 std::error_code readSecHdrTable();
628
629 std::error_code readFuncMetadata();
630 std::error_code readFuncOffsetTable();
631 std::error_code readFuncProfiles();
632 std::error_code readMD5NameTable();
633 std::error_code readNameTableSec(bool IsMD5);
634 std::error_code readProfileSymbolList();
635
636 virtual std::error_code readHeader() override;
637 virtual std::error_code verifySPMagic(uint64_t Magic) override = 0;
638 virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
639 const SecHdrTableEntry &Entry);
640 // placeholder for subclasses to dispatch their own section readers.
641 virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0;
642 virtual ErrorOr<StringRef> readStringFromTable() override;
643
644 std::unique_ptr<ProfileSymbolList> ProfSymList;
645
646 /// The table mapping from function name to the offset of its FunctionSample
647 /// towards file start.
648 DenseMap<StringRef, uint64_t> FuncOffsetTable;
649 /// The set containing the functions to use when compiling a module.
650 DenseSet<StringRef> FuncsToUse;
651 /// Use all functions from the input profile.
652 bool UseAllFuncs = true;
653
654 /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't
655 /// need to be read in up front and can be directly accessed using index.
656 bool FixedLengthMD5 = false;
657 /// The starting address of NameTable containing fixed length MD5.
658 const uint8_t *MD5NameMemStart = nullptr;
659
660 /// If MD5 is used in NameTable section, the section saves uint64_t data.
661 /// The uint64_t data has to be converted to a string and then the string
662 /// will be used to initialize StringRef in NameTable.
663 /// Note NameTable contains StringRef so it needs another buffer to own
664 /// the string data. MD5StringBuf serves as the string buffer that is
665 /// referenced by NameTable (vector of StringRef). We make sure
666 /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
667 std::unique_ptr<std::vector<std::string>> MD5StringBuf;
668
669public:
670 SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
671 LLVMContext &C, SampleProfileFormat Format)
672 : SampleProfileReaderBinary(std::move(B), C, Format) {}
673
674 /// Read sample profiles in extensible format from the associated file.
675 std::error_code readImpl() override;
676
677 /// Get the total size of all \p Type sections.
678 uint64_t getSectionSize(SecType Type);
679 /// Get the total size of header and all sections.
680 uint64_t getFileSize();
681 virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override;
682
683 /// Collect functions with definitions in Module \p M.
684 void collectFuncsFrom(const Module &M) override;
685
686 /// Return whether names in the profile are all MD5 numbers.
687 virtual bool useMD5() override { return MD5StringBuf.get(); }
688
689 virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
690 return std::move(ProfSymList);
691 };
692};
693
694class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
695private:
696 virtual std::error_code verifySPMagic(uint64_t Magic) override;
697 virtual std::error_code
698 readCustomSection(const SecHdrTableEntry &Entry) override {
699 return sampleprof_error::success;
700 };
701
702public:
703 SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
704 SampleProfileFormat Format = SPF_Ext_Binary)
705 : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {}
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100706
707 /// \brief Return true if \p Buffer is in the format supported by this class.
708 static bool hasFormat(const MemoryBuffer &Buffer);
709};
710
711class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
712private:
713 /// Function name table.
714 std::vector<std::string> NameTable;
Andrew Scull0372a572018-11-16 15:47:06 +0000715 /// The table mapping from function name to the offset of its FunctionSample
716 /// towards file start.
717 DenseMap<StringRef, uint64_t> FuncOffsetTable;
718 /// The set containing the functions to use when compiling a module.
719 DenseSet<StringRef> FuncsToUse;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200720 /// Use all functions from the input profile.
721 bool UseAllFuncs = true;
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100722 virtual std::error_code verifySPMagic(uint64_t Magic) override;
723 virtual std::error_code readNameTable() override;
724 /// Read a string indirectly via the name table.
725 virtual ErrorOr<StringRef> readStringFromTable() override;
Andrew Scull0372a572018-11-16 15:47:06 +0000726 virtual std::error_code readHeader() override;
727 std::error_code readFuncOffsetTable();
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100728
729public:
730 SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B,
731 LLVMContext &C)
732 : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) {}
733
734 /// \brief Return true if \p Buffer is in the format supported by this class.
735 static bool hasFormat(const MemoryBuffer &Buffer);
Andrew Scull0372a572018-11-16 15:47:06 +0000736
737 /// Read samples only for functions to use.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200738 std::error_code readImpl() override;
Andrew Scull0372a572018-11-16 15:47:06 +0000739
740 /// Collect functions to be used when compiling Module \p M.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200741 void collectFuncsFrom(const Module &M) override;
742
743 /// Return whether names in the profile are all MD5 numbers.
744 virtual bool useMD5() override { return true; }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100745};
746
747using InlineCallStack = SmallVector<FunctionSamples *, 10>;
748
// Supported histogram types in GCC. Currently, we only need support for
// call target histograms.
//
// The enumerators carry no explicit initializers, so they are numbered
// 0 through 7 in declaration order. Do not reorder them.
enum HistType {
  HIST_TYPE_INTERVAL,
  HIST_TYPE_POW2,
  HIST_TYPE_SINGLE_VALUE,
  HIST_TYPE_CONST_DELTA,
  HIST_TYPE_INDIR_CALL,
  HIST_TYPE_AVERAGE,
  HIST_TYPE_IOR,
  HIST_TYPE_INDIR_CALL_TOPN
};
761
762class SampleProfileReaderGCC : public SampleProfileReader {
763public:
764 SampleProfileReaderGCC(std::unique_ptr<MemoryBuffer> B, LLVMContext &C)
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100765 : SampleProfileReader(std::move(B), C, SPF_GCC),
766 GcovBuffer(Buffer.get()) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100767
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100768 /// Read and validate the file header.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100769 std::error_code readHeader() override;
770
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100771 /// Read sample profiles from the associated file.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200772 std::error_code readImpl() override;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100773
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100774 /// Return true if \p Buffer is in the format supported by this class.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100775 static bool hasFormat(const MemoryBuffer &Buffer);
776
777protected:
778 std::error_code readNameTable();
779 std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack,
780 bool Update, uint32_t Offset);
781 std::error_code readFunctionProfiles();
782 std::error_code skipNextWord();
783 template <typename T> ErrorOr<T> readNumber();
784 ErrorOr<StringRef> readString();
785
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100786 /// Read the section tag and check that it's the same as \p Expected.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100787 std::error_code readSectionTag(uint32_t Expected);
788
789 /// GCOV buffer containing the profile.
790 GCOVBuffer GcovBuffer;
791
792 /// Function names in this profile.
793 std::vector<std::string> Names;
794
795 /// GCOV tags used to separate sections in the profile file.
796 static const uint32_t GCOVTagAFDOFileNames = 0xaa000000;
797 static const uint32_t GCOVTagAFDOFunction = 0xac000000;
798};
799
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100800} // end namespace sampleprof
801
802} // end namespace llvm
803
804#endif // LLVM_PROFILEDATA_SAMPLEPROFREADER_H