//===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains common definitions used in the reading and writing of
11// sample profile data.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
16#define LLVM_PROFILEDATA_SAMPLEPROF_H
17
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/IR/Function.h"
23#include "llvm/IR/GlobalValue.h"
24#include "llvm/IR/Module.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/ErrorOr.h"
27#include "llvm/Support/MathExtras.h"
28#include <algorithm>
29#include <cstdint>
30#include <map>
31#include <string>
32#include <system_error>
33#include <utility>
34
35namespace llvm {
36
37class raw_ostream;
38
39const std::error_category &sampleprof_category();
40
/// Error codes used by the sample profile support code.
///
/// Enumerators take their values implicitly, in declaration order, counting
/// up from success == 0.
enum class sampleprof_error {
  /// No error. MergeResult() treats this as "no error recorded yet".
  success = 0,
  bad_magic,
  unsupported_version,
  too_large,
  truncated,
  malformed,
  unrecognized_format,
  unsupported_writing_format,
  truncated_name_table,
  not_implemented,
  /// Reported when a saturating sample-count update overflowed.
  counter_overflow,
};
54
55inline std::error_code make_error_code(sampleprof_error E) {
56 return std::error_code(static_cast<int>(E), sampleprof_category());
57}
58
59inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
60 sampleprof_error Result) {
61 // Prefer first error encountered as later errors may be secondary effects of
62 // the initial problem.
63 if (Accumulator == sampleprof_error::success &&
64 Result != sampleprof_error::success)
65 Accumulator = Result;
66 return Accumulator;
67}
68
69} // end namespace llvm
70
71namespace std {
72
73template <>
74struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
75
76} // end namespace std
77
78namespace llvm {
79namespace sampleprof {
80
static inline uint64_t SPMagic() {
  // The seven tag characters fill the high-order bytes, most significant
  // first; the lowest byte is fixed at 0xff.
  const char Tag[] = {'S', 'P', 'R', 'O', 'F', '4', '2'};
  uint64_t Magic = 0;
  for (char C : Tag)
    Magic = (Magic << 8) | uint64_t(C);
  return (Magic << 8) | uint64_t(0xff);
}
87
// Returns the constant 103 (presumably the sample profile format version that
// accompanies SPMagic() -- confirm against the reader/writer).
static inline uint64_t SPVersion() {
  const uint64_t Version = 103;
  return Version;
}
89
90/// Represents the relative location of an instruction.
91///
92/// Instruction locations are specified by the line offset from the
93/// beginning of the function (marked by the line where the function
94/// header is) and the discriminator value within that line.
95///
96/// The discriminator value is useful to distinguish instructions
97/// that are on the same line but belong to different basic blocks
98/// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
99struct LineLocation {
100 LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
101
102 void print(raw_ostream &OS) const;
103 void dump() const;
104
105 bool operator<(const LineLocation &O) const {
106 return LineOffset < O.LineOffset ||
107 (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
108 }
109
110 uint32_t LineOffset;
111 uint32_t Discriminator;
112};
113
114raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
115
116/// Representation of a single sample record.
117///
/// A sample record is represented by a positive integer value, which
/// indicates how frequently the associated line location was executed.
120///
121/// Additionally, if the associated location contains a function call,
122/// the record will hold a list of all the possible called targets. For
123/// direct calls, this will be the exact function being invoked. For
124/// indirect calls (function pointers, virtual table dispatch), this
125/// will be a list of one or more functions.
126class SampleRecord {
127public:
128 using CallTargetMap = StringMap<uint64_t>;
129
130 SampleRecord() = default;
131
132 /// Increment the number of samples for this record by \p S.
133 /// Optionally scale sample count \p S by \p Weight.
134 ///
135 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
136 /// around unsigned integers.
137 sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
138 bool Overflowed;
139 NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
140 return Overflowed ? sampleprof_error::counter_overflow
141 : sampleprof_error::success;
142 }
143
144 /// Add called function \p F with samples \p S.
145 /// Optionally scale sample count \p S by \p Weight.
146 ///
147 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
148 /// around unsigned integers.
149 sampleprof_error addCalledTarget(StringRef F, uint64_t S,
150 uint64_t Weight = 1) {
151 uint64_t &TargetSamples = CallTargets[F];
152 bool Overflowed;
153 TargetSamples =
154 SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
155 return Overflowed ? sampleprof_error::counter_overflow
156 : sampleprof_error::success;
157 }
158
159 /// Return true if this sample record contains function calls.
160 bool hasCalls() const { return !CallTargets.empty(); }
161
162 uint64_t getSamples() const { return NumSamples; }
163 const CallTargetMap &getCallTargets() const { return CallTargets; }
164
165 /// Merge the samples in \p Other into this record.
166 /// Optionally scale sample counts by \p Weight.
167 sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
168 sampleprof_error Result = addSamples(Other.getSamples(), Weight);
169 for (const auto &I : Other.getCallTargets()) {
170 MergeResult(Result, addCalledTarget(I.first(), I.second, Weight));
171 }
172 return Result;
173 }
174
175 void print(raw_ostream &OS, unsigned Indent) const;
176 void dump() const;
177
178private:
179 uint64_t NumSamples = 0;
180 CallTargetMap CallTargets;
181};
182
183raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
184
185class FunctionSamples;
186
187using BodySampleMap = std::map<LineLocation, SampleRecord>;
188// NOTE: Using a StringMap here makes parsed profiles consume around 17% more
189// memory, which is *very* significant for large profiles.
190using FunctionSamplesMap = std::map<std::string, FunctionSamples>;
191using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
192
193/// Representation of the samples collected for a function.
194///
195/// This data structure contains all the collected samples for the body
196/// of a function. Each sample corresponds to a LineLocation instance
197/// within the body of the function.
198class FunctionSamples {
199public:
200 FunctionSamples() = default;
201
202 void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
203 void dump() const;
204
205 sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
206 bool Overflowed;
207 TotalSamples =
208 SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
209 return Overflowed ? sampleprof_error::counter_overflow
210 : sampleprof_error::success;
211 }
212
213 sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
214 bool Overflowed;
215 TotalHeadSamples =
216 SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
217 return Overflowed ? sampleprof_error::counter_overflow
218 : sampleprof_error::success;
219 }
220
221 sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
222 uint64_t Num, uint64_t Weight = 1) {
223 return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
224 Num, Weight);
225 }
226
227 sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
228 uint32_t Discriminator,
229 StringRef FName, uint64_t Num,
230 uint64_t Weight = 1) {
231 return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
232 FName, Num, Weight);
233 }
234
235 /// Return the number of samples collected at the given location.
236 /// Each location is specified by \p LineOffset and \p Discriminator.
237 /// If the location is not found in profile, return error.
238 ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
239 uint32_t Discriminator) const {
240 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
241 if (ret == BodySamples.end())
242 return std::error_code();
243 else
244 return ret->second.getSamples();
245 }
246
247 /// Returns the call target map collected at a given location.
248 /// Each location is specified by \p LineOffset and \p Discriminator.
249 /// If the location is not found in profile, return error.
250 ErrorOr<SampleRecord::CallTargetMap>
251 findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
252 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
253 if (ret == BodySamples.end())
254 return std::error_code();
255 return ret->second.getCallTargets();
256 }
257
258 /// Return the function samples at the given callsite location.
259 FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
260 return CallsiteSamples[Loc];
261 }
262
263 /// Returns the FunctionSamplesMap at the given \p Loc.
264 const FunctionSamplesMap *
265 findFunctionSamplesMapAt(const LineLocation &Loc) const {
266 auto iter = CallsiteSamples.find(Loc);
267 if (iter == CallsiteSamples.end())
268 return nullptr;
269 return &iter->second;
270 }
271
272 /// Returns a pointer to FunctionSamples at the given callsite location \p Loc
273 /// with callee \p CalleeName. If no callsite can be found, relax the
274 /// restriction to return the FunctionSamples at callsite location \p Loc
275 /// with the maximum total sample count.
276 const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
277 StringRef CalleeName) const {
278 auto iter = CallsiteSamples.find(Loc);
279 if (iter == CallsiteSamples.end())
280 return nullptr;
281 auto FS = iter->second.find(CalleeName);
282 if (FS != iter->second.end())
283 return &FS->second;
284 // If we cannot find exact match of the callee name, return the FS with
285 // the max total count.
286 uint64_t MaxTotalSamples = 0;
287 const FunctionSamples *R = nullptr;
288 for (const auto &NameFS : iter->second)
289 if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
290 MaxTotalSamples = NameFS.second.getTotalSamples();
291 R = &NameFS.second;
292 }
293 return R;
294 }
295
296 bool empty() const { return TotalSamples == 0; }
297
298 /// Return the total number of samples collected inside the function.
299 uint64_t getTotalSamples() const { return TotalSamples; }
300
301 /// Return the total number of branch samples that have the function as the
302 /// branch target. This should be equivalent to the sample of the first
303 /// instruction of the symbol. But as we directly get this info for raw
304 /// profile without referring to potentially inaccurate debug info, this
305 /// gives more accurate profile data and is preferred for standalone symbols.
306 uint64_t getHeadSamples() const { return TotalHeadSamples; }
307
308 /// Return the sample count of the first instruction of the function.
309 /// The function can be either a standalone symbol or an inlined function.
310 uint64_t getEntrySamples() const {
311 // Use either BodySamples or CallsiteSamples which ever has the smaller
312 // lineno.
313 if (!BodySamples.empty() &&
314 (CallsiteSamples.empty() ||
315 BodySamples.begin()->first < CallsiteSamples.begin()->first))
316 return BodySamples.begin()->second.getSamples();
317 if (!CallsiteSamples.empty()) {
318 uint64_t T = 0;
319 // An indirect callsite may be promoted to several inlined direct calls.
320 // We need to get the sum of them.
321 for (const auto &N_FS : CallsiteSamples.begin()->second)
322 T += N_FS.second.getEntrySamples();
323 return T;
324 }
325 return 0;
326 }
327
328 /// Return all the samples collected in the body of the function.
329 const BodySampleMap &getBodySamples() const { return BodySamples; }
330
331 /// Return all the callsite samples collected in the body of the function.
332 const CallsiteSampleMap &getCallsiteSamples() const {
333 return CallsiteSamples;
334 }
335
336 /// Merge the samples in \p Other into this one.
337 /// Optionally scale samples by \p Weight.
338 sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
339 sampleprof_error Result = sampleprof_error::success;
340 Name = Other.getName();
341 MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
342 MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
343 for (const auto &I : Other.getBodySamples()) {
344 const LineLocation &Loc = I.first;
345 const SampleRecord &Rec = I.second;
346 MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
347 }
348 for (const auto &I : Other.getCallsiteSamples()) {
349 const LineLocation &Loc = I.first;
350 FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
351 for (const auto &Rec : I.second)
352 MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
353 }
354 return Result;
355 }
356
357 /// Recursively traverses all children, if the total sample count of the
358 /// corresponding function is no less than \p Threshold, add its corresponding
359 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
360 /// to \p S.
361 void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
362 uint64_t Threshold) const {
363 if (TotalSamples <= Threshold)
364 return;
365 S.insert(Function::getGUID(Name));
366 // Import hot CallTargets, which may not be available in IR because full
367 // profile annotation cannot be done until backend compilation in ThinLTO.
368 for (const auto &BS : BodySamples)
369 for (const auto &TS : BS.second.getCallTargets())
370 if (TS.getValue() > Threshold) {
371 Function *Callee = M->getFunction(TS.getKey());
372 if (!Callee || !Callee->getSubprogram())
373 S.insert(Function::getGUID(TS.getKey()));
374 }
375 for (const auto &CS : CallsiteSamples)
376 for (const auto &NameFS : CS.second)
377 NameFS.second.findInlinedFunctions(S, M, Threshold);
378 }
379
380 /// Set the name of the function.
381 void setName(StringRef FunctionName) { Name = FunctionName; }
382
383 /// Return the function name.
384 const StringRef &getName() const { return Name; }
385
386 /// Returns the line offset to the start line of the subprogram.
387 /// We assume that a single function will not exceed 65535 LOC.
388 static unsigned getOffset(const DILocation *DIL);
389
390 /// \brief Get the FunctionSamples of the inline instance where DIL originates
391 /// from.
392 ///
393 /// The FunctionSamples of the instruction (Machine or IR) associated to
394 /// \p DIL is the inlined instance in which that instruction is coming from.
395 /// We traverse the inline stack of that instruction, and match it with the
396 /// tree nodes in the profile.
397 ///
398 /// \returns the FunctionSamples pointer to the inlined instance.
399 const FunctionSamples *findFunctionSamples(const DILocation *DIL) const;
400
401private:
402 /// Mangled name of the function.
403 StringRef Name;
404
405 /// Total number of samples collected inside this function.
406 ///
407 /// Samples are cumulative, they include all the samples collected
408 /// inside this function and all its inlined callees.
409 uint64_t TotalSamples = 0;
410
411 /// Total number of samples collected at the head of the function.
412 /// This is an approximation of the number of calls made to this function
413 /// at runtime.
414 uint64_t TotalHeadSamples = 0;
415
416 /// Map instruction locations to collected samples.
417 ///
418 /// Each entry in this map contains the number of samples
419 /// collected at the corresponding line offset. All line locations
420 /// are an offset from the start of the function.
421 BodySampleMap BodySamples;
422
423 /// Map call sites to collected samples for the called function.
424 ///
425 /// Each entry in this map corresponds to all the samples
426 /// collected for the inlined function call at the given
427 /// location. For example, given:
428 ///
429 /// void foo() {
430 /// 1 bar();
431 /// ...
432 /// 8 baz();
433 /// }
434 ///
435 /// If the bar() and baz() calls were inlined inside foo(), this
436 /// map will contain two entries. One for all the samples collected
437 /// in the call to bar() at line offset 1, the other for all the samples
438 /// collected in the call to baz() at line offset 8.
439 CallsiteSampleMap CallsiteSamples;
440};
441
442raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
443
444/// Sort a LocationT->SampleT map by LocationT.
445///
446/// It produces a sorted list of <LocationT, SampleT> records by ascending
447/// order of LocationT.
448template <class LocationT, class SampleT> class SampleSorter {
449public:
450 using SamplesWithLoc = std::pair<const LocationT, SampleT>;
451 using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
452
453 SampleSorter(const std::map<LocationT, SampleT> &Samples) {
454 for (const auto &I : Samples)
455 V.push_back(&I);
456 std::stable_sort(V.begin(), V.end(),
457 [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
458 return A->first < B->first;
459 });
460 }
461
462 const SamplesWithLocList &get() const { return V; }
463
464private:
465 SamplesWithLocList V;
466};
467
468} // end namespace sampleprof
469} // end namespace llvm
470
471#endif // LLVM_PROFILEDATA_SAMPLEPROF_H