Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===// |
| 2 | // |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H |
| 10 | #define LLVM_DEBUGINFO_DWARFDEBUGLINE_H |
| 11 | |
| 12 | #include "llvm/ADT/Optional.h" |
| 13 | #include "llvm/ADT/StringRef.h" |
| 14 | #include "llvm/DebugInfo/DIContext.h" |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 15 | #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 16 | #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" |
| 17 | #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
| 18 | #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 19 | #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 20 | #include "llvm/Support/MD5.h" |
| 21 | #include <cstdint> |
| 22 | #include <map> |
| 23 | #include <string> |
| 24 | #include <vector> |
| 25 | |
| 26 | namespace llvm { |
| 27 | |
| 28 | class DWARFUnit; |
| 29 | class raw_ostream; |
| 30 | |
| 31 | class DWARFDebugLine { |
| 32 | public: |
| 33 | struct FileNameEntry { |
| 34 | FileNameEntry() = default; |
| 35 | |
| 36 | DWARFFormValue Name; |
| 37 | uint64_t DirIdx = 0; |
| 38 | uint64_t ModTime = 0; |
| 39 | uint64_t Length = 0; |
| 40 | MD5::MD5Result Checksum; |
| 41 | DWARFFormValue Source; |
| 42 | }; |
| 43 | |
| 44 | /// Tracks which optional content types are present in a DWARF file name |
| 45 | /// entry format. |
| 46 | struct ContentTypeTracker { |
| 47 | ContentTypeTracker() = default; |
| 48 | |
| 49 | /// Whether filename entries provide a modification timestamp. |
| 50 | bool HasModTime = false; |
| 51 | /// Whether filename entries provide a file size. |
| 52 | bool HasLength = false; |
| 53 | /// For v5, whether filename entries provide an MD5 checksum. |
| 54 | bool HasMD5 = false; |
| 55 | /// For v5, whether filename entries provide source text. |
| 56 | bool HasSource = false; |
| 57 | |
| 58 | /// Update tracked content types with \p ContentType. |
| 59 | void trackContentType(dwarf::LineNumberEntryFormat ContentType); |
| 60 | }; |
| 61 | |
| 62 | struct Prologue { |
| 63 | Prologue(); |
| 64 | |
| 65 | /// The size in bytes of the statement information for this compilation unit |
| 66 | /// (not including the total_length field itself). |
| 67 | uint64_t TotalLength; |
| 68 | /// Version, address size (starting in v5), and DWARF32/64 format; these |
| 69 | /// parameters affect interpretation of forms (used in the directory and |
| 70 | /// file tables starting with v5). |
| 71 | dwarf::FormParams FormParams; |
| 72 | /// The number of bytes following the prologue_length field to the beginning |
| 73 | /// of the first byte of the statement program itself. |
| 74 | uint64_t PrologueLength; |
| 75 | /// In v5, size in bytes of a segment selector. |
| 76 | uint8_t SegSelectorSize; |
| 77 | /// The size in bytes of the smallest target machine instruction. Statement |
| 78 | /// program opcodes that alter the address register first multiply their |
| 79 | /// operands by this value. |
| 80 | uint8_t MinInstLength; |
| 81 | /// The maximum number of individual operations that may be encoded in an |
| 82 | /// instruction. |
| 83 | uint8_t MaxOpsPerInst; |
| 84 | /// The initial value of theis_stmtregister. |
| 85 | uint8_t DefaultIsStmt; |
| 86 | /// This parameter affects the meaning of the special opcodes. See below. |
| 87 | int8_t LineBase; |
| 88 | /// This parameter affects the meaning of the special opcodes. See below. |
| 89 | uint8_t LineRange; |
| 90 | /// The number assigned to the first special opcode. |
| 91 | uint8_t OpcodeBase; |
| 92 | /// This tracks which optional file format content types are present. |
| 93 | ContentTypeTracker ContentTypes; |
| 94 | std::vector<uint8_t> StandardOpcodeLengths; |
| 95 | std::vector<DWARFFormValue> IncludeDirectories; |
| 96 | std::vector<FileNameEntry> FileNames; |
| 97 | |
| 98 | const dwarf::FormParams getFormParams() const { return FormParams; } |
| 99 | uint16_t getVersion() const { return FormParams.Version; } |
| 100 | uint8_t getAddressSize() const { return FormParams.AddrSize; } |
| 101 | bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; } |
| 102 | |
| 103 | uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; } |
| 104 | |
| 105 | uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; } |
| 106 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 107 | bool totalLengthIsValid() const; |
| 108 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 109 | /// Length of the prologue in bytes. |
| 110 | uint32_t getLength() const { |
| 111 | return PrologueLength + sizeofTotalLength() + sizeof(getVersion()) + |
| 112 | sizeofPrologueLength(); |
| 113 | } |
| 114 | |
| 115 | /// Length of the line table data in bytes (not including the prologue). |
| 116 | uint32_t getStatementTableLength() const { |
| 117 | return TotalLength + sizeofTotalLength() - getLength(); |
| 118 | } |
| 119 | |
| 120 | int32_t getMaxLineIncrementForSpecialOpcode() const { |
| 121 | return LineBase + (int8_t)LineRange - 1; |
| 122 | } |
| 123 | |
| 124 | void clear(); |
| 125 | void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 126 | Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, |
| 127 | const DWARFContext &Ctx, const DWARFUnit *U = nullptr); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 128 | }; |
| 129 | |
| 130 | /// Standard .debug_line state machine structure. |
| 131 | struct Row { |
| 132 | explicit Row(bool DefaultIsStmt = false); |
| 133 | |
| 134 | /// Called after a row is appended to the matrix. |
| 135 | void postAppend(); |
| 136 | void reset(bool DefaultIsStmt); |
| 137 | void dump(raw_ostream &OS) const; |
| 138 | |
| 139 | static void dumpTableHeader(raw_ostream &OS); |
| 140 | |
| 141 | static bool orderByAddress(const Row &LHS, const Row &RHS) { |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 142 | return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) < |
| 143 | std::tie(RHS.Address.SectionIndex, RHS.Address.Address); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 144 | } |
| 145 | |
| 146 | /// The program-counter value corresponding to a machine instruction |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 147 | /// generated by the compiler and section index pointing to the section |
| 148 | /// containg this PC. If relocation information is present then section |
| 149 | /// index is the index of the section which contains above address. |
| 150 | /// Otherwise this is object::SectionedAddress::Undef value. |
| 151 | object::SectionedAddress Address; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 152 | /// An unsigned integer indicating a source line number. Lines are numbered |
| 153 | /// beginning at 1. The compiler may emit the value 0 in cases where an |
| 154 | /// instruction cannot be attributed to any source line. |
| 155 | uint32_t Line; |
| 156 | /// An unsigned integer indicating a column number within a source line. |
| 157 | /// Columns are numbered beginning at 1. The value 0 is reserved to indicate |
| 158 | /// that a statement begins at the 'left edge' of the line. |
| 159 | uint16_t Column; |
| 160 | /// An unsigned integer indicating the identity of the source file |
| 161 | /// corresponding to a machine instruction. |
| 162 | uint16_t File; |
| 163 | /// An unsigned integer representing the DWARF path discriminator value |
| 164 | /// for this location. |
| 165 | uint32_t Discriminator; |
| 166 | /// An unsigned integer whose value encodes the applicable instruction set |
| 167 | /// architecture for the current instruction. |
| 168 | uint8_t Isa; |
| 169 | /// A boolean indicating that the current instruction is the beginning of a |
| 170 | /// statement. |
| 171 | uint8_t IsStmt : 1, |
| 172 | /// A boolean indicating that the current instruction is the |
| 173 | /// beginning of a basic block. |
| 174 | BasicBlock : 1, |
| 175 | /// A boolean indicating that the current address is that of the |
| 176 | /// first byte after the end of a sequence of target machine |
| 177 | /// instructions. |
| 178 | EndSequence : 1, |
| 179 | /// A boolean indicating that the current address is one (of possibly |
| 180 | /// many) where execution should be suspended for an entry breakpoint |
| 181 | /// of a function. |
| 182 | PrologueEnd : 1, |
| 183 | /// A boolean indicating that the current address is one (of possibly |
| 184 | /// many) where execution should be suspended for an exit breakpoint |
| 185 | /// of a function. |
| 186 | EpilogueBegin : 1; |
| 187 | }; |
| 188 | |
| 189 | /// Represents a series of contiguous machine instructions. Line table for |
| 190 | /// each compilation unit may consist of multiple sequences, which are not |
| 191 | /// guaranteed to be in the order of ascending instruction address. |
| 192 | struct Sequence { |
| 193 | Sequence(); |
| 194 | |
| 195 | /// Sequence describes instructions at address range [LowPC, HighPC) |
| 196 | /// and is described by line table rows [FirstRowIndex, LastRowIndex). |
| 197 | uint64_t LowPC; |
| 198 | uint64_t HighPC; |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 199 | /// If relocation information is present then this is the index of the |
| 200 | /// section which contains above addresses. Otherwise this is |
| 201 | /// object::SectionedAddress::Undef value. |
| 202 | uint64_t SectionIndex; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 203 | unsigned FirstRowIndex; |
| 204 | unsigned LastRowIndex; |
| 205 | bool Empty; |
| 206 | |
| 207 | void reset(); |
| 208 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 209 | static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) { |
| 210 | return std::tie(LHS.SectionIndex, LHS.HighPC) < |
| 211 | std::tie(RHS.SectionIndex, RHS.HighPC); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 212 | } |
| 213 | |
| 214 | bool isValid() const { |
| 215 | return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); |
| 216 | } |
| 217 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 218 | bool containsPC(object::SectionedAddress PC) const { |
| 219 | return SectionIndex == PC.SectionIndex && |
| 220 | (LowPC <= PC.Address && PC.Address < HighPC); |
| 221 | } |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 222 | }; |
| 223 | |
| 224 | struct LineTable { |
| 225 | LineTable(); |
| 226 | |
| 227 | /// Represents an invalid row |
| 228 | const uint32_t UnknownRowIndex = UINT32_MAX; |
| 229 | |
| 230 | void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); } |
| 231 | |
| 232 | void appendSequence(const DWARFDebugLine::Sequence &S) { |
| 233 | Sequences.push_back(S); |
| 234 | } |
| 235 | |
| 236 | /// Returns the index of the row with file/line info for a given address, |
| 237 | /// or UnknownRowIndex if there is no such row. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 238 | uint32_t lookupAddress(object::SectionedAddress Address) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 239 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 240 | bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 241 | std::vector<uint32_t> &Result) const; |
| 242 | |
| 243 | bool hasFileAtIndex(uint64_t FileIndex) const; |
| 244 | |
| 245 | /// Extracts filename by its index in filename table in prologue. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 246 | /// In Dwarf 4, the files are 1-indexed and the current compilation file |
| 247 | /// name is not represented in the list. In DWARF v5, the files are |
| 248 | /// 0-indexed and the primary source file has the index 0. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 249 | /// Returns true on success. |
| 250 | bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir, |
| 251 | DILineInfoSpecifier::FileLineInfoKind Kind, |
| 252 | std::string &Result) const; |
| 253 | |
| 254 | /// Fills the Result argument with the file and line information |
| 255 | /// corresponding to Address. Returns true on success. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 256 | bool getFileLineInfoForAddress(object::SectionedAddress Address, |
| 257 | const char *CompDir, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 258 | DILineInfoSpecifier::FileLineInfoKind Kind, |
| 259 | DILineInfo &Result) const; |
| 260 | |
| 261 | void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; |
| 262 | void clear(); |
| 263 | |
| 264 | /// Parse prologue and all rows. |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 265 | Error parse( |
| 266 | DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, |
| 267 | const DWARFContext &Ctx, const DWARFUnit *U, |
| 268 | std::function<void(Error)> RecoverableErrorCallback, |
| 269 | raw_ostream *OS = nullptr); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 270 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 271 | /// Get DWARF-version aware access to the file name entry at the provided |
| 272 | /// index. |
| 273 | const llvm::DWARFDebugLine::FileNameEntry & |
| 274 | getFileNameEntry(uint64_t Index) const; |
| 275 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 276 | using RowVector = std::vector<Row>; |
| 277 | using RowIter = RowVector::const_iterator; |
| 278 | using SequenceVector = std::vector<Sequence>; |
| 279 | using SequenceIter = SequenceVector::const_iterator; |
| 280 | |
| 281 | struct Prologue Prologue; |
| 282 | RowVector Rows; |
| 283 | SequenceVector Sequences; |
| 284 | |
| 285 | private: |
| 286 | uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq, |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 287 | object::SectionedAddress Address) const; |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 288 | Optional<StringRef> |
| 289 | getSourceByIndex(uint64_t FileIndex, |
| 290 | DILineInfoSpecifier::FileLineInfoKind Kind) const; |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 291 | |
| 292 | uint32_t lookupAddressImpl(object::SectionedAddress Address) const; |
| 293 | |
| 294 | bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size, |
| 295 | std::vector<uint32_t> &Result) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 296 | }; |
| 297 | |
| 298 | const LineTable *getLineTable(uint32_t Offset) const; |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 299 | Expected<const LineTable *> getOrParseLineTable( |
| 300 | DWARFDataExtractor &DebugLineData, uint32_t Offset, |
| 301 | const DWARFContext &Ctx, const DWARFUnit *U, |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 302 | std::function<void(Error)> RecoverableErrorCallback); |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 303 | |
| 304 | /// Helper to allow for parsing of an entire .debug_line section in sequence. |
| 305 | class SectionParser { |
| 306 | public: |
| 307 | using cu_range = DWARFUnitVector::iterator_range; |
| 308 | using tu_range = DWARFUnitVector::iterator_range; |
| 309 | using LineToUnitMap = std::map<uint64_t, DWARFUnit *>; |
| 310 | |
| 311 | SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, cu_range CUs, |
| 312 | tu_range TUs); |
| 313 | |
| 314 | /// Get the next line table from the section. Report any issues via the |
| 315 | /// callbacks. |
| 316 | /// |
| 317 | /// \param RecoverableErrorCallback - any issues that don't prevent further |
| 318 | /// parsing of the table will be reported through this callback. |
| 319 | /// \param UnrecoverableErrorCallback - any issues that prevent further |
| 320 | /// parsing of the table will be reported through this callback. |
| 321 | /// \param OS - if not null, the parser will print information about the |
| 322 | /// table as it parses it. |
| 323 | LineTable |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 324 | parseNext( |
| 325 | function_ref<void(Error)> RecoverableErrorCallback, |
| 326 | function_ref<void(Error)> UnrecoverableErrorCallback, |
| 327 | raw_ostream *OS = nullptr); |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 328 | |
| 329 | /// Skip the current line table and go to the following line table (if |
| 330 | /// present) immediately. |
| 331 | /// |
| 332 | /// \param ErrorCallback - report any prologue parsing issues via this |
| 333 | /// callback. |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 334 | void skip(function_ref<void(Error)> ErrorCallback); |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 335 | |
| 336 | /// Indicates if the parser has parsed as much as possible. |
| 337 | /// |
| 338 | /// \note Certain problems with the line table structure might mean that |
| 339 | /// parsing stops before the end of the section is reached. |
| 340 | bool done() const { return Done; } |
| 341 | |
| 342 | /// Get the offset the parser has reached. |
| 343 | uint32_t getOffset() const { return Offset; } |
| 344 | |
| 345 | private: |
| 346 | DWARFUnit *prepareToParse(uint32_t Offset); |
| 347 | void moveToNextTable(uint32_t OldOffset, const Prologue &P); |
| 348 | |
| 349 | LineToUnitMap LineToUnit; |
| 350 | |
| 351 | DWARFDataExtractor &DebugLineData; |
| 352 | const DWARFContext &Context; |
| 353 | uint32_t Offset = 0; |
| 354 | bool Done = false; |
| 355 | }; |
| 356 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 357 | private: |
| 358 | struct ParsingState { |
| 359 | ParsingState(struct LineTable *LT); |
| 360 | |
| 361 | void resetRowAndSequence(); |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame] | 362 | void appendRowToMatrix(); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 363 | |
| 364 | /// Line table we're currently parsing. |
| 365 | struct LineTable *LineTable; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 366 | struct Row Row; |
| 367 | struct Sequence Sequence; |
| 368 | }; |
| 369 | |
| 370 | using LineTableMapTy = std::map<uint32_t, LineTable>; |
| 371 | using LineTableIter = LineTableMapTy::iterator; |
| 372 | using LineTableConstIter = LineTableMapTy::const_iterator; |
| 373 | |
| 374 | LineTableMapTy LineTableMap; |
| 375 | }; |
| 376 | |
| 377 | } // end namespace llvm |
| 378 | |
| 379 | #endif // LLVM_DEBUGINFO_DWARFDEBUGLINE_H |