Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 1 | //===- LineTable.h ----------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H |
| 10 | #define LLVM_DEBUGINFO_GSYM_LINETABLE_H |
| 11 | |
| 12 | #include "llvm/DebugInfo/GSYM/LineEntry.h" |
| 13 | #include "llvm/Support/Error.h" |
| 14 | #include <cstdint> |
| 15 | #include <vector> |
| 16 | |
| 17 | namespace llvm { |
| 18 | namespace gsym { |
| 19 | |
| 20 | struct FunctionInfo; |
| 21 | class FileWriter; |
| 22 | |
| 23 | /// LineTable class contains deserialized versions of line tables for each |
| 24 | /// function's address ranges. |
| 25 | /// |
| 26 | /// When saved to disk, the line table is encoded using a modified version of |
| 27 | /// the DWARF line tables that only tracks address to source file and line. |
| 28 | /// |
| 29 | /// ENCODING |
| 30 | /// |
| 31 | /// The line table starts with a small prolog that contains the following |
| 32 | /// values: |
| 33 | /// |
| 34 | /// ENCODING NAME DESCRIPTION |
| 35 | /// ======== =========== ==================================================== |
| 36 | /// SLEB MinDelta The min line delta for special opcodes that advance |
| 37 | /// the address and line number. |
| 38 | /// SLEB MaxDelta The max line delta for single byte opcodes that |
| 39 | /// advance the address and line number. |
| 40 | /// ULEB FirstLine The value of the first source line number to |
| 41 | /// initialize the LineEntry with. |
| 42 | /// |
| 43 | /// Once these prolog items are read, we initialize a LineEntry struct with |
| 44 | /// the start address of the function from the FunctionInfo's address range, |
| 45 | /// a default file index of 1, and the line number set to "FirstLine" from |
| 46 | /// the prolog above: |
| 47 | /// |
| 48 | /// LineEntry Row(BaseAddr, 1, FirstLine); |
| 49 | /// |
| 50 | /// The line table state machine is now initialized and ready to be parsed. |
| 51 | /// The stream that follows this encodes the line entries in a compact |
| 52 | /// form. Some opcodes cause "Row" to be modified and some opcodes may also |
| 53 | /// push "Row" onto the end of the "LineTable.Lines" vector. The end result |
| 54 | /// is a vector of LineEntry structs that is sorted in ascending address |
| 55 | /// order. |
| 56 | /// |
| 57 | /// NORMAL OPCODES |
| 58 | /// |
| 59 | /// The opcodes 0 through 3 are normal opcodes. Their encoding and |
| 60 | /// descriptions are listed below: |
| 61 | /// |
| 62 | /// ENCODING ENUMERATION VALUE DESCRIPTION |
| 63 | /// ======== ================ ===== ======================================== |
| 64 | /// LTOC_EndSequence 0x00 Parsing is done. |
| 65 | /// ULEB LTOC_SetFile 0x01 Row.File = ULEB |
| 66 | /// ULEB LTOC_AdvancePC 0x02 Row.Addr += ULEB, push "Row". |
| 67 | /// SLEB LTOC_AdvanceLine 0x03 Row.Line += SLEB |
| 68 | /// LTOC_FirstSpecial 0x04 First special opcode (see SPECIAL |
| 69 | /// OPCODES below). |
| 70 | /// |
| 71 | /// SPECIAL OPCODES |
| 72 | /// |
| 73 | /// Opcodes LTOC_FirstSpecial through 255 are special opcodes that always |
| 74 | /// increment both the Row.Addr and Row.Line and push "Row" onto the |
| 75 | /// LineTable.Lines vector. They do this by using some of the bits to |
| 76 | /// increment/decrement the source line number, and some of the bits to |
| 77 | /// increment the address. Line numbers can go up or down when making line |
| 78 | /// tables, where addresses always only increase since line tables are sorted |
| 79 | /// by address. |
| 80 | /// |
| 81 | /// In order to calculate the amount to increment the line and address for |
| 82 | /// these special opcodes, we calculate the number of values reserved for the |
| 83 | /// line increment/decrement using the "MinDelta" and "MaxDelta" from the |
| 84 | /// prolog: |
| 85 | /// |
| 86 | /// const int64_t LineRange = MaxDelta - MinDelta + 1; |
| 87 | /// |
| 88 | /// Then we can adjust the opcode to not include any of the normal opcodes: |
| 89 | /// |
| 90 | /// const uint8_t AdjustedOp = Opcode - LTOC_FirstSpecial; |
| 91 | /// |
| 92 | /// And we can calculate the line offset, and address offset: |
| 93 | /// |
| 94 | /// const int64_t LineDelta = MinDelta + (AdjustedOp % LineRange); |
| 95 | /// const uint64_t AddrDelta = (AdjustedOp / LineRange); |
| 96 | /// |
| 97 | /// And use these to modify our "Row": |
| 98 | /// |
| 99 | /// Row.Line += LineDelta; |
| 100 | /// Row.Addr += AddrDelta; |
| 101 | /// |
| 102 | /// And push a row onto the line table: |
| 103 | /// |
| 104 | /// Lines.push_back(Row); |
| 105 | /// |
| 106 | /// This is very similar to the way that DWARF encodes its line tables. The |
| 107 | /// only difference is the DWARF line tables have more normal opcodes and the |
| 108 | /// "Row" contains more members, like source column number, bools for end of |
| 109 | /// prologue, beginning of epilogue, is statement and many others. There are |
| 110 | /// also more complex rules that happen for the extra normal opcodes. By |
| 111 | /// leaving these extra opcodes out, we leave more bits for the special |
| 112 | /// opcodes that allows us to encode line tables in fewer bytes than standard |
| 113 | /// DWARF encodings. |
| 114 | /// |
| 115 | /// Opcodes that will push "Row" onto the LineTable.Lines include the |
| 116 | /// LTOC_AdvancePC opcode and all special opcodes. All other opcodes |
| 117 | /// only modify the current "Row", or cause the line table to end. |
| 118 | class LineTable { |
| 119 | typedef std::vector<gsym::LineEntry> Collection; |
| 120 | Collection Lines; ///< All line entries in the line table. |
| 121 | public: |
| 122 | /// Lookup a single address within a line table's data. |
| 123 | /// |
| 124 | /// Clients have the option to decode an entire line table using |
| 125 | /// LineTable::decode() or just find a single matching entry using this |
| 126 | /// function. The benefit of using this function is that parsed LineEntry |
| 127 | /// objects that do not match will not be stored in an array. This will avoid |
| 128 | /// memory allocation costs and parsing can stop once a match has been found. |
| 129 | /// |
| 130 | /// \param Data The binary stream to read the data from. This object must |
| 131 | /// have the data for the LineTable object starting at offset zero. The data |
| 132 | /// can contain more data than needed. |
| 133 | /// |
| 134 | /// \param BaseAddr The base address to use when decoding the line table. |
| 135 | /// This will be the FunctionInfo's start address and will be used to |
| 136 | /// initialize the line table row prior to parsing any opcodes. |
| 137 | /// |
| 138 | /// \returns An LineEntry object if a match is found, error otherwise. |
| 139 | static Expected<LineEntry> lookup(DataExtractor &Data, uint64_t BaseAddr, |
| 140 | uint64_t Addr); |
| 141 | |
| 142 | /// Decode an LineTable object from a binary data stream. |
| 143 | /// |
| 144 | /// \param Data The binary stream to read the data from. This object must |
| 145 | /// have the data for the LineTable object starting at offset zero. The data |
| 146 | /// can contain more data than needed. |
| 147 | /// |
| 148 | /// \param BaseAddr The base address to use when decoding the line table. |
| 149 | /// This will be the FunctionInfo's start address and will be used to |
| 150 | /// initialize the line table row prior to parsing any opcodes. |
| 151 | /// |
| 152 | /// \returns An LineTable or an error describing the issue that was |
| 153 | /// encountered during decoding. |
| 154 | static llvm::Expected<LineTable> decode(DataExtractor &Data, |
| 155 | uint64_t BaseAddr); |
| 156 | /// Encode this LineTable object into FileWriter stream. |
| 157 | /// |
| 158 | /// \param O The binary stream to write the data to at the current file |
| 159 | /// position. |
| 160 | /// |
| 161 | /// \param BaseAddr The base address to use when decoding the line table. |
| 162 | /// This will be the FunctionInfo's start address. |
| 163 | /// |
| 164 | /// \returns An error object that indicates success or failure or the |
| 165 | /// encoding process. |
| 166 | llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const; |
| 167 | bool empty() const { return Lines.empty(); } |
| 168 | void clear() { Lines.clear(); } |
| 169 | /// Return the first line entry if the line table isn't empty. |
| 170 | /// |
| 171 | /// \returns An optional line entry with the first line entry if the line |
| 172 | /// table isn't empty, or llvm::None if the line table is emtpy. |
| 173 | Optional<LineEntry> first() const { |
| 174 | if (Lines.empty()) |
| 175 | return llvm::None; |
| 176 | return Lines.front(); |
| 177 | } |
| 178 | /// Return the last line entry if the line table isn't empty. |
| 179 | /// |
| 180 | /// \returns An optional line entry with the last line entry if the line |
| 181 | /// table isn't empty, or llvm::None if the line table is emtpy. |
| 182 | Optional<LineEntry> last() const { |
| 183 | if (Lines.empty()) |
| 184 | return llvm::None; |
| 185 | return Lines.back(); |
| 186 | } |
| 187 | void push(const LineEntry &LE) { |
| 188 | Lines.push_back(LE); |
| 189 | } |
| 190 | size_t isValid() const { |
| 191 | return !Lines.empty(); |
| 192 | } |
| 193 | size_t size() const { |
| 194 | return Lines.size(); |
| 195 | } |
| 196 | LineEntry &get(size_t i) { |
| 197 | assert(i < Lines.size()); |
| 198 | return Lines[i]; |
| 199 | } |
| 200 | const LineEntry &get(size_t i) const { |
| 201 | assert(i < Lines.size()); |
| 202 | return Lines[i]; |
| 203 | } |
| 204 | LineEntry &operator[](size_t i) { |
| 205 | return get(i); |
| 206 | } |
| 207 | const LineEntry &operator[](size_t i) const { |
| 208 | return get(i); |
| 209 | } |
| 210 | bool operator==(const LineTable &RHS) const { |
| 211 | return Lines == RHS.Lines; |
| 212 | } |
| 213 | bool operator!=(const LineTable &RHS) const { |
| 214 | return Lines != RHS.Lines; |
| 215 | } |
| 216 | bool operator<(const LineTable &RHS) const { |
| 217 | const auto LHSSize = Lines.size(); |
| 218 | const auto RHSSize = RHS.Lines.size(); |
| 219 | if (LHSSize == RHSSize) |
| 220 | return Lines < RHS.Lines; |
| 221 | return LHSSize < RHSSize; |
| 222 | } |
| 223 | Collection::const_iterator begin() const { return Lines.begin(); } |
| 224 | Collection::const_iterator end() const { return Lines.end(); } |
| 225 | |
| 226 | }; |
| 227 | |
| 228 | raw_ostream &operator<<(raw_ostream &OS, const gsym::LineTable <); |
| 229 | |
| 230 | } // namespace gsym |
| 231 | } // namespace llvm |
| 232 | |
| 233 | #endif // #ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H |