Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// |
| 2 | // |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H |
| 10 | #define LLVM_SUPPORT_BINARYSTREAMARRAY_H |
| 11 | |
| 12 | #include "llvm/ADT/ArrayRef.h" |
| 13 | #include "llvm/ADT/iterator.h" |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 14 | #include "llvm/Support/Alignment.h" |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 15 | #include "llvm/Support/BinaryStreamRef.h" |
| 16 | #include "llvm/Support/Error.h" |
| 17 | #include <cassert> |
| 18 | #include <cstdint> |
| 19 | |
| 20 | /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file |
| 21 | /// provides two different array implementations. |
| 22 | /// |
| 23 | /// VarStreamArray - Arrays of variable length records. The user specifies |
| 24 | /// an Extractor type that can extract a record from a given offset and |
| 25 | /// return the number of bytes consumed by the record. |
| 26 | /// |
| 27 | /// FixedStreamArray - Arrays of fixed length records. This is similar in |
| 28 | /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the |
| 29 | /// elements of the array need not be laid out in contiguous memory. |
| 30 | namespace llvm { |
| 31 | |
| 32 | /// VarStreamArrayExtractor is intended to be specialized to provide customized |
| 33 | /// extraction logic. On input it receives a BinaryStreamRef pointing to the |
| 34 | /// beginning of the next record, but where the length of the record is not yet |
| 35 | /// known. Upon completion, it should return an appropriate Error instance if |
| 36 | /// a record could not be extracted, or if one could be extracted it should |
| 37 | /// return success and set Len to the number of bytes this record occupied in |
| 38 | /// the underlying stream, and it should fill out the fields of the value type |
| 39 | /// Item appropriately to represent the current record. |
| 40 | /// |
| 41 | /// You can specialize this template for your own custom value types to avoid |
| 42 | /// having to specify a second template argument to VarStreamArray (documented |
| 43 | /// below). |
| 44 | template <typename T> struct VarStreamArrayExtractor { |
| 45 | // Method intentionally deleted. You must provide an explicit specialization |
| 46 | // with the following method implemented. |
| 47 | Error operator()(BinaryStreamRef Stream, uint32_t &Len, |
| 48 | T &Item) const = delete; |
| 49 | }; |
| 50 | |
| 51 | /// VarStreamArray represents an array of variable length records backed by a |
| 52 | /// stream. This could be a contiguous sequence of bytes in memory, it could |
| 53 | /// be a file on disk, or it could be a PDB stream where bytes are stored as |
| 54 | /// discontiguous blocks in a file. Usually it is desirable to treat arrays |
| 55 | /// as contiguous blocks of memory, but doing so with large PDB files, for |
| 56 | /// example, could mean allocating huge amounts of memory just to allow |
| 57 | /// re-ordering of stream data to be contiguous before iterating over it. By |
| 58 | /// abstracting this out, we need not duplicate this memory, and we can |
| 59 | /// iterate over arrays in arbitrarily formatted streams. Elements are parsed |
| 60 | /// lazily on iteration, so there is no upfront cost associated with building |
| 61 | /// or copying a VarStreamArray, no matter how large it may be. |
| 62 | /// |
| 63 | /// You create a VarStreamArray by specifying a ValueType and an Extractor type. |
| 64 | /// If you do not specify an Extractor type, you are expected to specialize |
| 65 | /// VarStreamArrayExtractor<T> for your ValueType. |
| 66 | /// |
| 67 | /// By default an Extractor is default constructed in the class, but in some |
| 68 | /// cases you might find it useful for an Extractor to maintain state across |
| 69 | /// extractions. In this case you can provide your own Extractor through a |
| 70 | /// secondary constructor. The following examples show various ways of |
| 71 | /// creating a VarStreamArray. |
| 72 | /// |
| 73 | /// // Will use VarStreamArrayExtractor<MyType> as the extractor. |
| 74 | /// VarStreamArray<MyType> MyTypeArray; |
| 75 | /// |
| 76 | /// // Will use a default-constructed MyExtractor as the extractor. |
| 77 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; |
| 78 | /// |
| 79 | /// // Will use the specific instance of MyExtractor provided. |
| 80 | /// // MyExtractor need not be default-constructible in this case. |
| 81 | /// MyExtractor E(SomeContext); |
| 82 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); |
| 83 | /// |
| 84 | |
| 85 | template <typename ValueType, typename Extractor> class VarStreamArrayIterator; |
| 86 | |
| 87 | template <typename ValueType, |
| 88 | typename Extractor = VarStreamArrayExtractor<ValueType>> |
| 89 | class VarStreamArray { |
| 90 | friend class VarStreamArrayIterator<ValueType, Extractor>; |
| 91 | |
| 92 | public: |
| 93 | typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; |
| 94 | |
| 95 | VarStreamArray() = default; |
| 96 | |
| 97 | explicit VarStreamArray(const Extractor &E) : E(E) {} |
| 98 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 99 | explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0) |
| 100 | : Stream(Stream), Skew(Skew) {} |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 101 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 102 | VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0) |
| 103 | : Stream(Stream), E(E), Skew(Skew) {} |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 104 | |
| 105 | Iterator begin(bool *HadError = nullptr) const { |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 106 | return Iterator(*this, E, Skew, nullptr); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 107 | } |
| 108 | |
| 109 | bool valid() const { return Stream.valid(); } |
| 110 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 111 | uint32_t skew() const { return Skew; } |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 112 | Iterator end() const { return Iterator(E); } |
| 113 | |
| 114 | bool empty() const { return Stream.getLength() == 0; } |
| 115 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 116 | VarStreamArray<ValueType, Extractor> substream(uint32_t Begin, |
| 117 | uint32_t End) const { |
| 118 | assert(Begin >= Skew); |
| 119 | // We should never cut off the beginning of the stream since it might be |
| 120 | // skewed, meaning the initial bytes are important. |
| 121 | BinaryStreamRef NewStream = Stream.slice(0, End); |
| 122 | return {NewStream, E, Begin}; |
| 123 | } |
| 124 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 125 | /// given an offset into the array's underlying stream, return an |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 126 | /// iterator to the record at that offset. This is considered unsafe |
| 127 | /// since the behavior is undefined if \p Offset does not refer to the |
| 128 | /// beginning of a valid record. |
| 129 | Iterator at(uint32_t Offset) const { |
| 130 | return Iterator(*this, E, Offset, nullptr); |
| 131 | } |
| 132 | |
| 133 | const Extractor &getExtractor() const { return E; } |
| 134 | Extractor &getExtractor() { return E; } |
| 135 | |
| 136 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 137 | void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) { |
| 138 | Stream = NewStream; |
| 139 | Skew = NewSkew; |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 140 | } |
| 141 | |
| 142 | void drop_front() { Skew += begin()->length(); } |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 143 | |
| 144 | private: |
| 145 | BinaryStreamRef Stream; |
| 146 | Extractor E; |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 147 | uint32_t Skew = 0; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 148 | }; |
| 149 | |
| 150 | template <typename ValueType, typename Extractor> |
| 151 | class VarStreamArrayIterator |
| 152 | : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, |
| 153 | std::forward_iterator_tag, ValueType> { |
| 154 | typedef VarStreamArrayIterator<ValueType, Extractor> IterType; |
| 155 | typedef VarStreamArray<ValueType, Extractor> ArrayType; |
| 156 | |
| 157 | public: |
| 158 | VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 159 | uint32_t Offset, bool *HadError) |
| 160 | : IterRef(Array.Stream.drop_front(Offset)), Extract(E), |
| 161 | Array(&Array), AbsOffset(Offset), HadError(HadError) { |
| 162 | if (IterRef.getLength() == 0) |
| 163 | moveToEnd(); |
| 164 | else { |
| 165 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
| 166 | if (EC) { |
| 167 | consumeError(std::move(EC)); |
| 168 | markError(); |
| 169 | } |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | VarStreamArrayIterator() = default; |
| 174 | explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} |
| 175 | ~VarStreamArrayIterator() = default; |
| 176 | |
| 177 | bool operator==(const IterType &R) const { |
| 178 | if (Array && R.Array) { |
| 179 | // Both have a valid array, make sure they're same. |
| 180 | assert(Array == R.Array); |
| 181 | return IterRef == R.IterRef; |
| 182 | } |
| 183 | |
| 184 | // Both iterators are at the end. |
| 185 | if (!Array && !R.Array) |
| 186 | return true; |
| 187 | |
| 188 | // One is not at the end and one is. |
| 189 | return false; |
| 190 | } |
| 191 | |
| 192 | const ValueType &operator*() const { |
| 193 | assert(Array && !HasError); |
| 194 | return ThisValue; |
| 195 | } |
| 196 | |
| 197 | ValueType &operator*() { |
| 198 | assert(Array && !HasError); |
| 199 | return ThisValue; |
| 200 | } |
| 201 | |
| 202 | IterType &operator+=(unsigned N) { |
| 203 | for (unsigned I = 0; I < N; ++I) { |
| 204 | // We are done with the current record, discard it so that we are |
| 205 | // positioned at the next record. |
| 206 | AbsOffset += ThisLen; |
| 207 | IterRef = IterRef.drop_front(ThisLen); |
| 208 | if (IterRef.getLength() == 0) { |
| 209 | // There is nothing after the current record, we must make this an end |
| 210 | // iterator. |
| 211 | moveToEnd(); |
| 212 | } else { |
| 213 | // There is some data after the current record. |
| 214 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
| 215 | if (EC) { |
| 216 | consumeError(std::move(EC)); |
| 217 | markError(); |
| 218 | } else if (ThisLen == 0) { |
| 219 | // An empty record? Make this an end iterator. |
| 220 | moveToEnd(); |
| 221 | } |
| 222 | } |
| 223 | } |
| 224 | return *this; |
| 225 | } |
| 226 | |
| 227 | uint32_t offset() const { return AbsOffset; } |
| 228 | uint32_t getRecordLength() const { return ThisLen; } |
| 229 | |
| 230 | private: |
| 231 | void moveToEnd() { |
| 232 | Array = nullptr; |
| 233 | ThisLen = 0; |
| 234 | } |
| 235 | void markError() { |
| 236 | moveToEnd(); |
| 237 | HasError = true; |
| 238 | if (HadError != nullptr) |
| 239 | *HadError = true; |
| 240 | } |
| 241 | |
| 242 | ValueType ThisValue; |
| 243 | BinaryStreamRef IterRef; |
| 244 | Extractor Extract; |
| 245 | const ArrayType *Array{nullptr}; |
| 246 | uint32_t ThisLen{0}; |
| 247 | uint32_t AbsOffset{0}; |
| 248 | bool HasError{false}; |
| 249 | bool *HadError{nullptr}; |
| 250 | }; |
| 251 | |
| 252 | template <typename T> class FixedStreamArrayIterator; |
| 253 | |
| 254 | /// FixedStreamArray is similar to VarStreamArray, except with each record |
| 255 | /// having a fixed-length. As with VarStreamArray, there is no upfront |
| 256 | /// cost associated with building or copying a FixedStreamArray, as the |
| 257 | /// memory for each element is not read from the backing stream until that |
| 258 | /// element is iterated. |
| 259 | template <typename T> class FixedStreamArray { |
| 260 | friend class FixedStreamArrayIterator<T>; |
| 261 | |
| 262 | public: |
| 263 | typedef FixedStreamArrayIterator<T> Iterator; |
| 264 | |
| 265 | FixedStreamArray() = default; |
| 266 | explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { |
| 267 | assert(Stream.getLength() % sizeof(T) == 0); |
| 268 | } |
| 269 | |
| 270 | bool operator==(const FixedStreamArray<T> &Other) const { |
| 271 | return Stream == Other.Stream; |
| 272 | } |
| 273 | |
| 274 | bool operator!=(const FixedStreamArray<T> &Other) const { |
| 275 | return !(*this == Other); |
| 276 | } |
| 277 | |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 278 | FixedStreamArray(const FixedStreamArray &) = default; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 279 | FixedStreamArray &operator=(const FixedStreamArray &) = default; |
| 280 | |
| 281 | const T &operator[](uint32_t Index) const { |
| 282 | assert(Index < size()); |
| 283 | uint32_t Off = Index * sizeof(T); |
| 284 | ArrayRef<uint8_t> Data; |
| 285 | if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { |
| 286 | assert(false && "Unexpected failure reading from stream"); |
| 287 | // This should never happen since we asserted that the stream length was |
| 288 | // an exact multiple of the element size. |
| 289 | consumeError(std::move(EC)); |
| 290 | } |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 291 | assert(isAddrAligned(Align::Of<T>(), Data.data())); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 292 | return *reinterpret_cast<const T *>(Data.data()); |
| 293 | } |
| 294 | |
| 295 | uint32_t size() const { return Stream.getLength() / sizeof(T); } |
| 296 | |
| 297 | bool empty() const { return size() == 0; } |
| 298 | |
| 299 | FixedStreamArrayIterator<T> begin() const { |
| 300 | return FixedStreamArrayIterator<T>(*this, 0); |
| 301 | } |
| 302 | |
| 303 | FixedStreamArrayIterator<T> end() const { |
| 304 | return FixedStreamArrayIterator<T>(*this, size()); |
| 305 | } |
| 306 | |
| 307 | const T &front() const { return *begin(); } |
| 308 | const T &back() const { |
| 309 | FixedStreamArrayIterator<T> I = end(); |
| 310 | return *(--I); |
| 311 | } |
| 312 | |
| 313 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
| 314 | |
| 315 | private: |
| 316 | BinaryStreamRef Stream; |
| 317 | }; |
| 318 | |
| 319 | template <typename T> |
| 320 | class FixedStreamArrayIterator |
| 321 | : public iterator_facade_base<FixedStreamArrayIterator<T>, |
| 322 | std::random_access_iterator_tag, const T> { |
| 323 | |
| 324 | public: |
| 325 | FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) |
| 326 | : Array(Array), Index(Index) {} |
| 327 | |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 328 | FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other) |
| 329 | : Array(Other.Array), Index(Other.Index) {} |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 330 | FixedStreamArrayIterator<T> & |
| 331 | operator=(const FixedStreamArrayIterator<T> &Other) { |
| 332 | Array = Other.Array; |
| 333 | Index = Other.Index; |
| 334 | return *this; |
| 335 | } |
| 336 | |
| 337 | const T &operator*() const { return Array[Index]; } |
| 338 | const T &operator*() { return Array[Index]; } |
| 339 | |
| 340 | bool operator==(const FixedStreamArrayIterator<T> &R) const { |
| 341 | assert(Array == R.Array); |
| 342 | return (Index == R.Index) && (Array == R.Array); |
| 343 | } |
| 344 | |
| 345 | FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { |
| 346 | Index += N; |
| 347 | return *this; |
| 348 | } |
| 349 | |
| 350 | FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { |
| 351 | assert(std::ptrdiff_t(Index) >= N); |
| 352 | Index -= N; |
| 353 | return *this; |
| 354 | } |
| 355 | |
| 356 | std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { |
| 357 | assert(Array == R.Array); |
| 358 | assert(Index >= R.Index); |
| 359 | return Index - R.Index; |
| 360 | } |
| 361 | |
| 362 | bool operator<(const FixedStreamArrayIterator<T> &RHS) const { |
| 363 | assert(Array == RHS.Array); |
| 364 | return Index < RHS.Index; |
| 365 | } |
| 366 | |
| 367 | private: |
| 368 | FixedStreamArray<T> Array; |
| 369 | uint32_t Index; |
| 370 | }; |
| 371 | |
| 372 | } // namespace llvm |
| 373 | |
| 374 | #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H |