Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H |
| 11 | #define LLVM_SUPPORT_BINARYSTREAMARRAY_H |
| 12 | |
| 13 | #include "llvm/ADT/ArrayRef.h" |
| 14 | #include "llvm/ADT/iterator.h" |
| 15 | #include "llvm/Support/BinaryStreamRef.h" |
| 16 | #include "llvm/Support/Error.h" |
| 17 | #include <cassert> |
| 18 | #include <cstdint> |
| 19 | |
| 20 | /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file |
| 21 | /// provides two different array implementations. |
| 22 | /// |
| 23 | /// VarStreamArray - Arrays of variable length records. The user specifies |
| 24 | /// an Extractor type that can extract a record from a given offset and |
| 25 | /// return the number of bytes consumed by the record. |
| 26 | /// |
| 27 | /// FixedStreamArray - Arrays of fixed length records. This is similar in |
| 28 | /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the |
| 29 | /// elements of the array need not be laid out in contiguous memory. |
| 30 | namespace llvm { |
| 31 | |
| 32 | /// VarStreamArrayExtractor is intended to be specialized to provide customized |
| 33 | /// extraction logic. On input it receives a BinaryStreamRef pointing to the |
| 34 | /// beginning of the next record, but where the length of the record is not yet |
| 35 | /// known. Upon completion, it should return an appropriate Error instance if |
| 36 | /// a record could not be extracted, or if one could be extracted it should |
| 37 | /// return success and set Len to the number of bytes this record occupied in |
| 38 | /// the underlying stream, and it should fill out the fields of the value type |
| 39 | /// Item appropriately to represent the current record. |
| 40 | /// |
| 41 | /// You can specialize this template for your own custom value types to avoid |
| 42 | /// having to specify a second template argument to VarStreamArray (documented |
| 43 | /// below). |
| 44 | template <typename T> struct VarStreamArrayExtractor { |
| 45 | // Method intentionally deleted. You must provide an explicit specialization |
| 46 | // with the following method implemented. |
| 47 | Error operator()(BinaryStreamRef Stream, uint32_t &Len, |
| 48 | T &Item) const = delete; |
| 49 | }; |
| 50 | |
| 51 | /// VarStreamArray represents an array of variable length records backed by a |
| 52 | /// stream. This could be a contiguous sequence of bytes in memory, it could |
| 53 | /// be a file on disk, or it could be a PDB stream where bytes are stored as |
| 54 | /// discontiguous blocks in a file. Usually it is desirable to treat arrays |
| 55 | /// as contiguous blocks of memory, but doing so with large PDB files, for |
| 56 | /// example, could mean allocating huge amounts of memory just to allow |
| 57 | /// re-ordering of stream data to be contiguous before iterating over it. By |
| 58 | /// abstracting this out, we need not duplicate this memory, and we can |
| 59 | /// iterate over arrays in arbitrarily formatted streams. Elements are parsed |
| 60 | /// lazily on iteration, so there is no upfront cost associated with building |
| 61 | /// or copying a VarStreamArray, no matter how large it may be. |
| 62 | /// |
| 63 | /// You create a VarStreamArray by specifying a ValueType and an Extractor type. |
| 64 | /// If you do not specify an Extractor type, you are expected to specialize |
| 65 | /// VarStreamArrayExtractor<T> for your ValueType. |
| 66 | /// |
| 67 | /// By default an Extractor is default constructed in the class, but in some |
| 68 | /// cases you might find it useful for an Extractor to maintain state across |
| 69 | /// extractions. In this case you can provide your own Extractor through a |
| 70 | /// secondary constructor. The following examples show various ways of |
| 71 | /// creating a VarStreamArray. |
| 72 | /// |
| 73 | /// // Will use VarStreamArrayExtractor<MyType> as the extractor. |
| 74 | /// VarStreamArray<MyType> MyTypeArray; |
| 75 | /// |
| 76 | /// // Will use a default-constructed MyExtractor as the extractor. |
| 77 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; |
| 78 | /// |
| 79 | /// // Will use the specific instance of MyExtractor provided. |
| 80 | /// // MyExtractor need not be default-constructible in this case. |
| 81 | /// MyExtractor E(SomeContext); |
| 82 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); |
| 83 | /// |
| 84 | |
| 85 | template <typename ValueType, typename Extractor> class VarStreamArrayIterator; |
| 86 | |
| 87 | template <typename ValueType, |
| 88 | typename Extractor = VarStreamArrayExtractor<ValueType>> |
| 89 | class VarStreamArray { |
| 90 | friend class VarStreamArrayIterator<ValueType, Extractor>; |
| 91 | |
| 92 | public: |
| 93 | typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; |
| 94 | |
| 95 | VarStreamArray() = default; |
| 96 | |
| 97 | explicit VarStreamArray(const Extractor &E) : E(E) {} |
| 98 | |
| 99 | explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {} |
| 100 | |
| 101 | VarStreamArray(BinaryStreamRef Stream, const Extractor &E) |
| 102 | : Stream(Stream), E(E) {} |
| 103 | |
| 104 | Iterator begin(bool *HadError = nullptr) const { |
| 105 | return Iterator(*this, E, HadError); |
| 106 | } |
| 107 | |
| 108 | bool valid() const { return Stream.valid(); } |
| 109 | |
| 110 | Iterator end() const { return Iterator(E); } |
| 111 | |
| 112 | bool empty() const { return Stream.getLength() == 0; } |
| 113 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame^] | 114 | /// given an offset into the array's underlying stream, return an |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 115 | /// iterator to the record at that offset. This is considered unsafe |
| 116 | /// since the behavior is undefined if \p Offset does not refer to the |
| 117 | /// beginning of a valid record. |
| 118 | Iterator at(uint32_t Offset) const { |
| 119 | return Iterator(*this, E, Offset, nullptr); |
| 120 | } |
| 121 | |
| 122 | const Extractor &getExtractor() const { return E; } |
| 123 | Extractor &getExtractor() { return E; } |
| 124 | |
| 125 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
| 126 | void setUnderlyingStream(BinaryStreamRef S) { Stream = S; } |
| 127 | |
| 128 | private: |
| 129 | BinaryStreamRef Stream; |
| 130 | Extractor E; |
| 131 | }; |
| 132 | |
| 133 | template <typename ValueType, typename Extractor> |
| 134 | class VarStreamArrayIterator |
| 135 | : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, |
| 136 | std::forward_iterator_tag, ValueType> { |
| 137 | typedef VarStreamArrayIterator<ValueType, Extractor> IterType; |
| 138 | typedef VarStreamArray<ValueType, Extractor> ArrayType; |
| 139 | |
| 140 | public: |
| 141 | VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, |
| 142 | bool *HadError) |
| 143 | : VarStreamArrayIterator(Array, E, 0, HadError) {} |
| 144 | |
| 145 | VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, |
| 146 | uint32_t Offset, bool *HadError) |
| 147 | : IterRef(Array.Stream.drop_front(Offset)), Extract(E), |
| 148 | Array(&Array), AbsOffset(Offset), HadError(HadError) { |
| 149 | if (IterRef.getLength() == 0) |
| 150 | moveToEnd(); |
| 151 | else { |
| 152 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
| 153 | if (EC) { |
| 154 | consumeError(std::move(EC)); |
| 155 | markError(); |
| 156 | } |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | VarStreamArrayIterator() = default; |
| 161 | explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} |
| 162 | ~VarStreamArrayIterator() = default; |
| 163 | |
| 164 | bool operator==(const IterType &R) const { |
| 165 | if (Array && R.Array) { |
| 166 | // Both have a valid array, make sure they're same. |
| 167 | assert(Array == R.Array); |
| 168 | return IterRef == R.IterRef; |
| 169 | } |
| 170 | |
| 171 | // Both iterators are at the end. |
| 172 | if (!Array && !R.Array) |
| 173 | return true; |
| 174 | |
| 175 | // One is not at the end and one is. |
| 176 | return false; |
| 177 | } |
| 178 | |
| 179 | const ValueType &operator*() const { |
| 180 | assert(Array && !HasError); |
| 181 | return ThisValue; |
| 182 | } |
| 183 | |
| 184 | ValueType &operator*() { |
| 185 | assert(Array && !HasError); |
| 186 | return ThisValue; |
| 187 | } |
| 188 | |
| 189 | IterType &operator+=(unsigned N) { |
| 190 | for (unsigned I = 0; I < N; ++I) { |
| 191 | // We are done with the current record, discard it so that we are |
| 192 | // positioned at the next record. |
| 193 | AbsOffset += ThisLen; |
| 194 | IterRef = IterRef.drop_front(ThisLen); |
| 195 | if (IterRef.getLength() == 0) { |
| 196 | // There is nothing after the current record, we must make this an end |
| 197 | // iterator. |
| 198 | moveToEnd(); |
| 199 | } else { |
| 200 | // There is some data after the current record. |
| 201 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
| 202 | if (EC) { |
| 203 | consumeError(std::move(EC)); |
| 204 | markError(); |
| 205 | } else if (ThisLen == 0) { |
| 206 | // An empty record? Make this an end iterator. |
| 207 | moveToEnd(); |
| 208 | } |
| 209 | } |
| 210 | } |
| 211 | return *this; |
| 212 | } |
| 213 | |
| 214 | uint32_t offset() const { return AbsOffset; } |
| 215 | uint32_t getRecordLength() const { return ThisLen; } |
| 216 | |
| 217 | private: |
| 218 | void moveToEnd() { |
| 219 | Array = nullptr; |
| 220 | ThisLen = 0; |
| 221 | } |
| 222 | void markError() { |
| 223 | moveToEnd(); |
| 224 | HasError = true; |
| 225 | if (HadError != nullptr) |
| 226 | *HadError = true; |
| 227 | } |
| 228 | |
| 229 | ValueType ThisValue; |
| 230 | BinaryStreamRef IterRef; |
| 231 | Extractor Extract; |
| 232 | const ArrayType *Array{nullptr}; |
| 233 | uint32_t ThisLen{0}; |
| 234 | uint32_t AbsOffset{0}; |
| 235 | bool HasError{false}; |
| 236 | bool *HadError{nullptr}; |
| 237 | }; |
| 238 | |
| 239 | template <typename T> class FixedStreamArrayIterator; |
| 240 | |
| 241 | /// FixedStreamArray is similar to VarStreamArray, except with each record |
| 242 | /// having a fixed-length. As with VarStreamArray, there is no upfront |
| 243 | /// cost associated with building or copying a FixedStreamArray, as the |
| 244 | /// memory for each element is not read from the backing stream until that |
| 245 | /// element is iterated. |
| 246 | template <typename T> class FixedStreamArray { |
| 247 | friend class FixedStreamArrayIterator<T>; |
| 248 | |
| 249 | public: |
| 250 | typedef FixedStreamArrayIterator<T> Iterator; |
| 251 | |
| 252 | FixedStreamArray() = default; |
| 253 | explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { |
| 254 | assert(Stream.getLength() % sizeof(T) == 0); |
| 255 | } |
| 256 | |
| 257 | bool operator==(const FixedStreamArray<T> &Other) const { |
| 258 | return Stream == Other.Stream; |
| 259 | } |
| 260 | |
| 261 | bool operator!=(const FixedStreamArray<T> &Other) const { |
| 262 | return !(*this == Other); |
| 263 | } |
| 264 | |
| 265 | FixedStreamArray &operator=(const FixedStreamArray &) = default; |
| 266 | |
| 267 | const T &operator[](uint32_t Index) const { |
| 268 | assert(Index < size()); |
| 269 | uint32_t Off = Index * sizeof(T); |
| 270 | ArrayRef<uint8_t> Data; |
| 271 | if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { |
| 272 | assert(false && "Unexpected failure reading from stream"); |
| 273 | // This should never happen since we asserted that the stream length was |
| 274 | // an exact multiple of the element size. |
| 275 | consumeError(std::move(EC)); |
| 276 | } |
| 277 | assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0); |
| 278 | return *reinterpret_cast<const T *>(Data.data()); |
| 279 | } |
| 280 | |
| 281 | uint32_t size() const { return Stream.getLength() / sizeof(T); } |
| 282 | |
| 283 | bool empty() const { return size() == 0; } |
| 284 | |
| 285 | FixedStreamArrayIterator<T> begin() const { |
| 286 | return FixedStreamArrayIterator<T>(*this, 0); |
| 287 | } |
| 288 | |
| 289 | FixedStreamArrayIterator<T> end() const { |
| 290 | return FixedStreamArrayIterator<T>(*this, size()); |
| 291 | } |
| 292 | |
| 293 | const T &front() const { return *begin(); } |
| 294 | const T &back() const { |
| 295 | FixedStreamArrayIterator<T> I = end(); |
| 296 | return *(--I); |
| 297 | } |
| 298 | |
| 299 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
| 300 | |
| 301 | private: |
| 302 | BinaryStreamRef Stream; |
| 303 | }; |
| 304 | |
| 305 | template <typename T> |
| 306 | class FixedStreamArrayIterator |
| 307 | : public iterator_facade_base<FixedStreamArrayIterator<T>, |
| 308 | std::random_access_iterator_tag, const T> { |
| 309 | |
| 310 | public: |
| 311 | FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) |
| 312 | : Array(Array), Index(Index) {} |
| 313 | |
| 314 | FixedStreamArrayIterator<T> & |
| 315 | operator=(const FixedStreamArrayIterator<T> &Other) { |
| 316 | Array = Other.Array; |
| 317 | Index = Other.Index; |
| 318 | return *this; |
| 319 | } |
| 320 | |
| 321 | const T &operator*() const { return Array[Index]; } |
| 322 | const T &operator*() { return Array[Index]; } |
| 323 | |
| 324 | bool operator==(const FixedStreamArrayIterator<T> &R) const { |
| 325 | assert(Array == R.Array); |
| 326 | return (Index == R.Index) && (Array == R.Array); |
| 327 | } |
| 328 | |
| 329 | FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { |
| 330 | Index += N; |
| 331 | return *this; |
| 332 | } |
| 333 | |
| 334 | FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { |
| 335 | assert(std::ptrdiff_t(Index) >= N); |
| 336 | Index -= N; |
| 337 | return *this; |
| 338 | } |
| 339 | |
| 340 | std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { |
| 341 | assert(Array == R.Array); |
| 342 | assert(Index >= R.Index); |
| 343 | return Index - R.Index; |
| 344 | } |
| 345 | |
| 346 | bool operator<(const FixedStreamArrayIterator<T> &RHS) const { |
| 347 | assert(Array == RHS.Array); |
| 348 | return Index < RHS.Index; |
| 349 | } |
| 350 | |
| 351 | private: |
| 352 | FixedStreamArray<T> Array; |
| 353 | uint32_t Index; |
| 354 | }; |
| 355 | |
| 356 | } // namespace llvm |
| 357 | |
| 358 | #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H |