blob: 96d09db69ae53685b3d31b9c24ff660f033e0a82 [file] [log] [blame]
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001//===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
2//
Andrew Walbran16937d02019-10-22 13:54:20 +01003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01006//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
10#define LLVM_SUPPORT_BINARYSTREAMARRAY_H
11
12#include "llvm/ADT/ArrayRef.h"
13#include "llvm/ADT/iterator.h"
14#include "llvm/Support/BinaryStreamRef.h"
15#include "llvm/Support/Error.h"
16#include <cassert>
17#include <cstdint>
18
19/// Lightweight arrays that are backed by an arbitrary BinaryStream. This file
20/// provides two different array implementations.
21///
22/// VarStreamArray - Arrays of variable length records. The user specifies
23/// an Extractor type that can extract a record from a given offset and
24/// return the number of bytes consumed by the record.
25///
26/// FixedStreamArray - Arrays of fixed length records. This is similar in
27/// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
28/// elements of the array need not be laid out in contiguous memory.
29namespace llvm {
30
31/// VarStreamArrayExtractor is intended to be specialized to provide customized
32/// extraction logic. On input it receives a BinaryStreamRef pointing to the
33/// beginning of the next record, but where the length of the record is not yet
34/// known. Upon completion, it should return an appropriate Error instance if
35/// a record could not be extracted, or if one could be extracted it should
36/// return success and set Len to the number of bytes this record occupied in
37/// the underlying stream, and it should fill out the fields of the value type
38/// Item appropriately to represent the current record.
39///
40/// You can specialize this template for your own custom value types to avoid
41/// having to specify a second template argument to VarStreamArray (documented
42/// below).
43template <typename T> struct VarStreamArrayExtractor {
44 // Method intentionally deleted. You must provide an explicit specialization
45 // with the following method implemented.
46 Error operator()(BinaryStreamRef Stream, uint32_t &Len,
47 T &Item) const = delete;
48};
49
50/// VarStreamArray represents an array of variable length records backed by a
51/// stream. This could be a contiguous sequence of bytes in memory, it could
52/// be a file on disk, or it could be a PDB stream where bytes are stored as
53/// discontiguous blocks in a file. Usually it is desirable to treat arrays
54/// as contiguous blocks of memory, but doing so with large PDB files, for
55/// example, could mean allocating huge amounts of memory just to allow
56/// re-ordering of stream data to be contiguous before iterating over it. By
57/// abstracting this out, we need not duplicate this memory, and we can
58/// iterate over arrays in arbitrarily formatted streams. Elements are parsed
59/// lazily on iteration, so there is no upfront cost associated with building
60/// or copying a VarStreamArray, no matter how large it may be.
61///
62/// You create a VarStreamArray by specifying a ValueType and an Extractor type.
63/// If you do not specify an Extractor type, you are expected to specialize
64/// VarStreamArrayExtractor<T> for your ValueType.
65///
66/// By default an Extractor is default constructed in the class, but in some
67/// cases you might find it useful for an Extractor to maintain state across
68/// extractions. In this case you can provide your own Extractor through a
69/// secondary constructor. The following examples show various ways of
70/// creating a VarStreamArray.
71///
72/// // Will use VarStreamArrayExtractor<MyType> as the extractor.
73/// VarStreamArray<MyType> MyTypeArray;
74///
75/// // Will use a default-constructed MyExtractor as the extractor.
76/// VarStreamArray<MyType, MyExtractor> MyTypeArray2;
77///
78/// // Will use the specific instance of MyExtractor provided.
79/// // MyExtractor need not be default-constructible in this case.
80/// MyExtractor E(SomeContext);
81/// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
82///
83
84template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
85
86template <typename ValueType,
87 typename Extractor = VarStreamArrayExtractor<ValueType>>
88class VarStreamArray {
89 friend class VarStreamArrayIterator<ValueType, Extractor>;
90
91public:
92 typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
93
94 VarStreamArray() = default;
95
96 explicit VarStreamArray(const Extractor &E) : E(E) {}
97
Andrew Walbran16937d02019-10-22 13:54:20 +010098 explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
99 : Stream(Stream), Skew(Skew) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100100
Andrew Walbran16937d02019-10-22 13:54:20 +0100101 VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
102 : Stream(Stream), E(E), Skew(Skew) {}
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100103
104 Iterator begin(bool *HadError = nullptr) const {
Andrew Walbran16937d02019-10-22 13:54:20 +0100105 return Iterator(*this, E, Skew, nullptr);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100106 }
107
108 bool valid() const { return Stream.valid(); }
109
Andrew Walbran16937d02019-10-22 13:54:20 +0100110 uint32_t skew() const { return Skew; }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100111 Iterator end() const { return Iterator(E); }
112
113 bool empty() const { return Stream.getLength() == 0; }
114
Andrew Walbran16937d02019-10-22 13:54:20 +0100115 VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
116 uint32_t End) const {
117 assert(Begin >= Skew);
118 // We should never cut off the beginning of the stream since it might be
119 // skewed, meaning the initial bytes are important.
120 BinaryStreamRef NewStream = Stream.slice(0, End);
121 return {NewStream, E, Begin};
122 }
123
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100124 /// given an offset into the array's underlying stream, return an
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100125 /// iterator to the record at that offset. This is considered unsafe
126 /// since the behavior is undefined if \p Offset does not refer to the
127 /// beginning of a valid record.
128 Iterator at(uint32_t Offset) const {
129 return Iterator(*this, E, Offset, nullptr);
130 }
131
132 const Extractor &getExtractor() const { return E; }
133 Extractor &getExtractor() { return E; }
134
135 BinaryStreamRef getUnderlyingStream() const { return Stream; }
Andrew Walbran16937d02019-10-22 13:54:20 +0100136 void setUnderlyingStream(BinaryStreamRef S, uint32_t Skew = 0) {
137 Stream = S;
138 this->Skew = Skew;
139 }
140
141 void drop_front() { Skew += begin()->length(); }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100142
143private:
144 BinaryStreamRef Stream;
145 Extractor E;
Andrew Walbran16937d02019-10-22 13:54:20 +0100146 uint32_t Skew;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100147};
148
149template <typename ValueType, typename Extractor>
150class VarStreamArrayIterator
151 : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
152 std::forward_iterator_tag, ValueType> {
153 typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
154 typedef VarStreamArray<ValueType, Extractor> ArrayType;
155
156public:
157 VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
Andrew Scull5e1ddfa2018-08-14 10:06:54 +0100158 uint32_t Offset, bool *HadError)
159 : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
160 Array(&Array), AbsOffset(Offset), HadError(HadError) {
161 if (IterRef.getLength() == 0)
162 moveToEnd();
163 else {
164 auto EC = Extract(IterRef, ThisLen, ThisValue);
165 if (EC) {
166 consumeError(std::move(EC));
167 markError();
168 }
169 }
170 }
171
172 VarStreamArrayIterator() = default;
173 explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
174 ~VarStreamArrayIterator() = default;
175
176 bool operator==(const IterType &R) const {
177 if (Array && R.Array) {
178 // Both have a valid array, make sure they're same.
179 assert(Array == R.Array);
180 return IterRef == R.IterRef;
181 }
182
183 // Both iterators are at the end.
184 if (!Array && !R.Array)
185 return true;
186
187 // One is not at the end and one is.
188 return false;
189 }
190
191 const ValueType &operator*() const {
192 assert(Array && !HasError);
193 return ThisValue;
194 }
195
196 ValueType &operator*() {
197 assert(Array && !HasError);
198 return ThisValue;
199 }
200
201 IterType &operator+=(unsigned N) {
202 for (unsigned I = 0; I < N; ++I) {
203 // We are done with the current record, discard it so that we are
204 // positioned at the next record.
205 AbsOffset += ThisLen;
206 IterRef = IterRef.drop_front(ThisLen);
207 if (IterRef.getLength() == 0) {
208 // There is nothing after the current record, we must make this an end
209 // iterator.
210 moveToEnd();
211 } else {
212 // There is some data after the current record.
213 auto EC = Extract(IterRef, ThisLen, ThisValue);
214 if (EC) {
215 consumeError(std::move(EC));
216 markError();
217 } else if (ThisLen == 0) {
218 // An empty record? Make this an end iterator.
219 moveToEnd();
220 }
221 }
222 }
223 return *this;
224 }
225
226 uint32_t offset() const { return AbsOffset; }
227 uint32_t getRecordLength() const { return ThisLen; }
228
229private:
230 void moveToEnd() {
231 Array = nullptr;
232 ThisLen = 0;
233 }
234 void markError() {
235 moveToEnd();
236 HasError = true;
237 if (HadError != nullptr)
238 *HadError = true;
239 }
240
241 ValueType ThisValue;
242 BinaryStreamRef IterRef;
243 Extractor Extract;
244 const ArrayType *Array{nullptr};
245 uint32_t ThisLen{0};
246 uint32_t AbsOffset{0};
247 bool HasError{false};
248 bool *HadError{nullptr};
249};
250
251template <typename T> class FixedStreamArrayIterator;
252
253/// FixedStreamArray is similar to VarStreamArray, except with each record
254/// having a fixed-length. As with VarStreamArray, there is no upfront
255/// cost associated with building or copying a FixedStreamArray, as the
256/// memory for each element is not read from the backing stream until that
257/// element is iterated.
258template <typename T> class FixedStreamArray {
259 friend class FixedStreamArrayIterator<T>;
260
261public:
262 typedef FixedStreamArrayIterator<T> Iterator;
263
264 FixedStreamArray() = default;
265 explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
266 assert(Stream.getLength() % sizeof(T) == 0);
267 }
268
269 bool operator==(const FixedStreamArray<T> &Other) const {
270 return Stream == Other.Stream;
271 }
272
273 bool operator!=(const FixedStreamArray<T> &Other) const {
274 return !(*this == Other);
275 }
276
277 FixedStreamArray &operator=(const FixedStreamArray &) = default;
278
279 const T &operator[](uint32_t Index) const {
280 assert(Index < size());
281 uint32_t Off = Index * sizeof(T);
282 ArrayRef<uint8_t> Data;
283 if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
284 assert(false && "Unexpected failure reading from stream");
285 // This should never happen since we asserted that the stream length was
286 // an exact multiple of the element size.
287 consumeError(std::move(EC));
288 }
289 assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0);
290 return *reinterpret_cast<const T *>(Data.data());
291 }
292
293 uint32_t size() const { return Stream.getLength() / sizeof(T); }
294
295 bool empty() const { return size() == 0; }
296
297 FixedStreamArrayIterator<T> begin() const {
298 return FixedStreamArrayIterator<T>(*this, 0);
299 }
300
301 FixedStreamArrayIterator<T> end() const {
302 return FixedStreamArrayIterator<T>(*this, size());
303 }
304
305 const T &front() const { return *begin(); }
306 const T &back() const {
307 FixedStreamArrayIterator<T> I = end();
308 return *(--I);
309 }
310
311 BinaryStreamRef getUnderlyingStream() const { return Stream; }
312
313private:
314 BinaryStreamRef Stream;
315};
316
317template <typename T>
318class FixedStreamArrayIterator
319 : public iterator_facade_base<FixedStreamArrayIterator<T>,
320 std::random_access_iterator_tag, const T> {
321
322public:
323 FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
324 : Array(Array), Index(Index) {}
325
326 FixedStreamArrayIterator<T> &
327 operator=(const FixedStreamArrayIterator<T> &Other) {
328 Array = Other.Array;
329 Index = Other.Index;
330 return *this;
331 }
332
333 const T &operator*() const { return Array[Index]; }
334 const T &operator*() { return Array[Index]; }
335
336 bool operator==(const FixedStreamArrayIterator<T> &R) const {
337 assert(Array == R.Array);
338 return (Index == R.Index) && (Array == R.Array);
339 }
340
341 FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
342 Index += N;
343 return *this;
344 }
345
346 FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
347 assert(std::ptrdiff_t(Index) >= N);
348 Index -= N;
349 return *this;
350 }
351
352 std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
353 assert(Array == R.Array);
354 assert(Index >= R.Index);
355 return Index - R.Index;
356 }
357
358 bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
359 assert(Array == RHS.Array);
360 return Index < RHS.Index;
361 }
362
363private:
364 FixedStreamArray<T> Array;
365 uint32_t Index;
366};
367
368} // namespace llvm
369
370#endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H