Blame - linux-x64/clang/include/llvm/Support/OnDiskHashTable.h - hafnium/prebuilts

blob: 11dc0de0f354ca0cc4689fbb7f4be934d8d7abda [file] [log] [blame]

Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	1	//===--- OnDiskHashTable.h - On-Disk Hash Table Implementation --- C++ --===//
				2	//
Andrew Walbran	16937d0	2019-10-22 13:54:20 +0100	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	///
				9	/// \file
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	10	/// Defines facilities for reading and writing on-disk hash tables.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	11	///
				12	//===----------------------------------------------------------------------===//
				13	#ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H
				14	#define LLVM_SUPPORT_ONDISKHASHTABLE_H
				15
Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	16	#include "llvm/Support/Alignment.h"
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	17	#include "llvm/Support/Allocator.h"
				18	#include "llvm/Support/DataTypes.h"
				19	#include "llvm/Support/EndianStream.h"
				20	#include "llvm/Support/Host.h"
				21	#include "llvm/Support/MathExtras.h"
				22	#include "llvm/Support/raw_ostream.h"
				23	#include <cassert>
				24	#include <cstdlib>
				25
				26	namespace llvm {
				27
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	28	/// Generates an on disk hash table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	29	///
				30	/// This needs an \c Info that handles storing values into the hash table's
				31	/// payload and computes the hash for a given key. This should provide the
				32	/// following interface:
				33	///
				34	/// \code
				35	/// class ExampleInfo {
				36	/// public:
				37	/// typedef ExampleKey key_type; // Must be copy constructible
				38	/// typedef ExampleKey &key_type_ref;
				39	/// typedef ExampleData data_type; // Must be copy constructible
				40	/// typedef ExampleData &data_type_ref;
				41	/// typedef uint32_t hash_value_type; // The type the hash function returns.
				42	/// typedef uint32_t offset_type; // The type for offsets into the table.
				43	///
				44	/// /// Calculate the hash for Key
				45	/// static hash_value_type ComputeHash(key_type_ref Key);
				46	/// /// Return the lengths, in bytes, of the given Key/Data pair.
				47	/// static std::pair<offset_type, offset_type>
				48	/// EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data);
				49	/// /// Write Key to Out. KeyLen is the length from EmitKeyDataLength.
				50	/// static void EmitKey(raw_ostream &Out, key_type_ref Key,
				51	/// offset_type KeyLen);
				52	/// /// Write Data to Out. DataLen is the length from EmitKeyDataLength.
				53	/// static void EmitData(raw_ostream &Out, key_type_ref Key,
				54	/// data_type_ref Data, offset_type DataLen);
				55	/// /// Determine if two keys are equal. Optional, only needed by contains.
				56	/// static bool EqualKey(key_type_ref Key1, key_type_ref Key2);
				57	/// };
				58	/// \endcode
				59	template <typename Info> class OnDiskChainedHashTableGenerator {
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	60	/// A single item in the hash table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	61	class Item {
				62	public:
				63	typename Info::key_type Key;
				64	typename Info::data_type Data;
				65	Item *Next;
				66	const typename Info::hash_value_type Hash;
				67
				68	Item(typename Info::key_type_ref Key, typename Info::data_type_ref Data,
				69	Info &InfoObj)
				70	: Key(Key), Data(Data), Next(nullptr), Hash(InfoObj.ComputeHash(Key)) {}
				71	};
				72
				73	typedef typename Info::offset_type offset_type;
				74	offset_type NumBuckets;
				75	offset_type NumEntries;
				76	llvm::SpecificBumpPtrAllocator<Item> BA;
				77
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	78	/// A linked list of values in a particular hash bucket.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	79	struct Bucket {
				80	offset_type Off;
				81	unsigned Length;
				82	Item *Head;
				83	};
				84
				85	Bucket *Buckets;
				86
				87	private:
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	88	/// Insert an item into the appropriate hash bucket.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	89	void insert(Bucket Buckets, size_t Size, Item E) {
				90	Bucket &B = Buckets[E->Hash & (Size - 1)];
				91	E->Next = B.Head;
				92	++B.Length;
				93	B.Head = E;
				94	}
				95
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	96	/// Resize the hash table, moving the old entries into the new buckets.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	97	void resize(size_t NewSize) {
				98	Bucket NewBuckets = static_cast<Bucket >(
				99	safe_calloc(NewSize, sizeof(Bucket)));
				100	// Populate NewBuckets with the old entries.
				101	for (size_t I = 0; I < NumBuckets; ++I)
				102	for (Item *E = Buckets[I].Head; E;) {
				103	Item *N = E->Next;
				104	E->Next = nullptr;
				105	insert(NewBuckets, NewSize, E);
				106	E = N;
				107	}
				108
				109	free(Buckets);
				110	NumBuckets = NewSize;
				111	Buckets = NewBuckets;
				112	}
				113
				114	public:
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	115	/// Insert an entry into the table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	116	void insert(typename Info::key_type_ref Key,
				117	typename Info::data_type_ref Data) {
				118	Info InfoObj;
				119	insert(Key, Data, InfoObj);
				120	}
				121
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	122	/// Insert an entry into the table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	123	///
				124	/// Uses the provided Info instead of a stack allocated one.
				125	void insert(typename Info::key_type_ref Key,
				126	typename Info::data_type_ref Data, Info &InfoObj) {
				127	++NumEntries;
				128	if (4 * NumEntries >= 3 * NumBuckets)
				129	resize(NumBuckets * 2);
				130	insert(Buckets, NumBuckets, new (BA.Allocate()) Item(Key, Data, InfoObj));
				131	}
				132
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	133	/// Determine whether an entry has been inserted.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	134	bool contains(typename Info::key_type_ref Key, Info &InfoObj) {
				135	unsigned Hash = InfoObj.ComputeHash(Key);
				136	for (Item *I = Buckets[Hash & (NumBuckets - 1)].Head; I; I = I->Next)
				137	if (I->Hash == Hash && InfoObj.EqualKey(I->Key, Key))
				138	return true;
				139	return false;
				140	}
				141
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	142	/// Emit the table to Out, which must not be at offset 0.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	143	offset_type Emit(raw_ostream &Out) {
				144	Info InfoObj;
				145	return Emit(Out, InfoObj);
				146	}
				147
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	148	/// Emit the table to Out, which must not be at offset 0.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	149	///
				150	/// Uses the provided Info instead of a stack allocated one.
				151	offset_type Emit(raw_ostream &Out, Info &InfoObj) {
				152	using namespace llvm::support;
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	153	endian::Writer LE(Out, little);
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	154
				155	// Now we're done adding entries, resize the bucket list if it's
				156	// significantly too large. (This only happens if the number of
				157	// entries is small and we're within our initial allocation of
				158	// 64 buckets.) We aim for an occupancy ratio in [3/8, 3/4).
				159	//
				160	// As a special case, if there are two or fewer entries, just
				161	// form a single bucket. A linear scan is fine in that case, and
				162	// this is very common in C++ class lookup tables. This also
				163	// guarantees we produce at least one bucket for an empty table.
				164	//
				165	// FIXME: Try computing a perfect hash function at this point.
				166	unsigned TargetNumBuckets =
				167	NumEntries <= 2 ? 1 : NextPowerOf2(NumEntries * 4 / 3);
				168	if (TargetNumBuckets != NumBuckets)
				169	resize(TargetNumBuckets);
				170
				171	// Emit the payload of the table.
				172	for (offset_type I = 0; I < NumBuckets; ++I) {
				173	Bucket &B = Buckets[I];
				174	if (!B.Head)
				175	continue;
				176
				177	// Store the offset for the data of this bucket.
				178	B.Off = Out.tell();
				179	assert(B.Off && "Cannot write a bucket at offset 0. Please add padding.");
				180
				181	// Write out the number of items in the bucket.
				182	LE.write<uint16_t>(B.Length);
				183	assert(B.Length != 0 && "Bucket has a head but zero length?");
				184
				185	// Write out the entries in the bucket.
				186	for (Item *I = B.Head; I; I = I->Next) {
				187	LE.write<typename Info::hash_value_type>(I->Hash);
				188	const std::pair<offset_type, offset_type> &Len =
				189	InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
				190	#ifdef NDEBUG
				191	InfoObj.EmitKey(Out, I->Key, Len.first);
				192	InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
				193	#else
				194	// In asserts mode, check that the users length matches the data they
				195	// wrote.
				196	uint64_t KeyStart = Out.tell();
				197	InfoObj.EmitKey(Out, I->Key, Len.first);
				198	uint64_t DataStart = Out.tell();
				199	InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
				200	uint64_t End = Out.tell();
				201	assert(offset_type(DataStart - KeyStart) == Len.first &&
				202	"key length does not match bytes written");
				203	assert(offset_type(End - DataStart) == Len.second &&
				204	"data length does not match bytes written");
				205	#endif
				206	}
				207	}
				208
				209	// Pad with zeros so that we can start the hashtable at an aligned address.
				210	offset_type TableOff = Out.tell();
Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	211	uint64_t N = offsetToAlignment(TableOff, Align(alignof(offset_type)));
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	212	TableOff += N;
				213	while (N--)
				214	LE.write<uint8_t>(0);
				215
				216	// Emit the hashtable itself.
				217	LE.write<offset_type>(NumBuckets);
				218	LE.write<offset_type>(NumEntries);
				219	for (offset_type I = 0; I < NumBuckets; ++I)
				220	LE.write<offset_type>(Buckets[I].Off);
				221
				222	return TableOff;
				223	}
				224
				225	OnDiskChainedHashTableGenerator() {
				226	NumEntries = 0;
				227	NumBuckets = 64;
				228	// Note that we do not need to run the constructors of the individual
				229	// Bucket objects since 'calloc' returns bytes that are all 0.
				230	Buckets = static_cast<Bucket *>(safe_calloc(NumBuckets, sizeof(Bucket)));
				231	}
				232
				233	~OnDiskChainedHashTableGenerator() { std::free(Buckets); }
				234	};
				235
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	236	/// Provides lookup on an on disk hash table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	237	///
				238	/// This needs an \c Info that handles reading values from the hash table's
				239	/// payload and computes the hash for a given key. This should provide the
				240	/// following interface:
				241	///
				242	/// \code
				243	/// class ExampleLookupInfo {
				244	/// public:
				245	/// typedef ExampleData data_type;
				246	/// typedef ExampleInternalKey internal_key_type; // The stored key type.
				247	/// typedef ExampleKey external_key_type; // The type to pass to find().
				248	/// typedef uint32_t hash_value_type; // The type the hash function returns.
				249	/// typedef uint32_t offset_type; // The type for offsets into the table.
				250	///
				251	/// /// Compare two keys for equality.
				252	/// static bool EqualKey(internal_key_type &Key1, internal_key_type &Key2);
				253	/// /// Calculate the hash for the given key.
				254	/// static hash_value_type ComputeHash(internal_key_type &IKey);
				255	/// /// Translate from the semantic type of a key in the hash table to the
				256	/// /// type that is actually stored and used for hashing and comparisons.
				257	/// /// The internal and external types are often the same, in which case this
				258	/// /// can simply return the passed in value.
				259	/// static const internal_key_type &GetInternalKey(external_key_type &EKey);
				260	/// /// Read the key and data length from Buffer, leaving it pointing at the
				261	/// /// following byte.
				262	/// static std::pair<offset_type, offset_type>
				263	/// ReadKeyDataLength(const unsigned char *&Buffer);
				264	/// /// Read the key from Buffer, given the KeyLen as reported from
				265	/// /// ReadKeyDataLength.
				266	/// const internal_key_type &ReadKey(const unsigned char *Buffer,
				267	/// offset_type KeyLen);
				268	/// /// Read the data for Key from Buffer, given the DataLen as reported from
				269	/// /// ReadKeyDataLength.
				270	/// data_type ReadData(StringRef Key, const unsigned char *Buffer,
				271	/// offset_type DataLen);
				272	/// };
				273	/// \endcode
				274	template <typename Info> class OnDiskChainedHashTable {
				275	const typename Info::offset_type NumBuckets;
				276	const typename Info::offset_type NumEntries;
				277	const unsigned char *const Buckets;
				278	const unsigned char *const Base;
				279	Info InfoObj;
				280
				281	public:
				282	typedef Info InfoType;
				283	typedef typename Info::internal_key_type internal_key_type;
				284	typedef typename Info::external_key_type external_key_type;
				285	typedef typename Info::data_type data_type;
				286	typedef typename Info::hash_value_type hash_value_type;
				287	typedef typename Info::offset_type offset_type;
				288
				289	OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
				290	const unsigned char *Buckets,
				291	const unsigned char *Base,
				292	const Info &InfoObj = Info())
				293	: NumBuckets(NumBuckets), NumEntries(NumEntries), Buckets(Buckets),
				294	Base(Base), InfoObj(InfoObj) {
				295	assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
				296	"'buckets' must have a 4-byte alignment");
				297	}
				298
				299	/// Read the number of buckets and the number of entries from a hash table
				300	/// produced by OnDiskHashTableGenerator::Emit, and advance the Buckets
				301	/// pointer past them.
				302	static std::pair<offset_type, offset_type>
				303	readNumBucketsAndEntries(const unsigned char *&Buckets) {
				304	assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
				305	"buckets should be 4-byte aligned.");
				306	using namespace llvm::support;
				307	offset_type NumBuckets =
				308	endian::readNext<offset_type, little, aligned>(Buckets);
				309	offset_type NumEntries =
				310	endian::readNext<offset_type, little, aligned>(Buckets);
				311	return std::make_pair(NumBuckets, NumEntries);
				312	}
				313
				314	offset_type getNumBuckets() const { return NumBuckets; }
				315	offset_type getNumEntries() const { return NumEntries; }
				316	const unsigned char *getBase() const { return Base; }
				317	const unsigned char *getBuckets() const { return Buckets; }
				318
				319	bool isEmpty() const { return NumEntries == 0; }
				320
				321	class iterator {
				322	internal_key_type Key;
				323	const unsigned char *const Data;
				324	const offset_type Len;
				325	Info *InfoObj;
				326
				327	public:
				328	iterator() : Key(), Data(nullptr), Len(0), InfoObj(nullptr) {}
				329	iterator(const internal_key_type K, const unsigned char *D, offset_type L,
				330	Info *InfoObj)
				331	: Key(K), Data(D), Len(L), InfoObj(InfoObj) {}
				332
				333	data_type operator*() const { return InfoObj->ReadData(Key, Data, Len); }
				334
				335	const unsigned char *getDataPtr() const { return Data; }
				336	offset_type getDataLen() const { return Len; }
				337
				338	bool operator==(const iterator &X) const { return X.Data == Data; }
				339	bool operator!=(const iterator &X) const { return X.Data != Data; }
				340	};
				341
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	342	/// Look up the stored data for a particular key.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	343	iterator find(const external_key_type &EKey, Info *InfoPtr = nullptr) {
				344	const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
				345	hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
				346	return find_hashed(IKey, KeyHash, InfoPtr);
				347	}
				348
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	349	/// Look up the stored data for a particular key with a known hash.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	350	iterator find_hashed(const internal_key_type &IKey, hash_value_type KeyHash,
				351	Info *InfoPtr = nullptr) {
				352	using namespace llvm::support;
				353
				354	if (!InfoPtr)
				355	InfoPtr = &InfoObj;
				356
				357	// Each bucket is just an offset into the hash table file.
				358	offset_type Idx = KeyHash & (NumBuckets - 1);
				359	const unsigned char Bucket = Buckets + sizeof(offset_type) Idx;
				360
				361	offset_type Offset = endian::readNext<offset_type, little, aligned>(Bucket);
				362	if (Offset == 0)
				363	return iterator(); // Empty bucket.
				364	const unsigned char *Items = Base + Offset;
				365
				366	// 'Items' starts with a 16-bit unsigned integer representing the
				367	// number of items in this bucket.
				368	unsigned Len = endian::readNext<uint16_t, little, unaligned>(Items);
				369
				370	for (unsigned i = 0; i < Len; ++i) {
				371	// Read the hash.
				372	hash_value_type ItemHash =
				373	endian::readNext<hash_value_type, little, unaligned>(Items);
				374
				375	// Determine the length of the key and the data.
				376	const std::pair<offset_type, offset_type> &L =
				377	Info::ReadKeyDataLength(Items);
				378	offset_type ItemLen = L.first + L.second;
				379
				380	// Compare the hashes. If they are not the same, skip the entry entirely.
				381	if (ItemHash != KeyHash) {
				382	Items += ItemLen;
				383	continue;
				384	}
				385
				386	// Read the key.
				387	const internal_key_type &X =
				388	InfoPtr->ReadKey((const unsigned char *const)Items, L.first);
				389
				390	// If the key doesn't match just skip reading the value.
				391	if (!InfoPtr->EqualKey(X, IKey)) {
				392	Items += ItemLen;
				393	continue;
				394	}
				395
				396	// The key matches!
				397	return iterator(X, Items + L.first, L.second, InfoPtr);
				398	}
				399
				400	return iterator();
				401	}
				402
				403	iterator end() const { return iterator(); }
				404
				405	Info &getInfoObj() { return InfoObj; }
				406
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	407	/// Create the hash table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	408	///
				409	/// \param Buckets is the beginning of the hash table itself, which follows
				410	/// the payload of entire structure. This is the value returned by
				411	/// OnDiskHashTableGenerator::Emit.
				412	///
				413	/// \param Base is the point from which all offsets into the structure are
				414	/// based. This is offset 0 in the stream that was used when Emitting the
				415	/// table.
				416	static OnDiskChainedHashTable Create(const unsigned char Buckets,
				417	const unsigned char *const Base,
				418	const Info &InfoObj = Info()) {
				419	assert(Buckets > Base);
				420	auto NumBucketsAndEntries = readNumBucketsAndEntries(Buckets);
				421	return new OnDiskChainedHashTable<Info>(NumBucketsAndEntries.first,
				422	NumBucketsAndEntries.second,
				423	Buckets, Base, InfoObj);
				424	}
				425	};
				426
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	427	/// Provides lookup and iteration over an on disk hash table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	428	///
				429	/// \copydetails llvm::OnDiskChainedHashTable
				430	template <typename Info>
				431	class OnDiskIterableChainedHashTable : public OnDiskChainedHashTable<Info> {
				432	const unsigned char *Payload;
				433
				434	public:
				435	typedef OnDiskChainedHashTable<Info> base_type;
				436	typedef typename base_type::internal_key_type internal_key_type;
				437	typedef typename base_type::external_key_type external_key_type;
				438	typedef typename base_type::data_type data_type;
				439	typedef typename base_type::hash_value_type hash_value_type;
				440	typedef typename base_type::offset_type offset_type;
				441
				442	private:
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	443	/// Iterates over all of the keys in the table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	444	class iterator_base {
				445	const unsigned char *Ptr;
				446	offset_type NumItemsInBucketLeft;
				447	offset_type NumEntriesLeft;
				448
				449	public:
				450	typedef external_key_type value_type;
				451
				452	iterator_base(const unsigned char *const Ptr, offset_type NumEntries)
				453	: Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries) {}
				454	iterator_base()
				455	: Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0) {}
				456
				457	friend bool operator==(const iterator_base &X, const iterator_base &Y) {
				458	return X.NumEntriesLeft == Y.NumEntriesLeft;
				459	}
				460	friend bool operator!=(const iterator_base &X, const iterator_base &Y) {
				461	return X.NumEntriesLeft != Y.NumEntriesLeft;
				462	}
				463
				464	/// Move to the next item.
				465	void advance() {
				466	using namespace llvm::support;
				467	if (!NumItemsInBucketLeft) {
				468	// 'Items' starts with a 16-bit unsigned integer representing the
				469	// number of items in this bucket.
				470	NumItemsInBucketLeft =
				471	endian::readNext<uint16_t, little, unaligned>(Ptr);
				472	}
				473	Ptr += sizeof(hash_value_type); // Skip the hash.
				474	// Determine the length of the key and the data.
				475	const std::pair<offset_type, offset_type> &L =
				476	Info::ReadKeyDataLength(Ptr);
				477	Ptr += L.first + L.second;
				478	assert(NumItemsInBucketLeft);
				479	--NumItemsInBucketLeft;
				480	assert(NumEntriesLeft);
				481	--NumEntriesLeft;
				482	}
				483
				484	/// Get the start of the item as written by the trait (after the hash and
				485	/// immediately before the key and value length).
				486	const unsigned char *getItem() const {
				487	return Ptr + (NumItemsInBucketLeft ? 0 : 2) + sizeof(hash_value_type);
				488	}
				489	};
				490
				491	public:
				492	OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
				493	const unsigned char *Buckets,
				494	const unsigned char *Payload,
				495	const unsigned char *Base,
				496	const Info &InfoObj = Info())
				497	: base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj),
				498	Payload(Payload) {}
				499
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	500	/// Iterates over all of the keys in the table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	501	class key_iterator : public iterator_base {
				502	Info *InfoObj;
				503
				504	public:
				505	typedef external_key_type value_type;
				506
				507	key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
				508	Info *InfoObj)
				509	: iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
				510	key_iterator() : iterator_base(), InfoObj() {}
				511
				512	key_iterator &operator++() {
				513	this->advance();
				514	return *this;
				515	}
				516	key_iterator operator++(int) { // Postincrement
				517	key_iterator tmp = *this;
				518	++*this;
				519	return tmp;
				520	}
				521
				522	internal_key_type getInternalKey() const {
				523	auto *LocalPtr = this->getItem();
				524
				525	// Determine the length of the key and the data.
				526	auto L = Info::ReadKeyDataLength(LocalPtr);
				527
				528	// Read the key.
				529	return InfoObj->ReadKey(LocalPtr, L.first);
				530	}
				531
				532	value_type operator*() const {
				533	return InfoObj->GetExternalKey(getInternalKey());
				534	}
				535	};
				536
				537	key_iterator key_begin() {
				538	return key_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
				539	}
				540	key_iterator key_end() { return key_iterator(); }
				541
				542	iterator_range<key_iterator> keys() {
				543	return make_range(key_begin(), key_end());
				544	}
				545
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	546	/// Iterates over all the entries in the table, returning the data.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	547	class data_iterator : public iterator_base {
				548	Info *InfoObj;
				549
				550	public:
				551	typedef data_type value_type;
				552
				553	data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
				554	Info *InfoObj)
				555	: iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
				556	data_iterator() : iterator_base(), InfoObj() {}
				557
				558	data_iterator &operator++() { // Preincrement
				559	this->advance();
				560	return *this;
				561	}
				562	data_iterator operator++(int) { // Postincrement
				563	data_iterator tmp = *this;
				564	++*this;
				565	return tmp;
				566	}
				567
				568	value_type operator*() const {
				569	auto *LocalPtr = this->getItem();
				570
				571	// Determine the length of the key and the data.
				572	auto L = Info::ReadKeyDataLength(LocalPtr);
				573
				574	// Read the key.
				575	const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first);
				576	return InfoObj->ReadData(Key, LocalPtr + L.first, L.second);
				577	}
				578	};
				579
				580	data_iterator data_begin() {
				581	return data_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
				582	}
				583	data_iterator data_end() { return data_iterator(); }
				584
				585	iterator_range<data_iterator> data() {
				586	return make_range(data_begin(), data_end());
				587	}
				588
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	589	/// Create the hash table.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	590	///
				591	/// \param Buckets is the beginning of the hash table itself, which follows
				592	/// the payload of entire structure. This is the value returned by
				593	/// OnDiskHashTableGenerator::Emit.
				594	///
				595	/// \param Payload is the beginning of the data contained in the table. This
				596	/// is Base plus any padding or header data that was stored, ie, the offset
				597	/// that the stream was at when calling Emit.
				598	///
				599	/// \param Base is the point from which all offsets into the structure are
				600	/// based. This is offset 0 in the stream that was used when Emitting the
				601	/// table.
				602	static OnDiskIterableChainedHashTable *
				603	Create(const unsigned char Buckets, const unsigned char const Payload,
				604	const unsigned char *const Base, const Info &InfoObj = Info()) {
				605	assert(Buckets > Base);
				606	auto NumBucketsAndEntries =
				607	OnDiskIterableChainedHashTable<Info>::readNumBucketsAndEntries(Buckets);
				608	return new OnDiskIterableChainedHashTable<Info>(
				609	NumBucketsAndEntries.first, NumBucketsAndEntries.second,
				610	Buckets, Payload, Base, InfoObj);
				611	}
				612	};
				613
				614	} // end namespace llvm
				615
				616	#endif