Blame - linux-x64/clang/include/llvm/Bitcode/BitstreamReader.h - hafnium/prebuilts

blob: 90cbe7b4b1e3219f8738203dceb596019f82617c [file] [log] [blame]

Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	1	//===- BitstreamReader.h - Low-level bitstream reader interface -- C++ --===//
				2	//
Andrew Walbran	16937d0	2019-10-22 13:54:20 +0100	[diff] [blame^]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	// This header defines the BitstreamReader class. This class can be used to
				10	// read an arbitrary bitstream, regardless of its contents.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#ifndef LLVM_BITCODE_BITSTREAMREADER_H
				15	#define LLVM_BITCODE_BITSTREAMREADER_H
				16
				17	#include "llvm/ADT/ArrayRef.h"
				18	#include "llvm/ADT/SmallVector.h"
				19	#include "llvm/Bitcode/BitCodes.h"
				20	#include "llvm/Support/Endian.h"
				21	#include "llvm/Support/ErrorHandling.h"
				22	#include "llvm/Support/MathExtras.h"
				23	#include "llvm/Support/MemoryBuffer.h"
				24	#include <algorithm>
				25	#include <cassert>
				26	#include <climits>
				27	#include <cstddef>
				28	#include <cstdint>
				29	#include <memory>
				30	#include <string>
				31	#include <utility>
				32	#include <vector>
				33
				34	namespace llvm {
				35
				36	/// This class maintains the abbreviations read from a block info block.
				37	class BitstreamBlockInfo {
				38	public:
				39	/// This contains information emitted to BLOCKINFO_BLOCK blocks. These
				40	/// describe abbreviations that all blocks of the specified ID inherit.
				41	struct BlockInfo {
				42	unsigned BlockID;
				43	std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
				44	std::string Name;
				45	std::vector<std::pair<unsigned, std::string>> RecordNames;
				46	};
				47
				48	private:
				49	std::vector<BlockInfo> BlockInfoRecords;
				50
				51	public:
				52	/// If there is block info for the specified ID, return it, otherwise return
				53	/// null.
				54	const BlockInfo *getBlockInfo(unsigned BlockID) const {
				55	// Common case, the most recent entry matches BlockID.
				56	if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
				57	return &BlockInfoRecords.back();
				58
				59	for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
				60	i != e; ++i)
				61	if (BlockInfoRecords[i].BlockID == BlockID)
				62	return &BlockInfoRecords[i];
				63	return nullptr;
				64	}
				65
				66	BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
				67	if (const BlockInfo *BI = getBlockInfo(BlockID))
				68	return const_cast<BlockInfo>(BI);
				69
				70	// Otherwise, add a new record.
				71	BlockInfoRecords.emplace_back();
				72	BlockInfoRecords.back().BlockID = BlockID;
				73	return BlockInfoRecords.back();
				74	}
				75	};
				76
				77	/// This represents a position within a bitstream. There may be multiple
				78	/// independent cursors reading within one bitstream, each maintaining their
				79	/// own local state.
				80	class SimpleBitstreamCursor {
				81	ArrayRef<uint8_t> BitcodeBytes;
				82	size_t NextChar = 0;
				83
				84	public:
				85	/// This is the current data we have pulled from the stream but have not
				86	/// returned to the client. This is specifically and intentionally defined to
				87	/// follow the word size of the host machine for efficiency. We use word_t in
				88	/// places that are aware of this to make it perfectly explicit what is going
				89	/// on.
				90	using word_t = size_t;
				91
				92	private:
				93	word_t CurWord = 0;
				94
				95	/// This is the number of bits in CurWord that are valid. This is always from
				96	/// [0...bits_of(size_t)-1] inclusive.
				97	unsigned BitsInCurWord = 0;
				98
				99	public:
				100	static const size_t MaxChunkSize = sizeof(word_t) * 8;
				101
				102	SimpleBitstreamCursor() = default;
				103	explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
				104	: BitcodeBytes(BitcodeBytes) {}
				105	explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
				106	: BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()),
				107	BitcodeBytes.size()) {}
				108	explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
				109	: SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
				110
				111	bool canSkipToPos(size_t pos) const {
				112	// pos can be skipped to if it is a valid address or one byte past the end.
				113	return pos <= BitcodeBytes.size();
				114	}
				115
				116	bool AtEndOfStream() {
				117	return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
				118	}
				119
				120	/// Return the bit # of the bit we are reading.
				121	uint64_t GetCurrentBitNo() const {
				122	return NextChar*CHAR_BIT - BitsInCurWord;
				123	}
				124
				125	// Return the byte # of the current bit.
				126	uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
				127
				128	ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
				129
				130	/// Reset the stream to the specified bit number.
				131	void JumpToBit(uint64_t BitNo) {
				132	size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
				133	unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
				134	assert(canSkipToPos(ByteNo) && "Invalid location");
				135
				136	// Move the cursor to the right word.
				137	NextChar = ByteNo;
				138	BitsInCurWord = 0;
				139
				140	// Skip over any bits that are already consumed.
				141	if (WordBitNo)
				142	Read(WordBitNo);
				143	}
				144
				145	/// Get a pointer into the bitstream at the specified byte offset.
				146	const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
				147	return BitcodeBytes.data() + ByteNo;
				148	}
				149
				150	/// Get a pointer into the bitstream at the specified bit offset.
				151	///
				152	/// The bit offset must be on a byte boundary.
				153	const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
				154	assert(!(BitNo % 8) && "Expected bit on byte boundary");
				155	return getPointerToByte(BitNo / 8, NumBytes);
				156	}
				157
				158	void fillCurWord() {
				159	if (NextChar >= BitcodeBytes.size())
				160	report_fatal_error("Unexpected end of file");
				161
				162	// Read the next word from the stream.
				163	const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
				164	unsigned BytesRead;
				165	if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
				166	BytesRead = sizeof(word_t);
				167	CurWord =
				168	support::endian::read<word_t, support::little, support::unaligned>(
				169	NextCharPtr);
				170	} else {
				171	// Short read.
				172	BytesRead = BitcodeBytes.size() - NextChar;
				173	CurWord = 0;
				174	for (unsigned B = 0; B != BytesRead; ++B)
				175	CurWord \|= uint64_t(NextCharPtr[B]) << (B * 8);
				176	}
				177	NextChar += BytesRead;
				178	BitsInCurWord = BytesRead * 8;
				179	}
				180
				181	word_t Read(unsigned NumBits) {
				182	static const unsigned BitsInWord = MaxChunkSize;
				183
				184	assert(NumBits && NumBits <= BitsInWord &&
				185	"Cannot return zero or more than BitsInWord bits!");
				186
				187	static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
				188
				189	// If the field is fully contained by CurWord, return it quickly.
				190	if (BitsInCurWord >= NumBits) {
				191	word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
				192
				193	// Use a mask to avoid undefined behavior.
				194	CurWord >>= (NumBits & Mask);
				195
				196	BitsInCurWord -= NumBits;
				197	return R;
				198	}
				199
				200	word_t R = BitsInCurWord ? CurWord : 0;
				201	unsigned BitsLeft = NumBits - BitsInCurWord;
				202
				203	fillCurWord();
				204
				205	// If we run out of data, abort.
				206	if (BitsLeft > BitsInCurWord)
				207	report_fatal_error("Unexpected end of file");
				208
				209	word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
				210
				211	// Use a mask to avoid undefined behavior.
				212	CurWord >>= (BitsLeft & Mask);
				213
				214	BitsInCurWord -= BitsLeft;
				215
				216	R \|= R2 << (NumBits - BitsLeft);
				217
				218	return R;
				219	}
				220
				221	uint32_t ReadVBR(unsigned NumBits) {
				222	uint32_t Piece = Read(NumBits);
				223	if ((Piece & (1U << (NumBits-1))) == 0)
				224	return Piece;
				225
				226	uint32_t Result = 0;
				227	unsigned NextBit = 0;
				228	while (true) {
				229	Result \|= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
				230
				231	if ((Piece & (1U << (NumBits-1))) == 0)
				232	return Result;
				233
				234	NextBit += NumBits-1;
				235	Piece = Read(NumBits);
				236	}
				237	}
				238
				239	// Read a VBR that may have a value up to 64-bits in size. The chunk size of
				240	// the VBR must still be <= 32 bits though.
				241	uint64_t ReadVBR64(unsigned NumBits) {
				242	uint32_t Piece = Read(NumBits);
				243	if ((Piece & (1U << (NumBits-1))) == 0)
				244	return uint64_t(Piece);
				245
				246	uint64_t Result = 0;
				247	unsigned NextBit = 0;
				248	while (true) {
				249	Result \|= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
				250
				251	if ((Piece & (1U << (NumBits-1))) == 0)
				252	return Result;
				253
				254	NextBit += NumBits-1;
				255	Piece = Read(NumBits);
				256	}
				257	}
				258
				259	void SkipToFourByteBoundary() {
				260	// If word_t is 64-bits and if we've read less than 32 bits, just dump
				261	// the bits we have up to the next 32-bit boundary.
				262	if (sizeof(word_t) > 4 &&
				263	BitsInCurWord >= 32) {
				264	CurWord >>= BitsInCurWord-32;
				265	BitsInCurWord = 32;
				266	return;
				267	}
				268
				269	BitsInCurWord = 0;
				270	}
				271
				272	/// Skip to the end of the file.
				273	void skipToEnd() { NextChar = BitcodeBytes.size(); }
				274	};
				275
				276	/// When advancing through a bitstream cursor, each advance can discover a few
				277	/// different kinds of entries:
				struct BitstreamEntry {
				  /// What kind of entry this is.
				  enum {
				    /// Malformed bitcode was found.
				    Error,
				    /// We've reached the end of the current block (or the end of the file,
				    /// which is treated like a series of EndBlock records).
				    EndBlock,
				    /// This is the start of a new subblock of a specific ID.
				    SubBlock,
				    /// This is a record with a specific AbbrevID.
				    Record
				  } Kind;

				  /// Block ID for SubBlock entries, abbrev ID for Record entries. The Error
				  /// and EndBlock factories below leave it unset.
				  unsigned ID;

				  /// Build an Error entry.
				  static BitstreamEntry getError() {
				    BitstreamEntry Result;
				    Result.Kind = Error;
				    return Result;
				  }

				  /// Build an EndBlock entry.
				  static BitstreamEntry getEndBlock() {
				    BitstreamEntry Result;
				    Result.Kind = EndBlock;
				    return Result;
				  }

				  /// Build a SubBlock entry carrying the block's \p ID.
				  static BitstreamEntry getSubBlock(unsigned ID) {
				    BitstreamEntry Result;
				    Result.ID = ID;
				    Result.Kind = SubBlock;
				    return Result;
				  }

				  /// Build a Record entry carrying \p AbbrevID.
				  static BitstreamEntry getRecord(unsigned AbbrevID) {
				    BitstreamEntry Result;
				    Result.ID = AbbrevID;
				    Result.Kind = Record;
				    return Result;
				  }
				};
				305
				306	/// This represents a position within a bitcode file, implemented on top of a
				307	/// SimpleBitstreamCursor.
				308	///
				309	/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
				310	/// be passed by value.
				311	class BitstreamCursor : SimpleBitstreamCursor {
				312	// This is the declared size of code values used for the current block, in
				313	// bits.
				314	unsigned CurCodeSize = 2;
				315
				316	/// Abbrevs installed at in this block.
				317	std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
				318
				319	struct Block {
				320	unsigned PrevCodeSize;
				321	std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
				322
				323	explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
				324	};
				325
				326	/// This tracks the codesize of parent blocks.
				327	SmallVector<Block, 8> BlockScope;
				328
				329	BitstreamBlockInfo *BlockInfo = nullptr;
				330
				331	public:
				332	static const size_t MaxChunkSize = sizeof(word_t) * 8;
				333
				334	BitstreamCursor() = default;
				335	explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
				336	: SimpleBitstreamCursor(BitcodeBytes) {}
				337	explicit BitstreamCursor(StringRef BitcodeBytes)
				338	: SimpleBitstreamCursor(BitcodeBytes) {}
				339	explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
				340	: SimpleBitstreamCursor(BitcodeBytes) {}
				341
				342	using SimpleBitstreamCursor::canSkipToPos;
				343	using SimpleBitstreamCursor::AtEndOfStream;
				344	using SimpleBitstreamCursor::getBitcodeBytes;
				345	using SimpleBitstreamCursor::GetCurrentBitNo;
				346	using SimpleBitstreamCursor::getCurrentByteNo;
				347	using SimpleBitstreamCursor::getPointerToByte;
				348	using SimpleBitstreamCursor::JumpToBit;
				349	using SimpleBitstreamCursor::fillCurWord;
				350	using SimpleBitstreamCursor::Read;
				351	using SimpleBitstreamCursor::ReadVBR;
				352	using SimpleBitstreamCursor::ReadVBR64;
				353
				354	/// Return the number of bits used to encode an abbrev #.
				355	unsigned getAbbrevIDWidth() const { return CurCodeSize; }
				356
				357	/// Flags that modify the behavior of advance().
				358	enum {
				359	/// If this flag is used, the advance() method does not automatically pop
				360	/// the block scope when the end of a block is reached.
				361	AF_DontPopBlockAtEnd = 1,
				362
				363	/// If this flag is used, abbrev entries are returned just like normal
				364	/// records.
				365	AF_DontAutoprocessAbbrevs = 2
				366	};
				367
				368	/// Advance the current bitstream, returning the next entry in the stream.
				369	BitstreamEntry advance(unsigned Flags = 0) {
				370	while (true) {
				371	if (AtEndOfStream())
				372	return BitstreamEntry::getError();
				373
				374	unsigned Code = ReadCode();
				375	if (Code == bitc::END_BLOCK) {
				376	// Pop the end of the block unless Flags tells us not to.
				377	if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
				378	return BitstreamEntry::getError();
				379	return BitstreamEntry::getEndBlock();
				380	}
				381
				382	if (Code == bitc::ENTER_SUBBLOCK)
				383	return BitstreamEntry::getSubBlock(ReadSubBlockID());
				384
				385	if (Code == bitc::DEFINE_ABBREV &&
				386	!(Flags & AF_DontAutoprocessAbbrevs)) {
				387	// We read and accumulate abbrev's, the client can't do anything with
				388	// them anyway.
				389	ReadAbbrevRecord();
				390	continue;
				391	}
				392
				393	return BitstreamEntry::getRecord(Code);
				394	}
				395	}
				396
				397	/// This is a convenience function for clients that don't expect any
				398	/// subblocks. This just skips over them automatically.
				399	BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
				400	while (true) {
				401	// If we found a normal entry, return it.
				402	BitstreamEntry Entry = advance(Flags);
				403	if (Entry.Kind != BitstreamEntry::SubBlock)
				404	return Entry;
				405
				406	// If we found a sub-block, just skip over it and check the next entry.
				407	if (SkipBlock())
				408	return BitstreamEntry::getError();
				409	}
				410	}
				411
				412	unsigned ReadCode() {
				413	return Read(CurCodeSize);
				414	}
				415
				416	// Block header:
				417	// [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
				418
				419	/// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
				420	unsigned ReadSubBlockID() {
				421	return ReadVBR(bitc::BlockIDWidth);
				422	}
				423
				424	/// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
				425	/// of this block. If the block record is malformed, return true.
				426	bool SkipBlock() {
				427	// Read and ignore the codelen value. Since we are skipping this block, we
				428	// don't care what code widths are used inside of it.
				429	ReadVBR(bitc::CodeLenWidth);
				430	SkipToFourByteBoundary();
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	431	size_t NumFourBytes = Read(bitc::BlockSizeWidth);
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	432
				433	// Check that the block wasn't partially defined, and that the offset isn't
				434	// bogus.
				435	size_t SkipTo = GetCurrentBitNo() + NumFourBytes48;
				436	if (AtEndOfStream() \|\| !canSkipToPos(SkipTo/8))
				437	return true;
				438
				439	JumpToBit(SkipTo);
				440	return false;
				441	}
				442
				443	/// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
				444	/// if the block has an error.
				445	bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
				446
				447	bool ReadBlockEnd() {
				448	if (BlockScope.empty()) return true;
				449
				450	// Block tail:
				451	// [END_BLOCK, <align4bytes>]
				452	SkipToFourByteBoundary();
				453
				454	popBlockScope();
				455	return false;
				456	}
				457
				458	private:
				459	void popBlockScope() {
				460	CurCodeSize = BlockScope.back().PrevCodeSize;
				461
				462	CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
				463	BlockScope.pop_back();
				464	}
				465
				466	//===--------------------------------------------------------------------===//
				467	// Record Processing
				468	//===--------------------------------------------------------------------===//
				469
				470	public:
				471	/// Return the abbreviation for the specified AbbrevId.
				472	const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
				473	unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
				474	if (AbbrevNo >= CurAbbrevs.size())
				475	report_fatal_error("Invalid abbrev number");
				476	return CurAbbrevs[AbbrevNo].get();
				477	}
				478
				479	/// Read the current record and discard it, returning the code for the record.
				480	unsigned skipRecord(unsigned AbbrevID);
				481
				482	unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
				483	StringRef *Blob = nullptr);
				484
				485	//===--------------------------------------------------------------------===//
				486	// Abbrev Processing
				487	//===--------------------------------------------------------------------===//
				488	void ReadAbbrevRecord();
				489
				490	/// Read and return a block info block from the bitstream. If an error was
				491	/// encountered, return None.
				492	///
				493	/// \param ReadBlockInfoNames Whether to read block/record name information in
				494	/// the BlockInfo block. Only llvm-bcanalyzer uses this.
				495	Optional<BitstreamBlockInfo>
				496	ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
				497
				498	/// Set the block info to be used by this BitstreamCursor to interpret
				499	/// abbreviated records.
				500	void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
				501	};
				502
				503	} // end llvm namespace
				504
				505	#endif // LLVM_BITCODE_BITSTREAMREADER_H