Blame - linux-x64/clang/include/clang/Lex/Token.h - hafnium/prebuilts

blob: 89042a674fec392c0e36419fbc33a6205c6af7e9 [file] [log] [blame]

Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	1	//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
				2	//
Andrew Walbran	16937d0	2019-10-22 13:54:20 +0100	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	// This file defines the Token interface.
				10	//
				11	//===----------------------------------------------------------------------===//
				12
				13	#ifndef LLVM_CLANG_LEX_TOKEN_H
				14	#define LLVM_CLANG_LEX_TOKEN_H
				15
				16	#include "clang/Basic/SourceLocation.h"
				17	#include "clang/Basic/TokenKinds.h"
				18	#include "llvm/ADT/StringRef.h"
				19	#include <cassert>
				20
				21	namespace clang {
				22
				23	class IdentifierInfo;
				24
				25	/// Token - This structure provides full information about a lexed token.
				26	/// It is not intended to be space efficient, it is intended to return as much
				27	/// information as possible about each returned token. This is expected to be
				28	/// compressed into a smaller form if memory footprint is important.
				29	///
				30	/// The parser can create a special "annotation token" representing a stream of
				31	/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
				32	/// can be represented by a single typename annotation token that carries
				33	/// information about the SourceRange of the tokens and the type object.
				34	class Token {
				35	/// The location of the token. This is the raw encoding of a SourceLocation.
				36	unsigned Loc;
				37
				38	// Conceptually these next two fields could be in a union. However, this
				39	// causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
				40	// routine. Keeping as separate members with casts until a more beautiful fix
				41	// presents itself.
				42
				43	/// UintData - This holds either the length of the token text, when
				44	/// a normal token, or the end of the SourceRange when an annotation
				45	/// token.
				46	unsigned UintData;
				47
				48	/// PtrData - This is a union of four different pointer types, which depends
				49	/// on what type of token this is:
				50	/// Identifiers, keywords, etc:
				51	/// This is an IdentifierInfo*, which contains the uniqued identifier
				52	/// spelling.
				53	/// Literals: isLiteral() returns true.
				54	/// This is a pointer to the start of the token in a text buffer, which
				55	/// may be dirty (have trigraphs / escaped newlines).
				56	/// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
				57	/// This is a pointer to sema-specific data for the annotation token.
				58	/// Eof:
				59	/// This is a pointer to a Decl.
				60	/// Other:
				61	/// This is null.
				62	void *PtrData;
				63
				64	/// Kind - The actual flavor of token this is.
				65	tok::TokenKind Kind;
				66
				67	/// Flags - Bits we track about this token, members of the TokenFlags enum.
				68	unsigned short Flags;
				69
				70	public:
				71	// Various flags set per token:
				72	enum TokenFlags {
Andrew Walbran	3d2c197	2020-04-07 12:24:26 +0100	[diff] [blame^]	73	StartOfLine = 0x01, // At start of line or only after whitespace
				74	// (considering the line after macro expansion).
				75	LeadingSpace = 0x02, // Whitespace exists before this token (considering
				76	// whitespace after macro expansion).
				77	DisableExpand = 0x04, // This identifier may never be macro expanded.
				78	NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	79	LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
Andrew Walbran	3d2c197	2020-04-07 12:24:26 +0100	[diff] [blame^]	80	HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
				81	HasUCN = 0x40, // This identifier contains a UCN.
				82	IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	83	StringifiedInMacro = 0x100, // This string or character literal is formed by
				84	// macro stringizing or charizing operator.
				85	CommaAfterElided = 0x200, // The comma following this token was elided (MS).
				86	IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
Andrew Walbran	3d2c197	2020-04-07 12:24:26 +0100	[diff] [blame^]	87	IsReinjected = 0x800, // A phase 4 token that was produced before and
				88	// re-added, e.g. via EnterTokenStream. Annotation
				89	// tokens are not reinjected.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	90	};
				91
				92	tok::TokenKind getKind() const { return Kind; }
				93	void setKind(tok::TokenKind K) { Kind = K; }
				94
				95	/// is/isNot - Predicates to check if this token is a specific kind, as in
				96	/// "if (Tok.is(tok::l_brace)) {...}".
				97	bool is(tok::TokenKind K) const { return Kind == K; }
				98	bool isNot(tok::TokenKind K) const { return Kind != K; }
				99	bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
				100	return is(K1) || is(K2);
				101	}
				102	template <typename... Ts>
				103	bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
				104	return is(K1) || isOneOf(K2, Ks...);
				105	}
				106
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	107	/// Return true if this is a raw identifier (when lexing
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	108	/// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
				109	bool isAnyIdentifier() const {
				110	return tok::isAnyIdentifier(getKind());
				111	}
				112
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	113	/// Return true if this is a "literal", like a numeric
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	114	/// constant, string, etc.
				115	bool isLiteral() const {
				116	return tok::isLiteral(getKind());
				117	}
				118
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	119	/// Return true if this is any of tok::annot_* kind tokens.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	120	bool isAnnotation() const {
				121	return tok::isAnnotation(getKind());
				122	}
				123
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	124	/// Return the source location of this token, decoded from the raw
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	125	/// encoding stored in Loc.
				126	SourceLocation getLocation() const {
				127	return SourceLocation::getFromRawEncoding(Loc);
				128	}
				129	unsigned getLength() const {
				130	assert(!isAnnotation() && "Annotation tokens have no length field");
				131	return UintData;
				132	}
				133
				134	void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
				135	void setLength(unsigned Len) {
				136	assert(!isAnnotation() && "Annotation tokens have no length field");
				137	UintData = Len;
				138	}
				139
				140	SourceLocation getAnnotationEndLoc() const {
				141	assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
				142	return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
				143	}
				144	void setAnnotationEndLoc(SourceLocation L) {
				145	assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
				146	UintData = L.getRawEncoding();
				147	}
				148
				149	SourceLocation getLastLoc() const {
				150	return isAnnotation() ? getAnnotationEndLoc() : getLocation();
				151	}
				152
				153	SourceLocation getEndLoc() const {
				154	return isAnnotation() ? getAnnotationEndLoc()
				155	: getLocation().getLocWithOffset(getLength());
				156	}
				157
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	158	/// SourceRange of the group of tokens that this annotation token
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	159	/// represents.
				160	SourceRange getAnnotationRange() const {
				161	return SourceRange(getLocation(), getAnnotationEndLoc());
				162	}
				163	void setAnnotationRange(SourceRange R) {
				164	setLocation(R.getBegin());
				165	setAnnotationEndLoc(R.getEnd());
				166	}
				167
				168	const char *getName() const { return tok::getTokenName(Kind); }
				169
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	170	/// Reset the token: unknown kind, no flags, null data, invalid location.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	171	void startToken() {
				172	Kind = tok::unknown;
				173	Flags = 0;
				174	PtrData = nullptr;
				175	UintData = 0;
				176	Loc = SourceLocation().getRawEncoding();
				177	}
				178
				179	IdentifierInfo *getIdentifierInfo() const {
				180	assert(isNot(tok::raw_identifier) &&
				181	"getIdentifierInfo() on a tok::raw_identifier token!");
				182	assert(!isAnnotation() &&
				183	"getIdentifierInfo() on an annotation token!");
				184	if (isLiteral()) return nullptr;
				185	if (is(tok::eof)) return nullptr;
				186	return static_cast<IdentifierInfo *>(PtrData);
				187	}
				188	void setIdentifierInfo(IdentifierInfo *II) {
				189	PtrData = II;
				190	}
				191
				192	const void *getEofData() const {
				193	assert(is(tok::eof));
				194	return PtrData;
				195	}
				196	void setEofData(const void *D) {
				197	assert(is(tok::eof));
				198	assert(!PtrData);
				199	PtrData = const_cast<void *>(D);
				200	}
				201
				202	/// getRawIdentifier - For a raw identifier token (i.e., an identifier
				203	/// lexed in raw mode), returns a reference to the text substring in the
				204	/// buffer if known.
				205	StringRef getRawIdentifier() const {
				206	assert(is(tok::raw_identifier));
				207	return StringRef(static_cast<const char *>(PtrData), getLength());
				208	}
				209	void setRawIdentifierData(const char *Ptr) {
				210	assert(is(tok::raw_identifier));
				211	PtrData = const_cast<char*>(Ptr);
				212	}
				213
				214	/// getLiteralData - For a literal token (numeric constant, string, etc), this
				215	/// returns a pointer to the start of it in the text buffer if known, null
				216	/// otherwise.
				217	const char *getLiteralData() const {
				218	assert(isLiteral() && "Cannot get literal data of non-literal");
				219	return static_cast<const char *>(PtrData);
				220	}
				221	void setLiteralData(const char *Ptr) {
				222	assert(isLiteral() && "Cannot set literal data of non-literal");
				223	PtrData = const_cast<char*>(Ptr);
				224	}
				225
				226	void *getAnnotationValue() const {
				227	assert(isAnnotation() && "Used AnnotVal on non-annotation token");
				228	return PtrData;
				229	}
				230	void setAnnotationValue(void *val) {
				231	assert(isAnnotation() && "Used AnnotVal on non-annotation token");
				232	PtrData = val;
				233	}
				234
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	235	/// Set the specified flag.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	236	void setFlag(TokenFlags Flag) {
				237	Flags |= Flag;
				238	}
				239
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	240	/// Get the specified flag.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	241	bool getFlag(TokenFlags Flag) const {
				242	return (Flags & Flag) != 0;
				243	}
				244
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	245	/// Unset the specified flag.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	246	void clearFlag(TokenFlags Flag) {
				247	Flags &= ~Flag;
				248	}
				249
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	250	/// Return the internal representation of the flags.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	251	///
				252	/// This is only intended for low-level operations such as writing tokens to
				253	/// disk.
				254	unsigned getFlags() const {
				255	return Flags;
				256	}
				257
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	258	/// Set a flag to either true or false.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	259	void setFlagValue(TokenFlags Flag, bool Val) {
				260	if (Val)
				261	setFlag(Flag);
				262	else
				263	clearFlag(Flag);
				264	}
				265
				266	/// isAtStartOfLine - Return true if this token is at the start of a line.
				267	///
				268	bool isAtStartOfLine() const { return getFlag(StartOfLine); }
				269
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	270	/// Return true if this token has whitespace before it.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	271	///
				272	bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
				273
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	274	/// Return true if this identifier token should never
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	275	/// be expanded in the future, due to C99 6.10.3.4p2.
				276	bool isExpandDisabled() const { return getFlag(DisableExpand); }
				277
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	278	/// Return true if we have an ObjC keyword identifier.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	279	bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
				280
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	281	/// Return the ObjC keyword kind.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	282	tok::ObjCKeywordKind getObjCKeywordID() const;
				283
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	284	/// Return true if this token has trigraphs or escaped newlines in it.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	285	bool needsCleaning() const { return getFlag(NeedsCleaning); }
				286
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	287	/// Return true if this token has an empty macro before it.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	288	///
				289	bool hasLeadingEmptyMacro() const { return getFlag(LeadingEmptyMacro); }
				290
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	291	/// Return true if this token is a string or character literal which
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	292	/// has a ud-suffix.
				293	bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
				294
				295	/// Returns true if this token contains a universal character name.
				296	bool hasUCN() const { return getFlag(HasUCN); }
				297
				298	/// Returns true if this token is formed by the macro stringizing or charizing
				299	/// operator.
				300	bool stringifiedInMacro() const { return getFlag(StringifiedInMacro); }
				301
				302	/// Returns true if the comma after this token was elided.
				303	bool commaAfterElided() const { return getFlag(CommaAfterElided); }
				304
				305	/// Returns true if this token is an editor placeholder.
				306	///
				307	/// Editor placeholders are produced by the code-completion engine and are
				308	/// represented as characters between '<#' and '#>' in the source code. The
				309	/// lexer uses identifier tokens to represent placeholders.
				310	bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); }
				311	};
				312
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	313	/// Information about the conditional stack (\#if directives)
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	314	/// currently active.
				315	struct PPConditionalInfo {
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	316	/// Location where the conditional started.
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	317	SourceLocation IfLoc;
				318
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	319	/// True if this was contained in a skipping directive, e.g.,
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	320	/// in a "\#if 0" block.
				321	bool WasSkipping;
				322
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	323	/// True if we have emitted tokens already, and now we're in
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	324	/// an \#else block or similar. Only useful in Skipping blocks.
				325	bool FoundNonSkip;
				326
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	327	/// True if we've seen an \#else in this block. If so,
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	328	/// further \#elif/\#else directives are not allowed.
				329	bool FoundElse;
				330	};
				331
				332	} // end namespace clang
				333
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	334	#endif // LLVM_CLANG_LEX_TOKEN_H