Blame - linux-x64/clang/include/llvm/Support/Automaton.h - hafnium/prebuilts

blob: c2b921311a8ceb470f82fbbd9645438c5b99e769 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	// This file implements classes that drive and introspect deterministic finite-
				10	// state automata (DFAs) as generated by TableGen's -gen-automata backend.
				11	//
				12	// For a description of how to define an automaton, see
				13	// include/llvm/TableGen/Automaton.td.
				14	//
				15	// One important detail is that these deterministic automata are created from
				16	// (potentially) nondeterministic definitions. Therefore a unique sequence of
				17	// input symbols will produce one path through the DFA but multiple paths
				18	// through the original NFA. An automaton by default only returns "accepted" or
				19	// "not accepted", but frequently we want to analyze what NFA path was taken.
				20	// Finding a path through the NFA states that results in a DFA state can help
				21	// answer what the solution to a problem was, not just that there exists a
				22	// solution.
				23	//
				24	//===----------------------------------------------------------------------===//
				25
				26	#ifndef LLVM_SUPPORT_AUTOMATON_H
				27	#define LLVM_SUPPORT_AUTOMATON_H
				28
				29	#include "llvm/ADT/ArrayRef.h"
				30	#include "llvm/ADT/DenseMap.h"
				31	#include "llvm/ADT/SmallVector.h"
				32	#include "llvm/Support/Allocator.h"
				33	#include <deque>
				34	#include <map>
				35	#include <memory>
				36	#include <unordered_map>
				37	#include <vector>
				38
				39	namespace llvm {
				40
				41	using NfaPath = SmallVector<uint64_t, 4>;
				42
				/// The {from, to} state pair type used by the automata transition info tables.
				///
				/// This is deliberately a plain struct with a trivial constructor rather than
				/// std::pair<uint64_t, uint64_t>: experimental results with large tables have
				/// shown a significant (multiple orders of magnitude) parsing speedup.
				struct NfaStatePair {
				  uint64_t FromDfaState;
				  uint64_t ToDfaState;

				  /// Lexicographic ordering: compare FromDfaState first and fall back to
				  /// ToDfaState only when the source states are equal.
				  bool operator<(const NfaStatePair &Other) const {
				    if (FromDfaState != Other.FromDfaState)
				      return FromDfaState < Other.FromDfaState;
				    return ToDfaState < Other.ToDfaState;
				  }
				};
				56
				57	namespace internal {
				58	/// The internal class that maintains all possible paths through an NFA based
				59	/// on a path through the DFA.
				60	class NfaTranscriber {
				61	private:
				62	/// Cached transition table. This is a table of NfaStatePairs that contains
				63	/// zero-terminated sequences pointed to by DFA transitions.
				64	ArrayRef<NfaStatePair> TransitionInfo;
				65
				66	/// A simple linked-list of traversed states that can have a shared tail. The
				67	/// traversed path is stored in reverse order with the latest state as the
				68	/// head.
				69	struct PathSegment {
				70	uint64_t State;
				71	PathSegment *Tail;
				72	};
				73
				74	/// We allocate segment objects frequently. Allocate them upfront and dispose
				75	/// at the end of a traversal rather than hammering the system allocator.
				76	SpecificBumpPtrAllocator<PathSegment> Allocator;
				77
				78	/// Heads of each tracked path. These are not ordered.
				79	std::deque<PathSegment *> Heads;
				80
				81	/// The returned paths. This is populated during getPaths.
				82	SmallVector<NfaPath, 4> Paths;
				83
				84	/// Create a new segment and return it.
				85	PathSegment makePathSegment(uint64_t State, PathSegment Tail) {
				86	PathSegment *P = Allocator.Allocate();
				87	*P = {State, Tail};
				88	return P;
				89	}
				90
				91	/// Pairs defines a sequence of possible NFA transitions for a single DFA
				92	/// transition.
				93	void transition(ArrayRef<NfaStatePair> Pairs) {
				94	// Iterate over all existing heads. We will mutate the Heads deque during
				95	// iteration.
				96	unsigned NumHeads = Heads.size();
				97	for (unsigned I = 0; I < NumHeads; ++I) {
				98	PathSegment *Head = Heads[I];
				99	// The sequence of pairs is sorted. Select the set of pairs that
				100	// transition from the current head state.
				101	auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL});
				102	auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX});
				103	// For every transition from the current head state, add a new path
				104	// segment.
				105	for (; PI != PE; ++PI)
				106	if (PI->FromDfaState == Head->State)
				107	Heads.push_back(makePathSegment(PI->ToDfaState, Head));
				108	}
				109	// Now we've iterated over all the initial heads and added new ones,
				110	// dispose of the original heads.
				111	Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads));
				112	}
				113
				114	public:
				115	NfaTranscriber(ArrayRef<NfaStatePair> TransitionInfo)
				116	: TransitionInfo(TransitionInfo) {
				117	reset();
				118	}
				119
				120	ArrayRef<NfaStatePair> getTransitionInfo() const {
				121	return TransitionInfo;
				122	}
				123
				124	void reset() {
				125	Paths.clear();
				126	Heads.clear();
				127	Allocator.DestroyAll();
				128	// The initial NFA state is 0.
				129	Heads.push_back(makePathSegment(0ULL, nullptr));
				130	}
				131
				132	void transition(unsigned TransitionInfoIdx) {
				133	unsigned EndIdx = TransitionInfoIdx;
				134	while (TransitionInfo[EndIdx].ToDfaState != 0)
				135	++EndIdx;
				136	ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx],
				137	EndIdx - TransitionInfoIdx);
				138	transition(Pairs);
				139	}
				140
				141	ArrayRef<NfaPath> getPaths() {
				142	Paths.clear();
				143	for (auto *Head : Heads) {
				144	NfaPath P;
				145	while (Head->State != 0) {
				146	P.push_back(Head->State);
				147	Head = Head->Tail;
				148	}
				149	std::reverse(P.begin(), P.end());
				150	Paths.push_back(std::move(P));
				151	}
				152	return Paths;
				153	}
				154	};
				155	} // namespace internal
				156
				157	/// A deterministic finite-state automaton. The automaton is defined in
				158	/// TableGen; this object drives an automaton defined by tblgen-emitted tables.
				159	///
				160	/// An automaton accepts a sequence of input tokens ("actions"). This class is
				161	/// templated on the type of these actions.
				162	template <typename ActionT> class Automaton {
				163	/// Map from {State, Action} to {NewState, TransitionInfoIdx}.
				164	/// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition.
				165	/// FIXME: This uses a std::map because ActionT can be a pair type including
				166	/// an enum. In particular DenseMapInfo<ActionT> must be defined to use
				167	/// DenseMap here.
				168	/// This is a shared_ptr to allow very quick copy-construction of Automata; this
				169	/// state is immutable after construction so this is safe.
				170	using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
				171	std::shared_ptr<MapTy> M;
				172	/// An optional transcription object. This uses much more state than simply
				173	/// traversing the DFA for acceptance, so is heap allocated.
				174	std::shared_ptr<internal::NfaTranscriber> Transcriber;
				175	/// The initial DFA state is 1.
				176	uint64_t State = 1;
				177	/// True if we should transcribe and false if not (even if Transcriber is defined).
				178	bool Transcribe;
				179
				180	public:
				181	/// Create an automaton.
				182	/// \param Transitions The Transitions table as created by TableGen. Note that
				183	/// because the action type differs per automaton, the
				184	/// table type is templated as ArrayRef<InfoT>.
				185	/// \param TranscriptionTable The TransitionInfo table as created by TableGen.
				186	///
				187	/// Providing the TranscriptionTable argument as non-empty will enable the
				188	/// use of transcription, which analyzes the possible paths in the original
				189	/// NFA taken by the DFA. NOTE: This is substantially more work than simply
				190	/// driving the DFA, so unless you require the getPaths() method leave this
				191	/// empty.
				192	template <typename InfoT>
				193	Automaton(ArrayRef<InfoT> Transitions,
				194	ArrayRef<NfaStatePair> TranscriptionTable = {}) {
				195	if (!TranscriptionTable.empty())
				196	Transcriber =
				197	std::make_shared<internal::NfaTranscriber>(TranscriptionTable);
				198	Transcribe = Transcriber != nullptr;
				199	M = std::make_shared<MapTy>();
				200	for (const auto &I : Transitions)
				201	// Greedily read and cache the transition table.
				202	M->emplace(std::make_pair(I.FromDfaState, I.Action),
				203	std::make_pair(I.ToDfaState, I.InfoIdx));
				204	}
				205	Automaton(const Automaton &Other)
				206	: M(Other.M), State(Other.State), Transcribe(Other.Transcribe) {
				207	// Transcriber is not thread-safe, so create a new instance on copy.
				208	if (Other.Transcriber)
				209	Transcriber = std::make_shared<internal::NfaTranscriber>(
				210	Other.Transcriber->getTransitionInfo());
				211	}
				212
				213	/// Reset the automaton to its initial state.
				214	void reset() {
				215	State = 1;
				216	if (Transcriber)
				217	Transcriber->reset();
				218	}
				219
				220	/// Enable or disable transcription. Transcription is only available if
				221	/// TranscriptionTable was provided to the constructor.
				222	void enableTranscription(bool Enable = true) {
				223	assert(Transcriber &&
				224	"Transcription is only available if TranscriptionTable was provided "
				225	"to the Automaton constructor");
				226	Transcribe = Enable;
				227	}
				228
				229	/// Transition the automaton based on input symbol A. Return true if the
				230	/// automaton transitioned to a valid state, false if the automaton
				231	/// transitioned to an invalid state.
				232	///
				233	/// If this function returns false, all methods are undefined until reset() is
				234	/// called.
				235	bool add(const ActionT &A) {
				236	auto I = M->find({State, A});
				237	if (I == M->end())
				238	return false;
				239	if (Transcriber && Transcribe)
				240	Transcriber->transition(I->second.second);
				241	State = I->second.first;
				242	return true;
				243	}
				244
				245	/// Return true if the automaton can be transitioned based on input symbol A.
				246	bool canAdd(const ActionT &A) {
				247	auto I = M->find({State, A});
				248	return I != M->end();
				249	}
				250
				251	/// Obtain a set of possible paths through the input nondeterministic
				252	/// automaton that could be obtained from the sequence of input actions
				253	/// presented to this deterministic automaton.
				254	ArrayRef<NfaPath> getNfaPaths() {
				255	assert(Transcriber && Transcribe &&
				256	"Can only obtain NFA paths if transcribing!");
				257	return Transcriber->getPaths();
				258	}
				259	};
				260
				261	} // namespace llvm
				262
				263	#endif // LLVM_SUPPORT_AUTOMATON_H