Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===// |
| 2 | // |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This is a YAML 1.2 parser. |
| 10 | // |
| 11 | // See http://www.yaml.org/spec/1.2/spec.html for the full standard. |
| 12 | // |
| 13 | // This currently does not implement the following: |
| 14 | // * Multi-line literal folding. |
| 15 | // * Tag resolution. |
| 16 | // * UTF-16. |
| 17 | // * BOMs anywhere other than the first Unicode scalar value in the file. |
| 18 | // |
| 19 | // The most important class here is Stream. This represents a YAML stream with |
| 20 | // 0, 1, or many documents. |
| 21 | // |
| 22 | // SourceMgr sm; |
| 23 | // StringRef input = getInput(); |
| 24 | // yaml::Stream stream(input, sm); |
| 25 | // |
| 26 | // for (yaml::document_iterator di = stream.begin(), de = stream.end(); |
| 27 | // di != de; ++di) { |
| 28 | // yaml::Node *n = di->getRoot(); |
| 29 | // if (n) { |
| 30 | // // Do something with n... |
| 31 | // } else |
| 32 | // break; |
| 33 | // } |
| 34 | // |
| 35 | //===----------------------------------------------------------------------===// |
| 36 | |
| 37 | #ifndef LLVM_SUPPORT_YAMLPARSER_H |
| 38 | #define LLVM_SUPPORT_YAMLPARSER_H |
| 39 | |
| 40 | #include "llvm/ADT/StringRef.h" |
| 41 | #include "llvm/Support/Allocator.h" |
| 42 | #include "llvm/Support/SMLoc.h" |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 43 | #include "llvm/Support/SourceMgr.h" |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 44 | #include <cassert> |
| 45 | #include <cstddef> |
| 46 | #include <iterator> |
| 47 | #include <map> |
| 48 | #include <memory> |
| 49 | #include <string> |
| 50 | #include <system_error> |
| 51 | |
| 52 | namespace llvm { |
| 53 | |
| 54 | class MemoryBufferRef; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 55 | class raw_ostream; |
| 56 | class Twine; |
| 57 | |
| 58 | namespace yaml { |
| 59 | |
| 60 | class Document; |
| 61 | class document_iterator; |
| 62 | class Node; |
| 63 | class Scanner; |
| 64 | struct Token; |
| 65 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 66 | /// Dump all the tokens in this stream to OS. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 67 | /// \returns true if there was an error, false otherwise. |
| 68 | bool dumpTokens(StringRef Input, raw_ostream &); |
| 69 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 70 | /// Scans all tokens in input without outputting anything. This is used |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 71 | /// for benchmarking the tokenizer. |
| 72 | /// \returns true if there was an error, false otherwise. |
| 73 | bool scanTokens(StringRef Input); |
| 74 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 75 | /// Escape \a Input for a double quoted scalar; if \p EscapePrintable |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 76 | /// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is |
| 77 | /// false, those UTF8 sequences encoding printable unicode scalars will not be |
| 78 | /// escaped, but emitted verbatim. |
| 79 | std::string escape(StringRef Input, bool EscapePrintable = true); |
| 80 | |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 81 | /// Parse \p S as a bool according to https://yaml.org/type/bool.html. |
| 82 | llvm::Optional<bool> parseBool(StringRef S); |
| 83 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 84 | /// This class represents a YAML stream potentially containing multiple |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 85 | /// documents. |
| 86 | class Stream { |
| 87 | public: |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 88 | /// This keeps a reference to the string referenced by \p Input. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 89 | Stream(StringRef Input, SourceMgr &, bool ShowColors = true, |
| 90 | std::error_code *EC = nullptr); |
| 91 | |
| 92 | Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true, |
| 93 | std::error_code *EC = nullptr); |
| 94 | ~Stream(); |
| 95 | |
| 96 | document_iterator begin(); |
| 97 | document_iterator end(); |
| 98 | void skip(); |
| 99 | bool failed(); |
| 100 | |
| 101 | bool validate() { |
| 102 | skip(); |
| 103 | return !failed(); |
| 104 | } |
| 105 | |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 106 | void printError(Node *N, const Twine &Msg, |
| 107 | SourceMgr::DiagKind Kind = SourceMgr::DK_Error); |
| 108 | void printError(const SMRange &Range, const Twine &Msg, |
| 109 | SourceMgr::DiagKind Kind = SourceMgr::DK_Error); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 110 | |
| 111 | private: |
| 112 | friend class Document; |
| 113 | |
| 114 | std::unique_ptr<Scanner> scanner; |
| 115 | std::unique_ptr<Document> CurrentDoc; |
| 116 | }; |
| 117 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 118 | /// Abstract base class for all Nodes. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 119 | class Node { |
| 120 | virtual void anchor(); |
| 121 | |
| 122 | public: |
| 123 | enum NodeKind { |
| 124 | NK_Null, |
| 125 | NK_Scalar, |
| 126 | NK_BlockScalar, |
| 127 | NK_KeyValue, |
| 128 | NK_Mapping, |
| 129 | NK_Sequence, |
| 130 | NK_Alias |
| 131 | }; |
| 132 | |
| 133 | Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, |
| 134 | StringRef Tag); |
| 135 | |
| 136 | // It's not safe to copy YAML nodes; the document is streamed and the position |
| 137 | // is part of the state. |
| 138 | Node(const Node &) = delete; |
| 139 | void operator=(const Node &) = delete; |
| 140 | |
| 141 | void *operator new(size_t Size, BumpPtrAllocator &Alloc, |
| 142 | size_t Alignment = 16) noexcept { |
| 143 | return Alloc.Allocate(Size, Alignment); |
| 144 | } |
| 145 | |
| 146 | void operator delete(void *Ptr, BumpPtrAllocator &Alloc, |
| 147 | size_t Size) noexcept { |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 148 | Alloc.Deallocate(Ptr, Size, 0); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 149 | } |
| 150 | |
| 151 | void operator delete(void *) noexcept = delete; |
| 152 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 153 | /// Get the value of the anchor attached to this node. If it does not |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 154 | /// have one, getAnchor().size() will be 0. |
| 155 | StringRef getAnchor() const { return Anchor; } |
| 156 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 157 | /// Get the tag as it was written in the document. This does not |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 158 | /// perform tag resolution. |
| 159 | StringRef getRawTag() const { return Tag; } |
| 160 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 161 | /// Get the verbatium tag for a given Node. This performs tag resoluton |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 162 | /// and substitution. |
| 163 | std::string getVerbatimTag() const; |
| 164 | |
| 165 | SMRange getSourceRange() const { return SourceRange; } |
| 166 | void setSourceRange(SMRange SR) { SourceRange = SR; } |
| 167 | |
| 168 | // These functions forward to Document and Scanner. |
| 169 | Token &peekNext(); |
| 170 | Token getNext(); |
| 171 | Node *parseBlockNode(); |
| 172 | BumpPtrAllocator &getAllocator(); |
| 173 | void setError(const Twine &Message, Token &Location) const; |
| 174 | bool failed() const; |
| 175 | |
| 176 | virtual void skip() {} |
| 177 | |
| 178 | unsigned int getType() const { return TypeID; } |
| 179 | |
| 180 | protected: |
| 181 | std::unique_ptr<Document> &Doc; |
| 182 | SMRange SourceRange; |
| 183 | |
| 184 | ~Node() = default; |
| 185 | |
| 186 | private: |
| 187 | unsigned int TypeID; |
| 188 | StringRef Anchor; |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 189 | /// The tag as typed in the document. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 190 | StringRef Tag; |
| 191 | }; |
| 192 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 193 | /// A null value. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 194 | /// |
| 195 | /// Example: |
| 196 | /// !!null null |
| 197 | class NullNode final : public Node { |
| 198 | void anchor() override; |
| 199 | |
| 200 | public: |
| 201 | NullNode(std::unique_ptr<Document> &D) |
| 202 | : Node(NK_Null, D, StringRef(), StringRef()) {} |
| 203 | |
| 204 | static bool classof(const Node *N) { return N->getType() == NK_Null; } |
| 205 | }; |
| 206 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 207 | /// A scalar node is an opaque datum that can be presented as a |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 208 | /// series of zero or more Unicode scalar values. |
| 209 | /// |
| 210 | /// Example: |
| 211 | /// Adena |
| 212 | class ScalarNode final : public Node { |
| 213 | void anchor() override; |
| 214 | |
| 215 | public: |
| 216 | ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 217 | StringRef Val) |
| 218 | : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { |
| 219 | SMLoc Start = SMLoc::getFromPointer(Val.begin()); |
| 220 | SMLoc End = SMLoc::getFromPointer(Val.end()); |
| 221 | SourceRange = SMRange(Start, End); |
| 222 | } |
| 223 | |
| 224 | // Return Value without any escaping or folding or other fun YAML stuff. This |
| 225 | // is the exact bytes that are contained in the file (after conversion to |
| 226 | // utf8). |
| 227 | StringRef getRawValue() const { return Value; } |
| 228 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 229 | /// Gets the value of this node as a StringRef. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 230 | /// |
Olivier Deprez | f4ef2d0 | 2021-04-20 13:36:24 +0200 | [diff] [blame] | 231 | /// \param Storage is used to store the content of the returned StringRef if |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 232 | /// it requires any modification from how it appeared in the source. |
| 233 | /// This happens with escaped characters and multi-line literals. |
| 234 | StringRef getValue(SmallVectorImpl<char> &Storage) const; |
| 235 | |
| 236 | static bool classof(const Node *N) { |
| 237 | return N->getType() == NK_Scalar; |
| 238 | } |
| 239 | |
| 240 | private: |
| 241 | StringRef Value; |
| 242 | |
| 243 | StringRef unescapeDoubleQuoted(StringRef UnquotedValue, |
| 244 | StringRef::size_type Start, |
| 245 | SmallVectorImpl<char> &Storage) const; |
| 246 | }; |
| 247 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 248 | /// A block scalar node is an opaque datum that can be presented as a |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 249 | /// series of zero or more Unicode scalar values. |
| 250 | /// |
| 251 | /// Example: |
| 252 | /// | |
| 253 | /// Hello |
| 254 | /// World |
| 255 | class BlockScalarNode final : public Node { |
| 256 | void anchor() override; |
| 257 | |
| 258 | public: |
| 259 | BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 260 | StringRef Value, StringRef RawVal) |
| 261 | : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) { |
| 262 | SMLoc Start = SMLoc::getFromPointer(RawVal.begin()); |
| 263 | SMLoc End = SMLoc::getFromPointer(RawVal.end()); |
| 264 | SourceRange = SMRange(Start, End); |
| 265 | } |
| 266 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 267 | /// Gets the value of this node as a StringRef. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 268 | StringRef getValue() const { return Value; } |
| 269 | |
| 270 | static bool classof(const Node *N) { |
| 271 | return N->getType() == NK_BlockScalar; |
| 272 | } |
| 273 | |
| 274 | private: |
| 275 | StringRef Value; |
| 276 | }; |
| 277 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 278 | /// A key and value pair. While not technically a Node under the YAML |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 279 | /// representation graph, it is easier to treat them this way. |
| 280 | /// |
| 281 | /// TODO: Consider making this not a child of Node. |
| 282 | /// |
| 283 | /// Example: |
| 284 | /// Section: .text |
| 285 | class KeyValueNode final : public Node { |
| 286 | void anchor() override; |
| 287 | |
| 288 | public: |
| 289 | KeyValueNode(std::unique_ptr<Document> &D) |
| 290 | : Node(NK_KeyValue, D, StringRef(), StringRef()) {} |
| 291 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 292 | /// Parse and return the key. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 293 | /// |
| 294 | /// This may be called multiple times. |
| 295 | /// |
| 296 | /// \returns The key, or nullptr if failed() == true. |
| 297 | Node *getKey(); |
| 298 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 299 | /// Parse and return the value. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 300 | /// |
| 301 | /// This may be called multiple times. |
| 302 | /// |
| 303 | /// \returns The value, or nullptr if failed() == true. |
| 304 | Node *getValue(); |
| 305 | |
| 306 | void skip() override { |
| 307 | if (Node *Key = getKey()) { |
| 308 | Key->skip(); |
| 309 | if (Node *Val = getValue()) |
| 310 | Val->skip(); |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | static bool classof(const Node *N) { |
| 315 | return N->getType() == NK_KeyValue; |
| 316 | } |
| 317 | |
| 318 | private: |
| 319 | Node *Key = nullptr; |
| 320 | Node *Value = nullptr; |
| 321 | }; |
| 322 | |
/// This is an iterator abstraction over YAML collections shared by both
/// sequences and maps.
///
/// BaseT must have a ValueT* member named CurrentEntry and a member function
/// increment() which must set CurrentEntry to nullptr to create an end
/// iterator.
///
/// The iterator traits are spelled out as member typedefs instead of being
/// inherited from std::iterator, which is deprecated as of C++17.
template <class BaseT, class ValueT> class basic_collection_iterator {
public:
  using iterator_category = std::input_iterator_tag;
  using value_type = ValueT;
  using difference_type = std::ptrdiff_t;
  using pointer = value_type *;
  using reference = value_type &;

  /// Creates an end iterator (no collection attached).
  basic_collection_iterator() = default;

  /// Creates an iterator attached to the collection \p B.
  basic_collection_iterator(BaseT *B) : Base(B) {}

  ValueT *operator->() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  ValueT &operator*() const {
    assert(Base && Base->CurrentEntry &&
           "Attempted to dereference end iterator!");
    return *Base->CurrentEntry;
  }

  operator ValueT *() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  /// Note on EqualityComparable:
  ///
  /// The iterator is not re-entrant,
  /// it is meant to be used for parsing YAML on-demand
  /// Once iteration started - it can point only to one entry at a time
  /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
  /// iff Base and Other.Base are equal.
  bool operator==(const basic_collection_iterator &Other) const {
    if (Base && (Base == Other.Base)) {
      assert((Base->CurrentEntry == Other.Base->CurrentEntry)
             && "Equal Bases expected to point to equal Entries");
    }

    return Base == Other.Base;
  }

  bool operator!=(const basic_collection_iterator &Other) const {
    return !(Base == Other.Base);
  }

  /// Advance the underlying collection by one entry. When the collection
  /// reports no current entry afterwards, this iterator detaches from it and
  /// so compares equal to a default-constructed (end) iterator.
  basic_collection_iterator &operator++() {
    assert(Base && "Attempted to advance iterator past end!");
    Base->increment();
    // Create an end iterator.
    if (!Base->CurrentEntry)
      Base = nullptr;
    return *this;
  }

private:
  BaseT *Base = nullptr;
};
| 383 | |
// The begin() and skip() templates are used for both MappingNode and
// SequenceNode.
//
// begin() starts the single permitted pass over a collection: it asserts that
// the collection has not been iterated yet, clears IsAtBeginning, and returns
// an iterator attached to the collection that has been advanced once.
template <class CollectionType>
typename CollectionType::iterator begin(CollectionType &C) {
  using IterT = typename CollectionType::iterator;

  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
  C.IsAtBeginning = false;

  IterT First(&C);
  ++First;
  return First;
}
| 393 | |
// Skip every entry of a collection. Nothing is done unless the collection is
// still at its beginning; in that case each entry is visited through begin()
// and has its own skip() called.
template <class CollectionType> void skip(CollectionType &C) {
  // TODO: support skipping from the middle of a parsed collection ;/
  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
  if (!C.IsAtBeginning)
    return;

  using IterT = typename CollectionType::iterator;
  IterT Cur = begin(C);
  IterT End = C.end();
  while (Cur != End) {
    Cur->skip();
    ++Cur;
  }
}
| 402 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 403 | /// Represents a YAML map created from either a block map for a flow map. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 404 | /// |
| 405 | /// This parses the YAML stream as increment() is called. |
| 406 | /// |
| 407 | /// Example: |
| 408 | /// Name: _main |
| 409 | /// Scope: Global |
| 410 | class MappingNode final : public Node { |
| 411 | void anchor() override; |
| 412 | |
| 413 | public: |
| 414 | enum MappingType { |
| 415 | MT_Block, |
| 416 | MT_Flow, |
| 417 | MT_Inline ///< An inline mapping node is used for "[key: value]". |
| 418 | }; |
| 419 | |
| 420 | MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 421 | MappingType MT) |
| 422 | : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {} |
| 423 | |
| 424 | friend class basic_collection_iterator<MappingNode, KeyValueNode>; |
| 425 | |
| 426 | using iterator = basic_collection_iterator<MappingNode, KeyValueNode>; |
| 427 | |
| 428 | template <class T> friend typename T::iterator yaml::begin(T &); |
| 429 | template <class T> friend void yaml::skip(T &); |
| 430 | |
| 431 | iterator begin() { return yaml::begin(*this); } |
| 432 | |
| 433 | iterator end() { return iterator(); } |
| 434 | |
| 435 | void skip() override { yaml::skip(*this); } |
| 436 | |
| 437 | static bool classof(const Node *N) { |
| 438 | return N->getType() == NK_Mapping; |
| 439 | } |
| 440 | |
| 441 | private: |
| 442 | MappingType Type; |
| 443 | bool IsAtBeginning = true; |
| 444 | bool IsAtEnd = false; |
| 445 | KeyValueNode *CurrentEntry = nullptr; |
| 446 | |
| 447 | void increment(); |
| 448 | }; |
| 449 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 450 | /// Represents a YAML sequence created from either a block sequence for a |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 451 | /// flow sequence. |
| 452 | /// |
| 453 | /// This parses the YAML stream as increment() is called. |
| 454 | /// |
| 455 | /// Example: |
| 456 | /// - Hello |
| 457 | /// - World |
| 458 | class SequenceNode final : public Node { |
| 459 | void anchor() override; |
| 460 | |
| 461 | public: |
| 462 | enum SequenceType { |
| 463 | ST_Block, |
| 464 | ST_Flow, |
| 465 | // Use for: |
| 466 | // |
| 467 | // key: |
| 468 | // - val1 |
| 469 | // - val2 |
| 470 | // |
| 471 | // As a BlockMappingEntry and BlockEnd are not created in this case. |
| 472 | ST_Indentless |
| 473 | }; |
| 474 | |
| 475 | SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 476 | SequenceType ST) |
| 477 | : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {} |
| 478 | |
| 479 | friend class basic_collection_iterator<SequenceNode, Node>; |
| 480 | |
| 481 | using iterator = basic_collection_iterator<SequenceNode, Node>; |
| 482 | |
| 483 | template <class T> friend typename T::iterator yaml::begin(T &); |
| 484 | template <class T> friend void yaml::skip(T &); |
| 485 | |
| 486 | void increment(); |
| 487 | |
| 488 | iterator begin() { return yaml::begin(*this); } |
| 489 | |
| 490 | iterator end() { return iterator(); } |
| 491 | |
| 492 | void skip() override { yaml::skip(*this); } |
| 493 | |
| 494 | static bool classof(const Node *N) { |
| 495 | return N->getType() == NK_Sequence; |
| 496 | } |
| 497 | |
| 498 | private: |
| 499 | SequenceType SeqType; |
| 500 | bool IsAtBeginning = true; |
| 501 | bool IsAtEnd = false; |
| 502 | bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','. |
| 503 | Node *CurrentEntry = nullptr; |
| 504 | }; |
| 505 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 506 | /// Represents an alias to a Node with an anchor. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 507 | /// |
| 508 | /// Example: |
| 509 | /// *AnchorName |
| 510 | class AliasNode final : public Node { |
| 511 | void anchor() override; |
| 512 | |
| 513 | public: |
| 514 | AliasNode(std::unique_ptr<Document> &D, StringRef Val) |
| 515 | : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} |
| 516 | |
| 517 | StringRef getName() const { return Name; } |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 518 | |
| 519 | static bool classof(const Node *N) { return N->getType() == NK_Alias; } |
| 520 | |
| 521 | private: |
| 522 | StringRef Name; |
| 523 | }; |
| 524 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 525 | /// A YAML Stream is a sequence of Documents. A document contains a root |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 526 | /// node. |
| 527 | class Document { |
| 528 | public: |
| 529 | Document(Stream &ParentStream); |
| 530 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 531 | /// Root for parsing a node. Returns a single node. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 532 | Node *parseBlockNode(); |
| 533 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 534 | /// Finish parsing the current document and return true if there are |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 535 | /// more. Return false otherwise. |
| 536 | bool skip(); |
| 537 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 538 | /// Parse and return the root level node. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 539 | Node *getRoot() { |
| 540 | if (Root) |
| 541 | return Root; |
| 542 | return Root = parseBlockNode(); |
| 543 | } |
| 544 | |
| 545 | const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } |
| 546 | |
| 547 | private: |
| 548 | friend class Node; |
| 549 | friend class document_iterator; |
| 550 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 551 | /// Stream to read tokens from. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 552 | Stream &stream; |
| 553 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 554 | /// Used to allocate nodes to. All are destroyed without calling their |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 555 | /// destructor when the document is destroyed. |
| 556 | BumpPtrAllocator NodeAllocator; |
| 557 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 558 | /// The root node. Used to support skipping a partially parsed |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 559 | /// document. |
| 560 | Node *Root; |
| 561 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 562 | /// Maps tag prefixes to their expansion. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 563 | std::map<StringRef, StringRef> TagMap; |
| 564 | |
| 565 | Token &peekNext(); |
| 566 | Token getNext(); |
| 567 | void setError(const Twine &Message, Token &Location) const; |
| 568 | bool failed() const; |
| 569 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 570 | /// Parse %BLAH directives and return true if any were encountered. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 571 | bool parseDirectives(); |
| 572 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 573 | /// Parse %YAML |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 574 | void parseYAMLDirective(); |
| 575 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 576 | /// Parse %TAG |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 577 | void parseTAGDirective(); |
| 578 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 579 | /// Consume the next token and error if it is not \a TK. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 580 | bool expectToken(int TK); |
| 581 | }; |
| 582 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 583 | /// Iterator abstraction for Documents over a Stream. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 584 | class document_iterator { |
| 585 | public: |
| 586 | document_iterator() = default; |
| 587 | document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} |
| 588 | |
| 589 | bool operator==(const document_iterator &Other) const { |
| 590 | if (isAtEnd() || Other.isAtEnd()) |
| 591 | return isAtEnd() && Other.isAtEnd(); |
| 592 | |
| 593 | return Doc == Other.Doc; |
| 594 | } |
| 595 | bool operator!=(const document_iterator &Other) const { |
| 596 | return !(*this == Other); |
| 597 | } |
| 598 | |
| 599 | document_iterator operator++() { |
| 600 | assert(Doc && "incrementing iterator past the end."); |
| 601 | if (!(*Doc)->skip()) { |
| 602 | Doc->reset(nullptr); |
| 603 | } else { |
| 604 | Stream &S = (*Doc)->stream; |
| 605 | Doc->reset(new Document(S)); |
| 606 | } |
| 607 | return *this; |
| 608 | } |
| 609 | |
| 610 | Document &operator*() { return *Doc->get(); } |
| 611 | |
| 612 | std::unique_ptr<Document> &operator->() { return *Doc; } |
| 613 | |
| 614 | private: |
| 615 | bool isAtEnd() const { return !Doc || !*Doc; } |
| 616 | |
| 617 | std::unique_ptr<Document> *Doc = nullptr; |
| 618 | }; |
| 619 | |
| 620 | } // end namespace yaml |
| 621 | |
| 622 | } // end namespace llvm |
| 623 | |
| 624 | #endif // LLVM_SUPPORT_YAMLPARSER_H |