Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1 | //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===// |
| 2 | // |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This is a YAML 1.2 parser. |
| 10 | // |
| 11 | // See http://www.yaml.org/spec/1.2/spec.html for the full standard. |
| 12 | // |
| 13 | // This currently does not implement the following: |
| 14 | // * Multi-line literal folding. |
| 15 | // * Tag resolution. |
| 16 | // * UTF-16. |
| 17 | // * BOMs anywhere other than the first Unicode scalar value in the file. |
| 18 | // |
| 19 | // The most important class here is Stream. This represents a YAML stream with |
| 20 | // 0, 1, or many documents. |
| 21 | // |
| 22 | // SourceMgr sm; |
| 23 | // StringRef input = getInput(); |
| 24 | // yaml::Stream stream(input, sm); |
| 25 | // |
| 26 | // for (yaml::document_iterator di = stream.begin(), de = stream.end(); |
| 27 | // di != de; ++di) { |
| 28 | // yaml::Node *n = di->getRoot(); |
| 29 | // if (n) { |
| 30 | // // Do something with n... |
| 31 | // } else |
| 32 | // break; |
| 33 | // } |
| 34 | // |
| 35 | //===----------------------------------------------------------------------===// |
| 36 | |
| 37 | #ifndef LLVM_SUPPORT_YAMLPARSER_H |
| 38 | #define LLVM_SUPPORT_YAMLPARSER_H |
| 39 | |
| 40 | #include "llvm/ADT/StringRef.h" |
| 41 | #include "llvm/Support/Allocator.h" |
| 42 | #include "llvm/Support/SMLoc.h" |
| 43 | #include <cassert> |
| 44 | #include <cstddef> |
| 45 | #include <iterator> |
| 46 | #include <map> |
| 47 | #include <memory> |
| 48 | #include <string> |
| 49 | #include <system_error> |
| 50 | |
| 51 | namespace llvm { |
| 52 | |
| 53 | class MemoryBufferRef; |
| 54 | class SourceMgr; |
| 55 | class raw_ostream; |
| 56 | class Twine; |
| 57 | |
| 58 | namespace yaml { |
| 59 | |
| 60 | class Document; |
| 61 | class document_iterator; |
| 62 | class Node; |
| 63 | class Scanner; |
| 64 | struct Token; |
| 65 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 66 | /// Dump all the tokens in this stream to OS. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 67 | /// \returns true if there was an error, false otherwise. |
| 68 | bool dumpTokens(StringRef Input, raw_ostream &); |
| 69 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 70 | /// Scans all tokens in input without outputting anything. This is used |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 71 | /// for benchmarking the tokenizer. |
| 72 | /// \returns true if there was an error, false otherwise. |
| 73 | bool scanTokens(StringRef Input); |
| 74 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 75 | /// Escape \a Input for a double quoted scalar; if \p EscapePrintable |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 76 | /// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is |
| 77 | /// false, those UTF8 sequences encoding printable unicode scalars will not be |
| 78 | /// escaped, but emitted verbatim. |
| 79 | std::string escape(StringRef Input, bool EscapePrintable = true); |
| 80 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 81 | /// This class represents a YAML stream potentially containing multiple |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 82 | /// documents. |
| 83 | class Stream { |
| 84 | public: |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 85 | /// This keeps a reference to the string referenced by \p Input. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 86 | Stream(StringRef Input, SourceMgr &, bool ShowColors = true, |
| 87 | std::error_code *EC = nullptr); |
| 88 | |
| 89 | Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true, |
| 90 | std::error_code *EC = nullptr); |
| 91 | ~Stream(); |
| 92 | |
| 93 | document_iterator begin(); |
| 94 | document_iterator end(); |
| 95 | void skip(); |
| 96 | bool failed(); |
| 97 | |
| 98 | bool validate() { |
| 99 | skip(); |
| 100 | return !failed(); |
| 101 | } |
| 102 | |
| 103 | void printError(Node *N, const Twine &Msg); |
| 104 | |
| 105 | private: |
| 106 | friend class Document; |
| 107 | |
| 108 | std::unique_ptr<Scanner> scanner; |
| 109 | std::unique_ptr<Document> CurrentDoc; |
| 110 | }; |
| 111 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 112 | /// Abstract base class for all Nodes. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 113 | class Node { |
| 114 | virtual void anchor(); |
| 115 | |
| 116 | public: |
| 117 | enum NodeKind { |
| 118 | NK_Null, |
| 119 | NK_Scalar, |
| 120 | NK_BlockScalar, |
| 121 | NK_KeyValue, |
| 122 | NK_Mapping, |
| 123 | NK_Sequence, |
| 124 | NK_Alias |
| 125 | }; |
| 126 | |
| 127 | Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, |
| 128 | StringRef Tag); |
| 129 | |
| 130 | // It's not safe to copy YAML nodes; the document is streamed and the position |
| 131 | // is part of the state. |
| 132 | Node(const Node &) = delete; |
| 133 | void operator=(const Node &) = delete; |
| 134 | |
| 135 | void *operator new(size_t Size, BumpPtrAllocator &Alloc, |
| 136 | size_t Alignment = 16) noexcept { |
| 137 | return Alloc.Allocate(Size, Alignment); |
| 138 | } |
| 139 | |
| 140 | void operator delete(void *Ptr, BumpPtrAllocator &Alloc, |
| 141 | size_t Size) noexcept { |
| 142 | Alloc.Deallocate(Ptr, Size); |
| 143 | } |
| 144 | |
| 145 | void operator delete(void *) noexcept = delete; |
| 146 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 147 | /// Get the value of the anchor attached to this node. If it does not |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 148 | /// have one, getAnchor().size() will be 0. |
| 149 | StringRef getAnchor() const { return Anchor; } |
| 150 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 151 | /// Get the tag as it was written in the document. This does not |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 152 | /// perform tag resolution. |
| 153 | StringRef getRawTag() const { return Tag; } |
| 154 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 155 | /// Get the verbatium tag for a given Node. This performs tag resoluton |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 156 | /// and substitution. |
| 157 | std::string getVerbatimTag() const; |
| 158 | |
| 159 | SMRange getSourceRange() const { return SourceRange; } |
| 160 | void setSourceRange(SMRange SR) { SourceRange = SR; } |
| 161 | |
| 162 | // These functions forward to Document and Scanner. |
| 163 | Token &peekNext(); |
| 164 | Token getNext(); |
| 165 | Node *parseBlockNode(); |
| 166 | BumpPtrAllocator &getAllocator(); |
| 167 | void setError(const Twine &Message, Token &Location) const; |
| 168 | bool failed() const; |
| 169 | |
| 170 | virtual void skip() {} |
| 171 | |
| 172 | unsigned int getType() const { return TypeID; } |
| 173 | |
| 174 | protected: |
| 175 | std::unique_ptr<Document> &Doc; |
| 176 | SMRange SourceRange; |
| 177 | |
| 178 | ~Node() = default; |
| 179 | |
| 180 | private: |
| 181 | unsigned int TypeID; |
| 182 | StringRef Anchor; |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 183 | /// The tag as typed in the document. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 184 | StringRef Tag; |
| 185 | }; |
| 186 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 187 | /// A null value. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 188 | /// |
| 189 | /// Example: |
| 190 | /// !!null null |
| 191 | class NullNode final : public Node { |
| 192 | void anchor() override; |
| 193 | |
| 194 | public: |
| 195 | NullNode(std::unique_ptr<Document> &D) |
| 196 | : Node(NK_Null, D, StringRef(), StringRef()) {} |
| 197 | |
| 198 | static bool classof(const Node *N) { return N->getType() == NK_Null; } |
| 199 | }; |
| 200 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 201 | /// A scalar node is an opaque datum that can be presented as a |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 202 | /// series of zero or more Unicode scalar values. |
| 203 | /// |
| 204 | /// Example: |
| 205 | /// Adena |
| 206 | class ScalarNode final : public Node { |
| 207 | void anchor() override; |
| 208 | |
| 209 | public: |
| 210 | ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 211 | StringRef Val) |
| 212 | : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { |
| 213 | SMLoc Start = SMLoc::getFromPointer(Val.begin()); |
| 214 | SMLoc End = SMLoc::getFromPointer(Val.end()); |
| 215 | SourceRange = SMRange(Start, End); |
| 216 | } |
| 217 | |
| 218 | // Return Value without any escaping or folding or other fun YAML stuff. This |
| 219 | // is the exact bytes that are contained in the file (after conversion to |
| 220 | // utf8). |
| 221 | StringRef getRawValue() const { return Value; } |
| 222 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 223 | /// Gets the value of this node as a StringRef. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 224 | /// |
| 225 | /// \param Storage is used to store the content of the returned StringRef iff |
| 226 | /// it requires any modification from how it appeared in the source. |
| 227 | /// This happens with escaped characters and multi-line literals. |
| 228 | StringRef getValue(SmallVectorImpl<char> &Storage) const; |
| 229 | |
| 230 | static bool classof(const Node *N) { |
| 231 | return N->getType() == NK_Scalar; |
| 232 | } |
| 233 | |
| 234 | private: |
| 235 | StringRef Value; |
| 236 | |
| 237 | StringRef unescapeDoubleQuoted(StringRef UnquotedValue, |
| 238 | StringRef::size_type Start, |
| 239 | SmallVectorImpl<char> &Storage) const; |
| 240 | }; |
| 241 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 242 | /// A block scalar node is an opaque datum that can be presented as a |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 243 | /// series of zero or more Unicode scalar values. |
| 244 | /// |
| 245 | /// Example: |
| 246 | /// | |
| 247 | /// Hello |
| 248 | /// World |
| 249 | class BlockScalarNode final : public Node { |
| 250 | void anchor() override; |
| 251 | |
| 252 | public: |
| 253 | BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 254 | StringRef Value, StringRef RawVal) |
| 255 | : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) { |
| 256 | SMLoc Start = SMLoc::getFromPointer(RawVal.begin()); |
| 257 | SMLoc End = SMLoc::getFromPointer(RawVal.end()); |
| 258 | SourceRange = SMRange(Start, End); |
| 259 | } |
| 260 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 261 | /// Gets the value of this node as a StringRef. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 262 | StringRef getValue() const { return Value; } |
| 263 | |
| 264 | static bool classof(const Node *N) { |
| 265 | return N->getType() == NK_BlockScalar; |
| 266 | } |
| 267 | |
| 268 | private: |
| 269 | StringRef Value; |
| 270 | }; |
| 271 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 272 | /// A key and value pair. While not technically a Node under the YAML |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 273 | /// representation graph, it is easier to treat them this way. |
| 274 | /// |
| 275 | /// TODO: Consider making this not a child of Node. |
| 276 | /// |
| 277 | /// Example: |
| 278 | /// Section: .text |
| 279 | class KeyValueNode final : public Node { |
| 280 | void anchor() override; |
| 281 | |
| 282 | public: |
| 283 | KeyValueNode(std::unique_ptr<Document> &D) |
| 284 | : Node(NK_KeyValue, D, StringRef(), StringRef()) {} |
| 285 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 286 | /// Parse and return the key. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 287 | /// |
| 288 | /// This may be called multiple times. |
| 289 | /// |
| 290 | /// \returns The key, or nullptr if failed() == true. |
| 291 | Node *getKey(); |
| 292 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 293 | /// Parse and return the value. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 294 | /// |
| 295 | /// This may be called multiple times. |
| 296 | /// |
| 297 | /// \returns The value, or nullptr if failed() == true. |
| 298 | Node *getValue(); |
| 299 | |
| 300 | void skip() override { |
| 301 | if (Node *Key = getKey()) { |
| 302 | Key->skip(); |
| 303 | if (Node *Val = getValue()) |
| 304 | Val->skip(); |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | static bool classof(const Node *N) { |
| 309 | return N->getType() == NK_KeyValue; |
| 310 | } |
| 311 | |
| 312 | private: |
| 313 | Node *Key = nullptr; |
| 314 | Node *Value = nullptr; |
| 315 | }; |
| 316 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 317 | /// This is an iterator abstraction over YAML collections shared by both |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 318 | /// sequences and maps. |
| 319 | /// |
| 320 | /// BaseT must have a ValueT* member named CurrentEntry and a member function |
| 321 | /// increment() which must set CurrentEntry to 0 to create an end iterator. |
| 322 | template <class BaseT, class ValueT> |
| 323 | class basic_collection_iterator |
| 324 | : public std::iterator<std::input_iterator_tag, ValueT> { |
| 325 | public: |
| 326 | basic_collection_iterator() = default; |
| 327 | basic_collection_iterator(BaseT *B) : Base(B) {} |
| 328 | |
| 329 | ValueT *operator->() const { |
| 330 | assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); |
| 331 | return Base->CurrentEntry; |
| 332 | } |
| 333 | |
| 334 | ValueT &operator*() const { |
| 335 | assert(Base && Base->CurrentEntry && |
| 336 | "Attempted to dereference end iterator!"); |
| 337 | return *Base->CurrentEntry; |
| 338 | } |
| 339 | |
| 340 | operator ValueT *() const { |
| 341 | assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); |
| 342 | return Base->CurrentEntry; |
| 343 | } |
| 344 | |
| 345 | /// Note on EqualityComparable: |
| 346 | /// |
| 347 | /// The iterator is not re-entrant, |
| 348 | /// it is meant to be used for parsing YAML on-demand |
| 349 | /// Once iteration started - it can point only to one entry at a time |
| 350 | /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal |
| 351 | /// iff Base and Other.Base are equal. |
| 352 | bool operator==(const basic_collection_iterator &Other) const { |
| 353 | if (Base && (Base == Other.Base)) { |
| 354 | assert((Base->CurrentEntry == Other.Base->CurrentEntry) |
| 355 | && "Equal Bases expected to point to equal Entries"); |
| 356 | } |
| 357 | |
| 358 | return Base == Other.Base; |
| 359 | } |
| 360 | |
| 361 | bool operator!=(const basic_collection_iterator &Other) const { |
| 362 | return !(Base == Other.Base); |
| 363 | } |
| 364 | |
| 365 | basic_collection_iterator &operator++() { |
| 366 | assert(Base && "Attempted to advance iterator past end!"); |
| 367 | Base->increment(); |
| 368 | // Create an end iterator. |
| 369 | if (!Base->CurrentEntry) |
| 370 | Base = nullptr; |
| 371 | return *this; |
| 372 | } |
| 373 | |
| 374 | private: |
| 375 | BaseT *Base = nullptr; |
| 376 | }; |
| 377 | |
| 378 | // The following two templates are used for both MappingNode and Sequence Node. |
| 379 | template <class CollectionType> |
| 380 | typename CollectionType::iterator begin(CollectionType &C) { |
| 381 | assert(C.IsAtBeginning && "You may only iterate over a collection once!"); |
| 382 | C.IsAtBeginning = false; |
| 383 | typename CollectionType::iterator ret(&C); |
| 384 | ++ret; |
| 385 | return ret; |
| 386 | } |
| 387 | |
| 388 | template <class CollectionType> void skip(CollectionType &C) { |
| 389 | // TODO: support skipping from the middle of a parsed collection ;/ |
| 390 | assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); |
| 391 | if (C.IsAtBeginning) |
| 392 | for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; |
| 393 | ++i) |
| 394 | i->skip(); |
| 395 | } |
| 396 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 397 | /// Represents a YAML map created from either a block map for a flow map. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 398 | /// |
| 399 | /// This parses the YAML stream as increment() is called. |
| 400 | /// |
| 401 | /// Example: |
| 402 | /// Name: _main |
| 403 | /// Scope: Global |
| 404 | class MappingNode final : public Node { |
| 405 | void anchor() override; |
| 406 | |
| 407 | public: |
| 408 | enum MappingType { |
| 409 | MT_Block, |
| 410 | MT_Flow, |
| 411 | MT_Inline ///< An inline mapping node is used for "[key: value]". |
| 412 | }; |
| 413 | |
| 414 | MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 415 | MappingType MT) |
| 416 | : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {} |
| 417 | |
| 418 | friend class basic_collection_iterator<MappingNode, KeyValueNode>; |
| 419 | |
| 420 | using iterator = basic_collection_iterator<MappingNode, KeyValueNode>; |
| 421 | |
| 422 | template <class T> friend typename T::iterator yaml::begin(T &); |
| 423 | template <class T> friend void yaml::skip(T &); |
| 424 | |
| 425 | iterator begin() { return yaml::begin(*this); } |
| 426 | |
| 427 | iterator end() { return iterator(); } |
| 428 | |
| 429 | void skip() override { yaml::skip(*this); } |
| 430 | |
| 431 | static bool classof(const Node *N) { |
| 432 | return N->getType() == NK_Mapping; |
| 433 | } |
| 434 | |
| 435 | private: |
| 436 | MappingType Type; |
| 437 | bool IsAtBeginning = true; |
| 438 | bool IsAtEnd = false; |
| 439 | KeyValueNode *CurrentEntry = nullptr; |
| 440 | |
| 441 | void increment(); |
| 442 | }; |
| 443 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 444 | /// Represents a YAML sequence created from either a block sequence for a |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 445 | /// flow sequence. |
| 446 | /// |
| 447 | /// This parses the YAML stream as increment() is called. |
| 448 | /// |
| 449 | /// Example: |
| 450 | /// - Hello |
| 451 | /// - World |
| 452 | class SequenceNode final : public Node { |
| 453 | void anchor() override; |
| 454 | |
| 455 | public: |
| 456 | enum SequenceType { |
| 457 | ST_Block, |
| 458 | ST_Flow, |
| 459 | // Use for: |
| 460 | // |
| 461 | // key: |
| 462 | // - val1 |
| 463 | // - val2 |
| 464 | // |
| 465 | // As a BlockMappingEntry and BlockEnd are not created in this case. |
| 466 | ST_Indentless |
| 467 | }; |
| 468 | |
| 469 | SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, |
| 470 | SequenceType ST) |
| 471 | : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {} |
| 472 | |
| 473 | friend class basic_collection_iterator<SequenceNode, Node>; |
| 474 | |
| 475 | using iterator = basic_collection_iterator<SequenceNode, Node>; |
| 476 | |
| 477 | template <class T> friend typename T::iterator yaml::begin(T &); |
| 478 | template <class T> friend void yaml::skip(T &); |
| 479 | |
| 480 | void increment(); |
| 481 | |
| 482 | iterator begin() { return yaml::begin(*this); } |
| 483 | |
| 484 | iterator end() { return iterator(); } |
| 485 | |
| 486 | void skip() override { yaml::skip(*this); } |
| 487 | |
| 488 | static bool classof(const Node *N) { |
| 489 | return N->getType() == NK_Sequence; |
| 490 | } |
| 491 | |
| 492 | private: |
| 493 | SequenceType SeqType; |
| 494 | bool IsAtBeginning = true; |
| 495 | bool IsAtEnd = false; |
| 496 | bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','. |
| 497 | Node *CurrentEntry = nullptr; |
| 498 | }; |
| 499 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 500 | /// Represents an alias to a Node with an anchor. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 501 | /// |
| 502 | /// Example: |
| 503 | /// *AnchorName |
| 504 | class AliasNode final : public Node { |
| 505 | void anchor() override; |
| 506 | |
| 507 | public: |
| 508 | AliasNode(std::unique_ptr<Document> &D, StringRef Val) |
| 509 | : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} |
| 510 | |
| 511 | StringRef getName() const { return Name; } |
| 512 | Node *getTarget(); |
| 513 | |
| 514 | static bool classof(const Node *N) { return N->getType() == NK_Alias; } |
| 515 | |
| 516 | private: |
| 517 | StringRef Name; |
| 518 | }; |
| 519 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 520 | /// A YAML Stream is a sequence of Documents. A document contains a root |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 521 | /// node. |
| 522 | class Document { |
| 523 | public: |
| 524 | Document(Stream &ParentStream); |
| 525 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 526 | /// Root for parsing a node. Returns a single node. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 527 | Node *parseBlockNode(); |
| 528 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 529 | /// Finish parsing the current document and return true if there are |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 530 | /// more. Return false otherwise. |
| 531 | bool skip(); |
| 532 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 533 | /// Parse and return the root level node. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 534 | Node *getRoot() { |
| 535 | if (Root) |
| 536 | return Root; |
| 537 | return Root = parseBlockNode(); |
| 538 | } |
| 539 | |
| 540 | const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } |
| 541 | |
| 542 | private: |
| 543 | friend class Node; |
| 544 | friend class document_iterator; |
| 545 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 546 | /// Stream to read tokens from. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 547 | Stream &stream; |
| 548 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 549 | /// Used to allocate nodes to. All are destroyed without calling their |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 550 | /// destructor when the document is destroyed. |
| 551 | BumpPtrAllocator NodeAllocator; |
| 552 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 553 | /// The root node. Used to support skipping a partially parsed |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 554 | /// document. |
| 555 | Node *Root; |
| 556 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 557 | /// Maps tag prefixes to their expansion. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 558 | std::map<StringRef, StringRef> TagMap; |
| 559 | |
| 560 | Token &peekNext(); |
| 561 | Token getNext(); |
| 562 | void setError(const Twine &Message, Token &Location) const; |
| 563 | bool failed() const; |
| 564 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 565 | /// Parse %BLAH directives and return true if any were encountered. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 566 | bool parseDirectives(); |
| 567 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 568 | /// Parse %YAML |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 569 | void parseYAMLDirective(); |
| 570 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 571 | /// Parse %TAG |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 572 | void parseTAGDirective(); |
| 573 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 574 | /// Consume the next token and error if it is not \a TK. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 575 | bool expectToken(int TK); |
| 576 | }; |
| 577 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 578 | /// Iterator abstraction for Documents over a Stream. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 579 | class document_iterator { |
| 580 | public: |
| 581 | document_iterator() = default; |
| 582 | document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} |
| 583 | |
| 584 | bool operator==(const document_iterator &Other) const { |
| 585 | if (isAtEnd() || Other.isAtEnd()) |
| 586 | return isAtEnd() && Other.isAtEnd(); |
| 587 | |
| 588 | return Doc == Other.Doc; |
| 589 | } |
| 590 | bool operator!=(const document_iterator &Other) const { |
| 591 | return !(*this == Other); |
| 592 | } |
| 593 | |
| 594 | document_iterator operator++() { |
| 595 | assert(Doc && "incrementing iterator past the end."); |
| 596 | if (!(*Doc)->skip()) { |
| 597 | Doc->reset(nullptr); |
| 598 | } else { |
| 599 | Stream &S = (*Doc)->stream; |
| 600 | Doc->reset(new Document(S)); |
| 601 | } |
| 602 | return *this; |
| 603 | } |
| 604 | |
| 605 | Document &operator*() { return *Doc->get(); } |
| 606 | |
| 607 | std::unique_ptr<Document> &operator->() { return *Doc; } |
| 608 | |
| 609 | private: |
| 610 | bool isAtEnd() const { return !Doc || !*Doc; } |
| 611 | |
| 612 | std::unique_ptr<Document> *Doc = nullptr; |
| 613 | }; |
| 614 | |
| 615 | } // end namespace yaml |
| 616 | |
| 617 | } // end namespace llvm |
| 618 | |
| 619 | #endif // LLVM_SUPPORT_YAMLPARSER_H |