blob: c753cee60ec1255a9c92da417095e34816203054 [file] [log] [blame]
Andrew Scullcdfcccc2018-10-05 20:58:37 +01001//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2//
Andrew Walbran16937d02019-10-22 13:54:20 +01003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Andrew Scullcdfcccc2018-10-05 20:58:37 +01006//
7//===---------------------------------------------------------------------===//
8///
9/// \file
10/// This file supports working with JSON data.
11///
12/// It comprises:
13///
14/// - classes which hold dynamically-typed parsed JSON structures
15/// These are value types that can be composed, inspected, and modified.
16/// See json::Value, and the related types json::Object and json::Array.
17///
18/// - functions to parse JSON text into Values, and to serialize Values to text.
19/// See parse(), operator<<, and format_provider.
20///
21/// - a convention and helpers for mapping between json::Value and user-defined
22/// types. See fromJSON(), ObjectMapper, and the class comment on Value.
23///
Andrew Walbran3d2c1972020-04-07 12:24:26 +010024/// - an output API json::OStream which can emit JSON without materializing
25/// all structures as json::Value.
26///
Andrew Scullcdfcccc2018-10-05 20:58:37 +010027/// Typically, JSON data would be read from an external source, parsed into
28/// a Value, and then converted into some native data structure before doing
29/// real work on it. (And vice versa when writing).
30///
31/// Other serialization mechanisms you may consider:
32///
33/// - YAML is also text-based, and more human-readable than JSON. It's a more
34/// complex format and data model, and YAML parsers aren't ubiquitous.
35/// YAMLParser.h is a streaming parser suitable for parsing large documents
36/// (including JSON, as YAML is a superset). It can be awkward to use
37/// directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38/// declarative than the toJSON/fromJSON conventions here.
39///
40/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41/// encodes LLVM IR ("bitcode"), but it can be a container for other data.
Andrew Walbran3d2c1972020-04-07 12:24:26 +010042/// Low-level reader/writer libraries are in Bitstream/Bitstream*.h
Andrew Scullcdfcccc2018-10-05 20:58:37 +010043///
44//===---------------------------------------------------------------------===//
45
46#ifndef LLVM_SUPPORT_JSON_H
47#define LLVM_SUPPORT_JSON_H
48
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/SmallVector.h"
51#include "llvm/ADT/StringRef.h"
52#include "llvm/Support/Error.h"
53#include "llvm/Support/FormatVariadic.h"
54#include "llvm/Support/raw_ostream.h"
55#include <map>
56
57namespace llvm {
58namespace json {
59
60// === String encodings ===
61//
62// JSON strings are character sequences (not byte sequences like std::string).
63// We need to know the encoding, and for simplicity only support UTF-8.
64//
65// - When parsing, invalid UTF-8 is a syntax error like any other
66//
67// - When creating Values from strings, callers must ensure they are UTF-8.
68// with asserts on, invalid UTF-8 will crash the program
69// with asserts off, we'll substitute the replacement character (U+FFFD)
70// Callers can use json::isUTF8() and json::fixUTF8() for validation.
71//
72// - When retrieving strings from Values (e.g. asString()), the result will
73// always be valid UTF-8.
74
75/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first
/// error.
77bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
78/// Replaces invalid UTF-8 sequences in \p S with the replacement character
79/// (U+FFFD). The returned string is valid UTF-8.
80/// This is much slower than isUTF8, so test that first.
81std::string fixUTF8(llvm::StringRef S);
82
83class Array;
84class ObjectKey;
85class Value;
86template <typename T> Value toJSON(const llvm::Optional<T> &Opt);
87
88/// An Object is a JSON object, which maps strings to heterogenous JSON values.
89/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
90class Object {
91 using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
92 Storage M;
93
94public:
95 using key_type = ObjectKey;
96 using mapped_type = Value;
97 using value_type = Storage::value_type;
98 using iterator = Storage::iterator;
99 using const_iterator = Storage::const_iterator;
100
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100101 Object() = default;
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100102 // KV is a trivial key-value struct for list-initialization.
103 // (using std::pair forces extra copies).
104 struct KV;
105 explicit Object(std::initializer_list<KV> Properties);
106
107 iterator begin() { return M.begin(); }
108 const_iterator begin() const { return M.begin(); }
109 iterator end() { return M.end(); }
110 const_iterator end() const { return M.end(); }
111
112 bool empty() const { return M.empty(); }
113 size_t size() const { return M.size(); }
114
115 void clear() { M.clear(); }
116 std::pair<iterator, bool> insert(KV E);
117 template <typename... Ts>
118 std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
119 return M.try_emplace(K, std::forward<Ts>(Args)...);
120 }
121 template <typename... Ts>
122 std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
123 return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
124 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200125 bool erase(StringRef K);
126 void erase(iterator I) { M.erase(I); }
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100127
128 iterator find(StringRef K) { return M.find_as(K); }
129 const_iterator find(StringRef K) const { return M.find_as(K); }
130 // operator[] acts as if Value was default-constructible as null.
131 Value &operator[](const ObjectKey &K);
132 Value &operator[](ObjectKey &&K);
133 // Look up a property, returning nullptr if it doesn't exist.
134 Value *get(StringRef K);
135 const Value *get(StringRef K) const;
136 // Typed accessors return None/nullptr if
137 // - the property doesn't exist
138 // - or it has the wrong type
139 llvm::Optional<std::nullptr_t> getNull(StringRef K) const;
140 llvm::Optional<bool> getBoolean(StringRef K) const;
141 llvm::Optional<double> getNumber(StringRef K) const;
142 llvm::Optional<int64_t> getInteger(StringRef K) const;
143 llvm::Optional<llvm::StringRef> getString(StringRef K) const;
144 const json::Object *getObject(StringRef K) const;
145 json::Object *getObject(StringRef K);
146 const json::Array *getArray(StringRef K) const;
147 json::Array *getArray(StringRef K);
148};
149bool operator==(const Object &LHS, const Object &RHS);
150inline bool operator!=(const Object &LHS, const Object &RHS) {
151 return !(LHS == RHS);
152}
153
154/// An Array is a JSON array, which contains heterogeneous JSON values.
155/// It simulates std::vector<Value>.
156class Array {
157 std::vector<Value> V;
158
159public:
160 using value_type = Value;
161 using iterator = std::vector<Value>::iterator;
162 using const_iterator = std::vector<Value>::const_iterator;
163
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100164 Array() = default;
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100165 explicit Array(std::initializer_list<Value> Elements);
166 template <typename Collection> explicit Array(const Collection &C) {
167 for (const auto &V : C)
168 emplace_back(V);
169 }
170
171 Value &operator[](size_t I) { return V[I]; }
172 const Value &operator[](size_t I) const { return V[I]; }
173 Value &front() { return V.front(); }
174 const Value &front() const { return V.front(); }
175 Value &back() { return V.back(); }
176 const Value &back() const { return V.back(); }
177 Value *data() { return V.data(); }
178 const Value *data() const { return V.data(); }
179
180 iterator begin() { return V.begin(); }
181 const_iterator begin() const { return V.begin(); }
182 iterator end() { return V.end(); }
183 const_iterator end() const { return V.end(); }
184
185 bool empty() const { return V.empty(); }
186 size_t size() const { return V.size(); }
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100187 void reserve(size_t S) { V.reserve(S); }
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100188
189 void clear() { V.clear(); }
190 void push_back(const Value &E) { V.push_back(E); }
191 void push_back(Value &&E) { V.push_back(std::move(E)); }
192 template <typename... Args> void emplace_back(Args &&... A) {
193 V.emplace_back(std::forward<Args>(A)...);
194 }
195 void pop_back() { V.pop_back(); }
196 // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees.
197 iterator insert(iterator P, const Value &E) { return V.insert(P, E); }
198 iterator insert(iterator P, Value &&E) {
199 return V.insert(P, std::move(E));
200 }
201 template <typename It> iterator insert(iterator P, It A, It Z) {
202 return V.insert(P, A, Z);
203 }
204 template <typename... Args> iterator emplace(const_iterator P, Args &&... A) {
205 return V.emplace(P, std::forward<Args>(A)...);
206 }
207
208 friend bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
209};
210inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
211
/// A Value is a JSON value of unknown type.
213/// They can be copied, but should generally be moved.
214///
215/// === Composing values ===
216///
217/// You can implicitly construct Values from:
218/// - strings: std::string, SmallString, formatv, StringRef, char*
219/// (char*, and StringRef are references, not copies!)
220/// - numbers
221/// - booleans
222/// - null: nullptr
223/// - arrays: {"foo", 42.0, false}
224/// - serializable things: types with toJSON(const T&)->Value, found by ADL
225///
226/// They can also be constructed from object/array helpers:
227/// - json::Object is a type like map<ObjectKey, Value>
228/// - json::Array is a type like vector<Value>
229/// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
231///
232/// === Inspecting values ===
233///
234/// Each Value is one of the JSON kinds:
235/// null (nullptr_t)
236/// boolean (bool)
237/// number (double or int64)
238/// string (StringRef)
239/// array (json::Array)
240/// object (json::Object)
241///
242/// The kind can be queried directly, or implicitly via the typed accessors:
///   if (Optional<StringRef> S = E.getAsString())
244/// assert(E.kind() == Value::String);
245///
246/// Array and Object also have typed indexing accessors for easy traversal:
247/// Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
248/// if (Object* O = E->getAsObject())
249/// if (Object* Opts = O->getObject("options"))
250/// if (Optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
252///
253/// === Converting JSON values to C++ types ===
254///
255/// The convention is to have a deserializer function findable via ADL:
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200256/// fromJSON(const json::Value&, T&, Path) -> bool
257///
258/// The return value indicates overall success, and Path is used for precise
259/// error reporting. (The Path::Root passed in at the top level fromJSON call
260/// captures any nested error and can render it in context).
261/// If conversion fails, fromJSON calls Path::report() and immediately returns.
262/// This ensures that the first fatal error survives.
263///
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100264/// Deserializers are provided for:
265/// - bool
266/// - int and int64_t
267/// - double
268/// - std::string
269/// - vector<T>, where T is deserializable
270/// - map<string, T>, where T is deserializable
271/// - Optional<T>, where T is deserializable
272/// ObjectMapper can help writing fromJSON() functions for object types.
273///
274/// For conversion in the other direction, the serializer function is:
275/// toJSON(const T&) -> json::Value
276/// If this exists, then it also allows constructing Value from T, and can
277/// be used to serialize vector<T>, map<string, T>, and Optional<T>.
278///
279/// === Serialization ===
280///
281/// Values can be serialized to JSON:
282/// 1) raw_ostream << Value // Basic formatting.
283/// 2) raw_ostream << formatv("{0}", Value) // Basic formatting.
284/// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2.
285///
286/// And parsed:
287/// Expected<Value> E = json::parse("[1, 2, null]");
288/// assert(E && E->kind() == Value::Array);
289class Value {
290public:
291 enum Kind {
292 Null,
293 Boolean,
294 /// Number values can store both int64s and doubles at full precision,
295 /// depending on what they were constructed/parsed from.
296 Number,
297 String,
298 Array,
299 Object,
300 };
301
302 // It would be nice to have Value() be null. But that would make {} null too.
303 Value(const Value &M) { copyFrom(M); }
304 Value(Value &&M) { moveFrom(std::move(M)); }
305 Value(std::initializer_list<Value> Elements);
306 Value(json::Array &&Elements) : Type(T_Array) {
307 create<json::Array>(std::move(Elements));
308 }
Andrew Walbran16937d02019-10-22 13:54:20 +0100309 template <typename Elt>
310 Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100311 Value(json::Object &&Properties) : Type(T_Object) {
312 create<json::Object>(std::move(Properties));
313 }
Andrew Walbran16937d02019-10-22 13:54:20 +0100314 template <typename Elt>
315 Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100316 // Strings: types with value semantics. Must be valid UTF-8.
317 Value(std::string V) : Type(T_String) {
318 if (LLVM_UNLIKELY(!isUTF8(V))) {
319 assert(false && "Invalid UTF-8 in value used as JSON");
320 V = fixUTF8(std::move(V));
321 }
322 create<std::string>(std::move(V));
323 }
324 Value(const llvm::SmallVectorImpl<char> &V)
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100325 : Value(std::string(V.begin(), V.end())) {}
326 Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100327 // Strings: types with reference semantics. Must be valid UTF-8.
328 Value(StringRef V) : Type(T_StringRef) {
329 create<llvm::StringRef>(V);
330 if (LLVM_UNLIKELY(!isUTF8(V))) {
331 assert(false && "Invalid UTF-8 in value used as JSON");
332 *this = Value(fixUTF8(V));
333 }
334 }
335 Value(const char *V) : Value(StringRef(V)) {}
336 Value(std::nullptr_t) : Type(T_Null) {}
337 // Boolean (disallow implicit conversions).
338 // (The last template parameter is a dummy to keep templates distinct.)
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200339 template <typename T,
340 typename = std::enable_if_t<std::is_same<T, bool>::value>,
341 bool = false>
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100342 Value(T B) : Type(T_Boolean) {
343 create<bool>(B);
344 }
345 // Integers (except boolean). Must be non-narrowing convertible to int64_t.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200346 template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
347 typename = std::enable_if_t<!std::is_same<T, bool>::value>>
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100348 Value(T I) : Type(T_Integer) {
349 create<int64_t>(int64_t{I});
350 }
351 // Floating point. Must be non-narrowing convertible to double.
352 template <typename T,
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200353 typename = std::enable_if_t<std::is_floating_point<T>::value>,
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100354 double * = nullptr>
355 Value(T D) : Type(T_Double) {
356 create<double>(double{D});
357 }
358 // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
359 template <typename T,
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200360 typename = std::enable_if_t<std::is_same<
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100361 Value, decltype(toJSON(*(const T *)nullptr))>::value>,
362 Value * = nullptr>
363 Value(const T &V) : Value(toJSON(V)) {}
364
365 Value &operator=(const Value &M) {
366 destroy();
367 copyFrom(M);
368 return *this;
369 }
370 Value &operator=(Value &&M) {
371 destroy();
372 moveFrom(std::move(M));
373 return *this;
374 }
375 ~Value() { destroy(); }
376
377 Kind kind() const {
378 switch (Type) {
379 case T_Null:
380 return Null;
381 case T_Boolean:
382 return Boolean;
383 case T_Double:
384 case T_Integer:
385 return Number;
386 case T_String:
387 case T_StringRef:
388 return String;
389 case T_Object:
390 return Object;
391 case T_Array:
392 return Array;
393 }
394 llvm_unreachable("Unknown kind");
395 }
396
397 // Typed accessors return None/nullptr if the Value is not of this type.
398 llvm::Optional<std::nullptr_t> getAsNull() const {
399 if (LLVM_LIKELY(Type == T_Null))
400 return nullptr;
401 return llvm::None;
402 }
403 llvm::Optional<bool> getAsBoolean() const {
404 if (LLVM_LIKELY(Type == T_Boolean))
405 return as<bool>();
406 return llvm::None;
407 }
408 llvm::Optional<double> getAsNumber() const {
409 if (LLVM_LIKELY(Type == T_Double))
410 return as<double>();
411 if (LLVM_LIKELY(Type == T_Integer))
412 return as<int64_t>();
413 return llvm::None;
414 }
415 // Succeeds if the Value is a Number, and exactly representable as int64_t.
416 llvm::Optional<int64_t> getAsInteger() const {
417 if (LLVM_LIKELY(Type == T_Integer))
418 return as<int64_t>();
419 if (LLVM_LIKELY(Type == T_Double)) {
420 double D = as<double>();
421 if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
422 D >= double(std::numeric_limits<int64_t>::min()) &&
423 D <= double(std::numeric_limits<int64_t>::max())))
424 return D;
425 }
426 return llvm::None;
427 }
428 llvm::Optional<llvm::StringRef> getAsString() const {
429 if (Type == T_String)
430 return llvm::StringRef(as<std::string>());
431 if (LLVM_LIKELY(Type == T_StringRef))
432 return as<llvm::StringRef>();
433 return llvm::None;
434 }
435 const json::Object *getAsObject() const {
436 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
437 }
438 json::Object *getAsObject() {
439 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
440 }
441 const json::Array *getAsArray() const {
442 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
443 }
444 json::Array *getAsArray() {
445 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
446 }
447
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100448private:
449 void destroy();
450 void copyFrom(const Value &M);
451 // We allow moving from *const* Values, by marking all members as mutable!
452 // This hack is needed to support initializer-list syntax efficiently.
453 // (std::initializer_list<T> is a container of const T).
454 void moveFrom(const Value &&M);
455 friend class Array;
456 friend class Object;
457
458 template <typename T, typename... U> void create(U &&... V) {
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200459 new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100460 }
461 template <typename T> T &as() const {
Andrew Scull0372a572018-11-16 15:47:06 +0000462 // Using this two-step static_cast via void * instead of reinterpret_cast
463 // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200464 void *Storage = static_cast<void *>(&Union);
Andrew Scull0372a572018-11-16 15:47:06 +0000465 return *static_cast<T *>(Storage);
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100466 }
467
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100468 friend class OStream;
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100469
470 enum ValueType : char {
471 T_Null,
472 T_Boolean,
473 T_Double,
474 T_Integer,
475 T_StringRef,
476 T_String,
477 T_Object,
478 T_Array,
479 };
480 // All members mutable, see moveFrom().
481 mutable ValueType Type;
482 mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
483 std::string, json::Array, json::Object>
484 Union;
Andrew Walbran16937d02019-10-22 13:54:20 +0100485 friend bool operator==(const Value &, const Value &);
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100486};
487
488bool operator==(const Value &, const Value &);
489inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100490
491/// ObjectKey is a used to capture keys in Object. Like Value but:
492/// - only strings are allowed
493/// - it's optimized for the string literal case (Owned == nullptr)
494/// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
495class ObjectKey {
496public:
497 ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
498 ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
499 if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
500 assert(false && "Invalid UTF-8 in value used as JSON");
501 *Owned = fixUTF8(std::move(*Owned));
502 }
503 Data = *Owned;
504 }
505 ObjectKey(llvm::StringRef S) : Data(S) {
506 if (LLVM_UNLIKELY(!isUTF8(Data))) {
507 assert(false && "Invalid UTF-8 in value used as JSON");
508 *this = ObjectKey(fixUTF8(S));
509 }
510 }
511 ObjectKey(const llvm::SmallVectorImpl<char> &V)
512 : ObjectKey(std::string(V.begin(), V.end())) {}
513 ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
514
515 ObjectKey(const ObjectKey &C) { *this = C; }
516 ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
517 ObjectKey &operator=(const ObjectKey &C) {
518 if (C.Owned) {
519 Owned.reset(new std::string(*C.Owned));
520 Data = *Owned;
521 } else {
522 Data = C.Data;
523 }
524 return *this;
525 }
526 ObjectKey &operator=(ObjectKey &&) = default;
527
528 operator llvm::StringRef() const { return Data; }
529 std::string str() const { return Data.str(); }
530
531private:
532 // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
533 // could be 2 pointers at most.
534 std::unique_ptr<std::string> Owned;
535 llvm::StringRef Data;
536};
537
538inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
539 return llvm::StringRef(L) == llvm::StringRef(R);
540}
541inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
542 return !(L == R);
543}
544inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
545 return StringRef(L) < StringRef(R);
546}
547
548struct Object::KV {
549 ObjectKey K;
550 Value V;
551};
552
553inline Object::Object(std::initializer_list<KV> Properties) {
554 for (const auto &P : Properties) {
555 auto R = try_emplace(P.K, nullptr);
556 if (R.second)
557 R.first->getSecond().moveFrom(std::move(P.V));
558 }
559}
560inline std::pair<Object::iterator, bool> Object::insert(KV E) {
561 return try_emplace(std::move(E.K), std::move(E.V));
562}
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200563inline bool Object::erase(StringRef K) {
564 return M.erase(ObjectKey(K));
565}
566
567/// A "cursor" marking a position within a Value.
568/// The Value is a tree, and this is the path from the root to the current node.
569/// This is used to associate errors with particular subobjects.
570class Path {
571public:
572 class Root;
573
574 /// Records that the value at the current path is invalid.
575 /// Message is e.g. "expected number" and becomes part of the final error.
576 /// This overwrites any previously written error message in the root.
577 void report(llvm::StringLiteral Message);
578
579 /// The root may be treated as a Path.
580 Path(Root &R) : Parent(nullptr), Seg(&R) {}
581 /// Derives a path for an array element: this[Index]
582 Path index(unsigned Index) const { return Path(this, Segment(Index)); }
583 /// Derives a path for an object field: this.Field
584 Path field(StringRef Field) const { return Path(this, Segment(Field)); }
585
586private:
587 /// One element in a JSON path: an object field (.foo) or array index [27].
588 /// Exception: the root Path encodes a pointer to the Path::Root.
589 class Segment {
590 uintptr_t Pointer;
591 unsigned Offset;
592
593 public:
594 Segment() = default;
595 Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
596 Segment(llvm::StringRef Field)
597 : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
598 Offset(static_cast<unsigned>(Field.size())) {}
599 Segment(unsigned Index) : Pointer(0), Offset(Index) {}
600
601 bool isField() const { return Pointer != 0; }
602 StringRef field() const {
603 return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
604 }
605 unsigned index() const { return Offset; }
606 Root *root() const { return reinterpret_cast<Root *>(Pointer); }
607 };
608
609 const Path *Parent;
610 Segment Seg;
611
612 Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
613};
614
615/// The root is the trivial Path to the root value.
616/// It also stores the latest reported error and the path where it occurred.
617class Path::Root {
618 llvm::StringRef Name;
619 llvm::StringLiteral ErrorMessage;
620 std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
621
622 friend void Path::report(llvm::StringLiteral Message);
623
624public:
625 Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
626 // No copy/move allowed as there are incoming pointers.
627 Root(Root &&) = delete;
628 Root &operator=(Root &&) = delete;
629 Root(const Root &) = delete;
630 Root &operator=(const Root &) = delete;
631
632 /// Returns the last error reported, or else a generic error.
633 Error getError() const;
634 /// Print the root value with the error shown inline as a comment.
635 /// Unrelated parts of the value are elided for brevity, e.g.
636 /// {
637 /// "id": 42,
638 /// "name": /* expected string */ null,
639 /// "properties": { ... }
640 /// }
641 void printErrorContext(const Value &, llvm::raw_ostream &) const;
642};
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100643
644// Standard deserializers are provided for primitive types.
645// See comments on Value.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200646inline bool fromJSON(const Value &E, std::string &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100647 if (auto S = E.getAsString()) {
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200648 Out = std::string(*S);
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100649 return true;
650 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200651 P.report("expected string");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100652 return false;
653}
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200654inline bool fromJSON(const Value &E, int &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100655 if (auto S = E.getAsInteger()) {
656 Out = *S;
657 return true;
658 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200659 P.report("expected integer");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100660 return false;
661}
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200662inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100663 if (auto S = E.getAsInteger()) {
664 Out = *S;
665 return true;
666 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200667 P.report("expected integer");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100668 return false;
669}
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200670inline bool fromJSON(const Value &E, double &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100671 if (auto S = E.getAsNumber()) {
672 Out = *S;
673 return true;
674 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200675 P.report("expected number");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100676 return false;
677}
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200678inline bool fromJSON(const Value &E, bool &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100679 if (auto S = E.getAsBoolean()) {
680 Out = *S;
681 return true;
682 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200683 P.report("expected boolean");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100684 return false;
685}
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200686inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
687 if (auto S = E.getAsNull()) {
688 Out = *S;
689 return true;
690 }
691 P.report("expected null");
692 return false;
693}
694template <typename T>
695bool fromJSON(const Value &E, llvm::Optional<T> &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100696 if (E.getAsNull()) {
697 Out = llvm::None;
698 return true;
699 }
700 T Result;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200701 if (!fromJSON(E, Result, P))
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100702 return false;
703 Out = std::move(Result);
704 return true;
705}
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200706template <typename T>
707bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100708 if (auto *A = E.getAsArray()) {
709 Out.clear();
710 Out.resize(A->size());
711 for (size_t I = 0; I < A->size(); ++I)
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200712 if (!fromJSON((*A)[I], Out[I], P.index(I)))
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100713 return false;
714 return true;
715 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200716 P.report("expected array");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100717 return false;
718}
719template <typename T>
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200720bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100721 if (auto *O = E.getAsObject()) {
722 Out.clear();
723 for (const auto &KV : *O)
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200724 if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
725 P.field(KV.first)))
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100726 return false;
727 return true;
728 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200729 P.report("expected object");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100730 return false;
731}
732
733// Allow serialization of Optional<T> for supported T.
734template <typename T> Value toJSON(const llvm::Optional<T> &Opt) {
735 return Opt ? Value(*Opt) : Value(nullptr);
736}
737
738/// Helper for mapping JSON objects onto protocol structs.
739///
740/// Example:
741/// \code
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200742/// bool fromJSON(const Value &E, MyStruct &R, Path P) {
743/// ObjectMapper O(E, P);
744/// // When returning false, error details were already reported.
745/// return O && O.map("mandatory_field", R.MandatoryField) &&
746/// O.mapOptional("optional_field", R.OptionalField);
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100747/// }
748/// \endcode
749class ObjectMapper {
750public:
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200751 /// If O is not an object, this mapper is invalid and an error is reported.
752 ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
753 if (!O)
754 P.report("expected object");
755 }
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100756
757 /// True if the expression is an object.
758 /// Must be checked before calling map().
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200759 operator bool() const { return O; }
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100760
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200761 /// Maps a property to a field.
762 /// If the property is missing or invalid, reports an error.
763 template <typename T> bool map(StringLiteral Prop, T &Out) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100764 assert(*this && "Must check this is an object before calling map()");
765 if (const Value *E = O->get(Prop))
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200766 return fromJSON(*E, Out, P.field(Prop));
767 P.field(Prop).report("missing value");
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100768 return false;
769 }
770
771 /// Maps a property to a field, if it exists.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200772 /// If the property exists and is invalid, reports an error.
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100773 /// (Optional requires special handling, because missing keys are OK).
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200774 template <typename T> bool map(StringLiteral Prop, llvm::Optional<T> &Out) {
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100775 assert(*this && "Must check this is an object before calling map()");
776 if (const Value *E = O->get(Prop))
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200777 return fromJSON(*E, Out, P.field(Prop));
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100778 Out = llvm::None;
779 return true;
780 }
781
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200782 /// Maps a property to a field, if it exists.
783 /// If the property exists and is invalid, reports an error.
784 /// If the property does not exist, Out is unchanged.
785 template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
786 assert(*this && "Must check this is an object before calling map()");
787 if (const Value *E = O->get(Prop))
788 return fromJSON(*E, Out, P.field(Prop));
789 return true;
790 }
791
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100792private:
793 const Object *O;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200794 Path P;
Andrew Scullcdfcccc2018-10-05 20:58:37 +0100795};
796
797/// Parses the provided JSON source, or returns a ParseError.
798/// The returned Value is self-contained and owns its strings (they do not refer
799/// to the original source).
800llvm::Expected<Value> parse(llvm::StringRef JSON);
801
802class ParseError : public llvm::ErrorInfo<ParseError> {
803 const char *Msg;
804 unsigned Line, Column, Offset;
805
806public:
807 static char ID;
808 ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
809 : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
810 void log(llvm::raw_ostream &OS) const override {
811 OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
812 }
813 std::error_code convertToErrorCode() const override {
814 return llvm::inconvertibleErrorCode();
815 }
816};
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100817
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200818/// Version of parse() that converts the parsed value to the type T.
819/// RootName describes the root object and is used in error messages.
820template <typename T>
821Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
822 auto V = parse(JSON);
823 if (!V)
824 return V.takeError();
825 Path::Root R(RootName);
826 T Result;
827 if (fromJSON(*V, Result, R))
828 return std::move(Result);
829 return R.getError();
830}
831
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100832/// json::OStream allows writing well-formed JSON without materializing
833/// all structures as json::Value ahead of time.
834/// It's faster, lower-level, and less safe than OS << json::Value.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200835/// It also allows emitting more constructs, such as comments.
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100836///
837/// Only one "top-level" object can be written to a stream.
838/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
839///
840/// json::OStream J(OS);
841/// J.array([&]{
842/// for (const Event &E : Events)
843/// J.object([&] {
844/// J.attribute("timestamp", int64_t(E.Time));
845/// J.attributeArray("participants", [&] {
846/// for (const Participant &P : E.Participants)
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200847/// J.value(P.toString());
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100848/// });
849/// });
850/// });
851///
852/// This would produce JSON like:
853///
854/// [
855/// {
856/// "timestamp": 19287398741,
857/// "participants": [
858/// "King Kong",
859/// "Miley Cyrus",
860/// "Cleopatra"
861/// ]
862/// },
863/// ...
864/// ]
865///
866/// The lower level begin/end methods (arrayBegin()) are more flexible but
867/// care must be taken to pair them correctly:
868///
869/// json::OStream J(OS);
/// J.arrayBegin();
871/// for (const Event &E : Events) {
872/// J.objectBegin();
873/// J.attribute("timestamp", int64_t(E.Time));
874/// J.attributeBegin("participants");
875/// for (const Participant &P : E.Participants)
876/// J.value(P.toString());
877/// J.attributeEnd();
878/// J.objectEnd();
879/// }
880/// J.arrayEnd();
881///
882/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
883/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
884/// an array, and so on.
885/// With asserts disabled, this is undefined behavior.
886class OStream {
887 public:
888 using Block = llvm::function_ref<void()>;
889 // If IndentSize is nonzero, output is pretty-printed.
890 explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
891 : OS(OS), IndentSize(IndentSize) {
892 Stack.emplace_back();
893 }
894 ~OStream() {
895 assert(Stack.size() == 1 && "Unmatched begin()/end()");
896 assert(Stack.back().Ctx == Singleton);
897 assert(Stack.back().HasValue && "Did not write top-level value");
898 }
899
900 /// Flushes the underlying ostream. OStream does not buffer internally.
901 void flush() { OS.flush(); }
902
903 // High level functions to output a value.
904 // Valid at top-level (exactly once), in an attribute value (exactly once),
905 // or in an array (any number of times).
906
907 /// Emit a self-contained value (number, string, vector<string> etc).
908 void value(const Value &V);
909 /// Emit an array whose elements are emitted in the provided Block.
910 void array(Block Contents) {
911 arrayBegin();
912 Contents();
913 arrayEnd();
914 }
915 /// Emit an object whose elements are emitted in the provided Block.
916 void object(Block Contents) {
917 objectBegin();
918 Contents();
919 objectEnd();
920 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200921 /// Emit an externally-serialized value.
922 /// The caller must write exactly one valid JSON value to the provided stream.
923 /// No validation or formatting of this value occurs.
924 void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
925 rawValueBegin();
926 Contents(OS);
927 rawValueEnd();
928 }
929 void rawValue(llvm::StringRef Contents) {
930 rawValue([&](raw_ostream &OS) { OS << Contents; });
931 }
932 /// Emit a JavaScript comment associated with the next printed value.
933 /// The string must be valid until the next attribute or value is emitted.
934 /// Comments are not part of standard JSON, and many parsers reject them!
935 void comment(llvm::StringRef);
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100936
937 // High level functions to output object attributes.
938 // Valid only within an object (any number of times).
939
940 /// Emit an attribute whose value is self-contained (number, vector<int> etc).
941 void attribute(llvm::StringRef Key, const Value& Contents) {
942 attributeImpl(Key, [&] { value(Contents); });
943 }
944 /// Emit an attribute whose value is an array with elements from the Block.
945 void attributeArray(llvm::StringRef Key, Block Contents) {
946 attributeImpl(Key, [&] { array(Contents); });
947 }
948 /// Emit an attribute whose value is an object with attributes from the Block.
949 void attributeObject(llvm::StringRef Key, Block Contents) {
950 attributeImpl(Key, [&] { object(Contents); });
951 }
952
953 // Low-level begin/end functions to output arrays, objects, and attributes.
954 // Must be correctly paired. Allowed contexts are as above.
955
956 void arrayBegin();
957 void arrayEnd();
958 void objectBegin();
959 void objectEnd();
960 void attributeBegin(llvm::StringRef Key);
961 void attributeEnd();
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200962 raw_ostream &rawValueBegin();
963 void rawValueEnd();
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100964
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200965private:
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100966 void attributeImpl(llvm::StringRef Key, Block Contents) {
967 attributeBegin(Key);
968 Contents();
969 attributeEnd();
970 }
971
972 void valueBegin();
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200973 void flushComment();
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100974 void newline();
975
976 enum Context {
977 Singleton, // Top level, or object attribute.
978 Array,
979 Object,
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200980 RawValue, // External code writing a value to OS directly.
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100981 };
982 struct State {
983 Context Ctx = Singleton;
984 bool HasValue = false;
985 };
986 llvm::SmallVector<State, 16> Stack; // Never empty.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +0200987 llvm::StringRef PendingComment;
Andrew Walbran3d2c1972020-04-07 12:24:26 +0100988 llvm::raw_ostream &OS;
989 unsigned IndentSize;
990 unsigned Indent = 0;
991};
992
993/// Serializes this Value to JSON, writing it to the provided stream.
994/// The formatting is compact (no extra whitespace) and deterministic.
995/// For pretty-printing, use the formatv() format_provider below.
996inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
997 OStream(OS).value(V);
998 return OS;
999}
Andrew Scullcdfcccc2018-10-05 20:58:37 +01001000} // namespace json
1001
1002/// Allow printing json::Value with formatv().
1003/// The default style is basic/compact formatting, like operator<<.
1004/// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
1005template <> struct format_provider<llvm::json::Value> {
1006 static void format(const llvm::json::Value &, raw_ostream &, StringRef);
1007};
1008} // namespace llvm
1009
1010#endif