DynamicParser.h source code [folly/experimental/DynamicParser.h]

1	/*
2	* Copyright 2016-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	/*
17	* Copyright (c) 2015, Facebook, Inc.
18	* All rights reserved.
19	*
20	* This source code is licensed under the BSD-style license found in the
21	* LICENSE file in the root directory of this source tree. An additional grant
22	* of patent rights can be found in the PATENTS file in the same directory.
23	*
24	*/
25	#pragma once
26
27	#include <folly/CPortability.h>
28	#include <folly/ScopeGuard.h>
29	#include <folly/dynamic.h>
30
31	namespace folly {
32
33	/**
34	* DynamicParser provides a tiny DSL for easily, correctly, and losslessly
35	* parsing a folly::dynamic into any other representation.
36	*
37	* To make this concrete, this lets you take a JSON config that potentially
38	* contains user errors, and parse __all__ of its valid parts, while
39	* automatically and __reversibly__ recording any parts that cause errors:
40	*
41	* {"my values": {
42	* "an int": "THIS WILL BE RECORDED AS AN ERROR, BUT WE'LL PARSE THE REST",
43	* "a double": 3.1415,
44	* "keys & values": {
45	* "the sky is blue": true,
46	* "THIS WILL ALSO BE RECORDED AS AN ERROR": "cheese",
47	* "2+2=5": false,
48	* }
49	* }}
50	*
51	* To parse this JSON, you need no exception handling, it is as easy as:
52	*
53	* folly::dynamic d = ...; // Input
54	* int64_t integer; // Three outputs
55	* double real;
56	* std::map<std::string, bool> enabled_widgets;
57	* DynamicParser p(DynamicParser::OnError::RECORD, &d);
58	* p.required("my values", [&]() {
59	* p.optional("an int", [&](int64_t v) { integer = v; });
60	* p.required("a double", [&](double v) { real = v; });
61	* p.optional("keys & values", [&]() {
62	* p.objectItems([&](std::string widget, bool enabled) {
63	* enabled_widgets.emplace(widget, enabled);
64	* });
65	* });
66	* });
67	*
68	* Your code in the lambdas can throw, and this will be reported just like
69	* missing key and type conversion errors, with precise context on what part
70	* of the folly::dynamic caused the error. No need to throw:
71	* std::runtime_error("Value X at key Y caused a flux capacitor overload")
72	* This will do:
73	* std::runtime_error("Flux capacitor overload")
74	*
75	* == Keys and values are auto-converted to match your callback ==
76	*
77	* DynamicParser's optional(), required(), objectItems(), and
78	* arrayItems() automatically convert the current key and value to match the
79	* signature of the provided callback. parser.key() and parser.value() can
80	* be used to access the same data without conversion.
81	*
82	* The following types are supported -- you should generally take arguments
83	* by-value, or by-const-reference for dynamics & strings you do not copy.
84	*
85	* Key: folly::dynamic (no conversion), std::string, int64_t
86	* Value: folly::dynamic (no conversion), int64_t, bool, double, std::string
87	*
88	* There are 21 supported callback signatures, of three kinds:
89	*
90	* 1: No arguments -- useful if you will just call more parser methods.
91	*
92	* 5: The value alone -- the common case for optional() and required().
93	* [&](whatever_t value) {}
94	*
95	* 15: Both the key and the value converted according to the rules above:
96	* [&](whatever_t key, whatever_t) {}
97	*
98	* NB: The key alone should be rarely needed, but these callback styles
99	* provide it with no conversion overhead, and only minimal verbosity:
100	* [&](const std::string& k, const folly::dynamic&) {}
101	* [&]() { auto k = p.key().asString(); }
102	*
103	* == How `releaseErrors()` can make your parse lossless ==
104	*
105	* If you write parsing code by hand, you usually end up with error-handling
106	* resembling that of OnError::THROW -- the first error you hit aborts the
107	* whole parse, and you report it.
108	*
109	* OnError::RECORD offers a more user-friendly alternative for "parse,
110	* serialize, re-parse" pipelines, akin to what web-forms do. All
111	* exception-causing parts are losslessly recorded in a parallel
112	* folly::dynamic, available via releaseErrors() at the end of the parse.
113	*
114	* Suppose we fail to look up "key1" at the root, and hit a value error in
115	* "key2": {"subkey2": ...}. The error report will have the form:
116	*
117	* {"nested": {
118	* "key_errors": {"key1": "explanatory message"},
119	* "value": <whole input>,
120	* "nested": { "key2": { "nested": {
121	* "subkey2": {"value": <original value>, "error": "message"}
122	* } } }
123	* }}
124	*
125	* Errors in array items are handled just the same, but using integer keys.
126	*
127	* The advantage of this approach is that your parsing can throw wherever,
128	* and DynamicParser isolates it, allowing the good parts to parse.
129	*
130	* Put another way, this makes it easy to implement a transformation that
131	* splits a `folly::dynamic` into a "parsed" part (which might be your
132	* struct meant for runtime use), and a matching "errors" part. As long as
133	* your successful parses are lossless, you can always reconstruct the
134	* original input from the parse output and the recorded "errors".
135	*
136	* == Limitations ==
137	*
138	* - The input dynamic should be an object or array. wrapError() could be
139	* exposed to allow parsing single scalars, but this would not be a
140	* significant usability improvement over try-catch.
141	*
142	* - Do NOT try to parse the same part of the input dynamic twice. You
143	* might report multiple value errors, which is currently unsupported.
144	*
145	* - optional() does not support defaulting. This is unavoidable, since
146	* DynamicParser does not dictate how you record parsed data. If your
147	* parse writes into an output struct, then it ought to be initialized at
148	* construction time. If your output is initialized to default values,
149	* then you need no "default" feature. If it is not initialized, you are
150	* in trouble anyway. Suppose your optional() parse hits an error. What
151	* does your output contain?
152	* - Uninitialized data :(
153	* - You rely on an optional() feature to fall back to parsing some
154	* default dynamic. Sadly, the default hits a parse error. Now what?
155	* Since there is no good way to default, DynamicParser leaves it out.
156	*
157	* == Future: un-parsed items ==
158	*
159	* DynamicParser could support erroring on un-parsed items -- the parts of
160	* the folly::dynamic, which were never asked for. Here is an ok design:
161	*
162	* (i) At the start of parsing any value, the user may call:
163	* parser.recursivelyForbidUnparsed();
164	* parser.recursivelyAllowUnparsed();
165	* parser.locallyForbidUnparsed();
166	* parser.locallyAllowUnparsed();
167	*
168	* (ii) At the end of the parse, any unparsed items are dumped to "errors".
169	* For example, failing to parse index 1 out of ["v1", "v2", "v3"] yields:
170	* "nested": {1: {"unparsed": "v2"}}
171	* or perhaps more verbosely:
172	* "nested": {1: {"error": "unparsed value", "value": "v2"}}
173	*
174	* By default, unparsed items are allowed. Calling a "forbid" function after
175	* some keys have already been parsed is allowed to fail (this permits a
176	* lazy implementation, which has minimal overhead when "forbid" is not
177	* requested).
178	*
179	* == Future: multiple value errors ==
180	*
181	* The present contract is that exactly one value error is reported per
182	* location in the input (multiple key lookup errors are, of course,
183	* supported). If the need arises, multiple value errors could easily be
184	* supported by replacing the "error" string with an "errors" array.
185	*/
186
187	namespace detail {
188	// Why do DynamicParser error messages use folly::dynamic pseudo-JSON?
189	// Firstly, the input dynamic need not correspond to valid JSON. Secondly,
190	// wrapError() uses integer-keyed objects to report arrary-indexing errors.
191	std::string toPseudoJson(const folly::dynamic& d);
192	} // namespace detail
193
194	/**
195	* With DynamicParser::OnError::THROW, reports the first error.
196	* It is forbidden to call releaseErrors() if you catch this.
197	*/
198	struct FOLLY_EXPORT DynamicParserParseError : public std::runtime_error {
199	explicit DynamicParserParseError(folly::dynamic error)
200	: std::runtime_error (folly::to<std::string>(
201	"DynamicParserParseError: ",
202	detail::toPseudoJson(error))),
203	error_(std::move(error)) {}
204	/**
205	* Structured just like releaseErrors(), but with only 1 error inside:
206	* {"nested": {"key1": {"nested": {"key2": {"error": "err", "value": 5}}}}}
207	* or:
208	* {"nested": {"key1": {"key_errors": {"key3": "err"}, "value": 7}}}
209	*/
210	const folly::dynamic& error() const {
211	return error_;
212	}
213
214	private:
215	folly::dynamic error_;
216	};
217
218	/**
219	* When DynamicParser is used incorrectly, it will throw this exception
220	* instead of reporting an error via releaseErrors(). It is unsafe to call
221	* any parser methods after catching a LogicError.
222	*/
223	struct FOLLY_EXPORT DynamicParserLogicError : public std::logic_error {
224	template <typename... Args>
225	explicit DynamicParserLogicError(Args&&... args)
226	: std::logic_error(folly::to<std::string>(std::forward<Args>(args)...)) {}
227	};
228
229	class DynamicParser {
230	public:
231	enum class OnError {
232	// After parsing, releaseErrors() reports all parse errors.
233	// Throws DynamicParserLogicError on programmer errors.
234	RECORD,
235	// Throws DynamicParserParseError on the first parse error, or
236	// DynamicParserLogicError on programmer errors.
237	THROW,
238	};
239
240	// You MUST NOT destroy `d` before the parser.
241	DynamicParser(OnError on_error, const folly::dynamic* d)
242	: onError_(on_error), stack_(d) {} // Always access input through stack_
243
244	/**
245	* Once you finished the entire parse, returns a structured description of
246	* all parse errors (see top-of-file docblock). May ONLY be called once.
247	* May NOT be called if the parse threw any kind of exception. Returns an
248	* empty object for successful OnError::THROW parsers.
249	*/
250	folly::dynamic releaseErrors() {
251	return stack_.releaseErrors();
252	}
253
254	/**
255	* Error-wraps fn(auto-converted key & value) if d[key] is set. The
256	* top-of-file docblock explains the auto-conversion.
257	*/
258	template <typename Fn>
259	void optional(const folly::dynamic& key, Fn);
260
261	// Like optional(), but reports an error if d[key] does not exist.
262	template <typename Fn>
263	void required(const folly::dynamic& key, Fn);
264
265	/**
266	* Iterate over the current object's keys and values. Report each item's
267	* errors under its own key in a matching sub-object of "errors".
268	*/
269	template <typename Fn>
270	void objectItems(Fn);
271
272	/**
273	* Like objectItems() -- arrays are treated identically to objects with
274	* integer keys from 0 to size() - 1.
275	*/
276	template <typename Fn>
277	void arrayItems(Fn);
278
279	/**
280	* The key currently being parsed (integer if inside an array). Throws if
281	* called outside of a parser callback.
282	*/
283	inline const folly::dynamic& key() const {
284	return stack_.key();
285	}
286	/**
287	* The value currently being parsed (initially, the input dynamic).
288	* Throws if parsing nullptr, or parsing after releaseErrors().
289	*/
290	inline const folly::dynamic& value() const {
291	return stack_.value();
292	}
293
294	/**
295	* By default, DynamicParser's "nested" object coerces all keys to
296	* strings, whether from arrayItems() or from p.optional(some_int, ...),
297	* to allow errors be serialized to JSON. If you are parsing non-JSON
298	* dynamic objects with non-string keys, this is problematic. When set to
299	* true, "nested" objects will report integer keys for errors coming from
300	* inside arrays, or the original key type from inside values of objects.
301	*/
302	DynamicParser& setAllowNonStringKeyErrors(bool b) {
303	allowNonStringKeyErrors_ = b;
304	return *this;
305	}
306
307	private:
308	/**
309	* If `fn` throws an exception, wrapError() catches it and inserts an
310	* enriched description into stack_.errors_. If lookup_key is non-null,
311	* reports a key lookup error in "key_errors", otherwise reportse a value
312	* error in "error".
313	*
314	* Not public because that would encourage users to report multiple errors
315	* per input part, which is currently unsupported. It does not currently
316	* seem like normal user code should need this.
317	*/
318	template <typename Fn>
319	void wrapError(const folly::dynamic* lookup_key, Fn);
320
321	void reportError(const folly::dynamic* lookup_k, const std::exception& ex);
322
323	template <typename Fn>
324	void parse(const folly::dynamic& key, const folly::dynamic& value, Fn fn);
325
326	// All of the above business logic obtains the part of the folly::dynamic
327	// it is examining (and the location for reporting errors) via this class,
328	// which lets it correctly handle nesting.
329	struct ParserStack {
330	struct Pop {
331	explicit Pop(ParserStack* sp)
332	: key_(sp->key_), value_(sp->value_), stackPtr_(sp) {}
333	void operator()() noexcept; // ScopeGuard requires noexcept
334	private:
335	const folly::dynamic* key_;
336	const folly::dynamic* value_;
337	ParserStack* stackPtr_;
338	};
339	struct PopGuard {
340	explicit PopGuard(ParserStack* sp) : pop_(in_place, sp) {}
341	~PopGuard() {
342	pop_ && ((pop_)(), true*);
343	}
344
345	private:
346	Optional<Pop> pop_;
347	};
348
349	explicit ParserStack(const folly::dynamic* input)
350	: value_(input),
351	errors_(folly::dynamic::object()),
352	subErrors_({&errors_}) {}
353
354	// Not copiable or movable due to numerous internal pointers
355	ParserStack(const ParserStack&) = delete;
356	ParserStack& operator=(const ParserStack&) = delete;
357	ParserStack(ParserStack&&) = delete;
358	ParserStack& operator=(ParserStack&&) = delete;
359
360	// Lets user code nest parser calls by recording current key+value and
361	// returning an RAII guard to restore the old one. `noexcept` since it
362	// is used unwrapped.
363	PopGuard push(const folly::dynamic& k, const folly::dynamic& v) noexcept;
364
365	// Throws DynamicParserLogicError if used outside of a parsing function.
366	inline const folly::dynamic& key() const;
367	// Throws DynamicParserLogicError if used after releaseErrors().
368	inline const folly::dynamic& value() const;
369
370	// Lazily creates new "nested" sub-objects in errors_.
371	folly::dynamic& errors(bool allow_non_string_keys) noexcept;
372
373	// The user invokes this at most once after the parse is done.
374	folly::dynamic releaseErrors();
375
376	// Invoked on error when using OnError::THROW.
377	[[noreturn]] void throwErrors();
378
379	private:
380	friend struct Pop;
381
382	folly::dynamic releaseErrorsImpl(); // for releaseErrors() & throwErrors()
383
384	// Null outside of a parsing function.
385	const folly::dynamic* key_{nullptr};
386	// Null on errors: when the input was nullptr, or after releaseErrors().
387	const folly::dynamic* value_;
388
389	// An object containing some of these keys:
390	// "key_errors" -- {"key": "description of error looking up said key"}
391	// "error" -- why did we fail to parse this value?
392	// "value" -- a copy of the input causing the error, and
393	// "nested" -- {"key" or integer for arrays: <another errors_ object>}
394	//
395	// "nested" will contain identically structured objects with keys (array
396	// indices) identifying the origin of the errors. Of course, "input"
397	// would no longer refer to the whole input, but to a part.
398	folly::dynamic errors_;
399	// We only materialize errors_ sub-objects when needed. This stores keys
400	// for unmaterialized errors, from outermost to innermost.
401	std::vector<const folly::dynamic*> unmaterializedSubErrorKeys_;
402	// Materialized errors, from outermost to innermost
403	std::vector<folly::dynamic> subErrors_; // Point into errors_*
404	};
405
406	OnError onError_;
407	ParserStack stack_;
408	bool allowNonStringKeyErrors_{false}; // See the setter's docblock.
409	};
410
411	} // namespace folly
412
413	#include <folly/experimental/DynamicParser-inl.h>
414

Browse the source code of folly/experimental/DynamicParser.h