1/*
2 * Copyright 2016-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/*
17 * Copyright (c) 2015, Facebook, Inc.
18 * All rights reserved.
19 *
20 * This source code is licensed under the BSD-style license found in the
21 * LICENSE file in the root directory of this source tree. An additional grant
22 * of patent rights can be found in the PATENTS file in the same directory.
23 *
24 */
25#pragma once
26
27#include <folly/CPortability.h>
28#include <folly/ScopeGuard.h>
29#include <folly/dynamic.h>
30
31namespace folly {
32
33/**
34 * DynamicParser provides a tiny DSL for easily, correctly, and losslessly
35 * parsing a folly::dynamic into any other representation.
36 *
37 * To make this concrete, this lets you take a JSON config that potentially
38 * contains user errors, and parse __all__ of its valid parts, while
39 * automatically and __reversibly__ recording any parts that cause errors:
40 *
 *   {"my values": {
 *     "an int": "THIS WILL BE RECORDED AS AN ERROR, BUT WE'LL PARSE THE REST",
 *     "a double": 3.1415,
 *     "keys & values": {
 *       "the sky is blue": true,
 *       "THIS WILL ALSO BE RECORDED AS AN ERROR": "cheese",
 *       "2+2=5": false
 *     }
 *   }}
50 *
51 * To parse this JSON, you need no exception handling, it is as easy as:
52 *
53 * folly::dynamic d = ...; // Input
54 * int64_t integer; // Three outputs
55 * double real;
56 * std::map<std::string, bool> enabled_widgets;
57 * DynamicParser p(DynamicParser::OnError::RECORD, &d);
58 * p.required("my values", [&]() {
59 * p.optional("an int", [&](int64_t v) { integer = v; });
60 * p.required("a double", [&](double v) { real = v; });
61 * p.optional("keys & values", [&]() {
62 * p.objectItems([&](std::string widget, bool enabled) {
63 * enabled_widgets.emplace(widget, enabled);
64 * });
65 * });
66 * });
67 *
68 * Your code in the lambdas can throw, and this will be reported just like
69 * missing key and type conversion errors, with precise context on what part
70 * of the folly::dynamic caused the error. No need to throw:
71 * std::runtime_error("Value X at key Y caused a flux capacitor overload")
72 * This will do:
73 * std::runtime_error("Flux capacitor overload")
74 *
75 * == Keys and values are auto-converted to match your callback ==
76 *
77 * DynamicParser's optional(), required(), objectItems(), and
78 * arrayItems() automatically convert the current key and value to match the
79 * signature of the provided callback. parser.key() and parser.value() can
80 * be used to access the same data without conversion.
81 *
82 * The following types are supported -- you should generally take arguments
83 * by-value, or by-const-reference for dynamics & strings you do not copy.
84 *
85 * Key: folly::dynamic (no conversion), std::string, int64_t
86 * Value: folly::dynamic (no conversion), int64_t, bool, double, std::string
87 *
88 * There are 21 supported callback signatures, of three kinds:
89 *
90 * 1: No arguments -- useful if you will just call more parser methods.
91 *
92 * 5: The value alone -- the common case for optional() and required().
93 * [&](whatever_t value) {}
94 *
95 * 15: Both the key and the value converted according to the rules above:
96 * [&](whatever_t key, whatever_t) {}
97 *
98 * NB: The key alone should be rarely needed, but these callback styles
99 * provide it with no conversion overhead, and only minimal verbosity:
100 * [&](const std::string& k, const folly::dynamic&) {}
101 * [&]() { auto k = p.key().asString(); }
102 *
103 * == How `releaseErrors()` can make your parse lossless ==
104 *
105 * If you write parsing code by hand, you usually end up with error-handling
106 * resembling that of OnError::THROW -- the first error you hit aborts the
107 * whole parse, and you report it.
108 *
109 * OnError::RECORD offers a more user-friendly alternative for "parse,
110 * serialize, re-parse" pipelines, akin to what web-forms do. All
111 * exception-causing parts are losslessly recorded in a parallel
112 * folly::dynamic, available via releaseErrors() at the end of the parse.
113 *
114 * Suppose we fail to look up "key1" at the root, and hit a value error in
115 * "key2": {"subkey2": ...}. The error report will have the form:
116 *
117 * {"nested": {
118 * "key_errors": {"key1": "explanatory message"},
119 * "value": <whole input>,
120 * "nested": { "key2": { "nested": {
121 * "subkey2": {"value": <original value>, "error": "message"}
122 * } } }
123 * }}
124 *
125 * Errors in array items are handled just the same, but using integer keys.
126 *
127 * The advantage of this approach is that your parsing can throw wherever,
128 * and DynamicParser isolates it, allowing the good parts to parse.
129 *
130 * Put another way, this makes it easy to implement a transformation that
131 * splits a `folly::dynamic` into a "parsed" part (which might be your
132 * struct meant for runtime use), and a matching "errors" part. As long as
133 * your successful parses are lossless, you can always reconstruct the
134 * original input from the parse output and the recorded "errors".
135 *
136 * == Limitations ==
137 *
138 * - The input dynamic should be an object or array. wrapError() could be
139 * exposed to allow parsing single scalars, but this would not be a
140 * significant usability improvement over try-catch.
141 *
142 * - Do NOT try to parse the same part of the input dynamic twice. You
143 * might report multiple value errors, which is currently unsupported.
144 *
145 * - optional() does not support defaulting. This is unavoidable, since
146 * DynamicParser does not dictate how you record parsed data. If your
147 * parse writes into an output struct, then it ought to be initialized at
148 * construction time. If your output is initialized to default values,
149 * then you need no "default" feature. If it is not initialized, you are
150 * in trouble anyway. Suppose your optional() parse hits an error. What
151 * does your output contain?
152 * - Uninitialized data :(
153 * - You rely on an optional() feature to fall back to parsing some
154 * default dynamic. Sadly, the default hits a parse error. Now what?
155 * Since there is no good way to default, DynamicParser leaves it out.
156 *
157 * == Future: un-parsed items ==
158 *
 * DynamicParser could support erroring on un-parsed items -- the parts of
 * the folly::dynamic that were never asked for.  Here is an ok design:
161 *
162 * (i) At the start of parsing any value, the user may call:
163 * parser.recursivelyForbidUnparsed();
164 * parser.recursivelyAllowUnparsed();
165 * parser.locallyForbidUnparsed();
166 * parser.locallyAllowUnparsed();
167 *
168 * (ii) At the end of the parse, any unparsed items are dumped to "errors".
169 * For example, failing to parse index 1 out of ["v1", "v2", "v3"] yields:
170 * "nested": {1: {"unparsed": "v2"}}
171 * or perhaps more verbosely:
172 * "nested": {1: {"error": "unparsed value", "value": "v2"}}
173 *
174 * By default, unparsed items are allowed. Calling a "forbid" function after
175 * some keys have already been parsed is allowed to fail (this permits a
176 * lazy implementation, which has minimal overhead when "forbid" is not
177 * requested).
178 *
179 * == Future: multiple value errors ==
180 *
181 * The present contract is that exactly one value error is reported per
182 * location in the input (multiple key lookup errors are, of course,
183 * supported). If the need arises, multiple value errors could easily be
184 * supported by replacing the "error" string with an "errors" array.
185 */
186
187namespace detail {
188// Why do DynamicParser error messages use folly::dynamic pseudo-JSON?
189// Firstly, the input dynamic need not correspond to valid JSON. Secondly,
190// wrapError() uses integer-keyed objects to report arrary-indexing errors.
191std::string toPseudoJson(const folly::dynamic& d);
192} // namespace detail
193
194/**
195 * With DynamicParser::OnError::THROW, reports the first error.
196 * It is forbidden to call releaseErrors() if you catch this.
197 */
198struct FOLLY_EXPORT DynamicParserParseError : public std::runtime_error {
199 explicit DynamicParserParseError(folly::dynamic error)
200 : std::runtime_error(folly::to<std::string>(
201 "DynamicParserParseError: ",
202 detail::toPseudoJson(error))),
203 error_(std::move(error)) {}
204 /**
205 * Structured just like releaseErrors(), but with only 1 error inside:
206 * {"nested": {"key1": {"nested": {"key2": {"error": "err", "value": 5}}}}}
207 * or:
208 * {"nested": {"key1": {"key_errors": {"key3": "err"}, "value": 7}}}
209 */
210 const folly::dynamic& error() const {
211 return error_;
212 }
213
214 private:
215 folly::dynamic error_;
216};
217
218/**
219 * When DynamicParser is used incorrectly, it will throw this exception
220 * instead of reporting an error via releaseErrors(). It is unsafe to call
221 * any parser methods after catching a LogicError.
222 */
223struct FOLLY_EXPORT DynamicParserLogicError : public std::logic_error {
224 template <typename... Args>
225 explicit DynamicParserLogicError(Args&&... args)
226 : std::logic_error(folly::to<std::string>(std::forward<Args>(args)...)) {}
227};
228
229class DynamicParser {
230 public:
231 enum class OnError {
232 // After parsing, releaseErrors() reports all parse errors.
233 // Throws DynamicParserLogicError on programmer errors.
234 RECORD,
235 // Throws DynamicParserParseError on the first parse error, or
236 // DynamicParserLogicError on programmer errors.
237 THROW,
238 };
239
240 // You MUST NOT destroy `d` before the parser.
241 DynamicParser(OnError on_error, const folly::dynamic* d)
242 : onError_(on_error), stack_(d) {} // Always access input through stack_
243
244 /**
245 * Once you finished the entire parse, returns a structured description of
246 * all parse errors (see top-of-file docblock). May ONLY be called once.
247 * May NOT be called if the parse threw any kind of exception. Returns an
248 * empty object for successful OnError::THROW parsers.
249 */
250 folly::dynamic releaseErrors() {
251 return stack_.releaseErrors();
252 }
253
254 /**
255 * Error-wraps fn(auto-converted key & value) if d[key] is set. The
256 * top-of-file docblock explains the auto-conversion.
257 */
258 template <typename Fn>
259 void optional(const folly::dynamic& key, Fn);
260
261 // Like optional(), but reports an error if d[key] does not exist.
262 template <typename Fn>
263 void required(const folly::dynamic& key, Fn);
264
265 /**
266 * Iterate over the current object's keys and values. Report each item's
267 * errors under its own key in a matching sub-object of "errors".
268 */
269 template <typename Fn>
270 void objectItems(Fn);
271
272 /**
273 * Like objectItems() -- arrays are treated identically to objects with
274 * integer keys from 0 to size() - 1.
275 */
276 template <typename Fn>
277 void arrayItems(Fn);
278
279 /**
280 * The key currently being parsed (integer if inside an array). Throws if
281 * called outside of a parser callback.
282 */
283 inline const folly::dynamic& key() const {
284 return stack_.key();
285 }
286 /**
287 * The value currently being parsed (initially, the input dynamic).
288 * Throws if parsing nullptr, or parsing after releaseErrors().
289 */
290 inline const folly::dynamic& value() const {
291 return stack_.value();
292 }
293
294 /**
295 * By default, DynamicParser's "nested" object coerces all keys to
296 * strings, whether from arrayItems() or from p.optional(some_int, ...),
297 * to allow errors be serialized to JSON. If you are parsing non-JSON
298 * dynamic objects with non-string keys, this is problematic. When set to
299 * true, "nested" objects will report integer keys for errors coming from
300 * inside arrays, or the original key type from inside values of objects.
301 */
302 DynamicParser& setAllowNonStringKeyErrors(bool b) {
303 allowNonStringKeyErrors_ = b;
304 return *this;
305 }
306
307 private:
308 /**
309 * If `fn` throws an exception, wrapError() catches it and inserts an
310 * enriched description into stack_.errors_. If lookup_key is non-null,
311 * reports a key lookup error in "key_errors", otherwise reportse a value
312 * error in "error".
313 *
314 * Not public because that would encourage users to report multiple errors
315 * per input part, which is currently unsupported. It does not currently
316 * seem like normal user code should need this.
317 */
318 template <typename Fn>
319 void wrapError(const folly::dynamic* lookup_key, Fn);
320
321 void reportError(const folly::dynamic* lookup_k, const std::exception& ex);
322
323 template <typename Fn>
324 void parse(const folly::dynamic& key, const folly::dynamic& value, Fn fn);
325
326 // All of the above business logic obtains the part of the folly::dynamic
327 // it is examining (and the location for reporting errors) via this class,
328 // which lets it correctly handle nesting.
329 struct ParserStack {
330 struct Pop {
331 explicit Pop(ParserStack* sp)
332 : key_(sp->key_), value_(sp->value_), stackPtr_(sp) {}
333 void operator()() noexcept; // ScopeGuard requires noexcept
334 private:
335 const folly::dynamic* key_;
336 const folly::dynamic* value_;
337 ParserStack* stackPtr_;
338 };
339 struct PopGuard {
340 explicit PopGuard(ParserStack* sp) : pop_(in_place, sp) {}
341 ~PopGuard() {
342 pop_ && ((*pop_)(), true);
343 }
344
345 private:
346 Optional<Pop> pop_;
347 };
348
349 explicit ParserStack(const folly::dynamic* input)
350 : value_(input),
351 errors_(folly::dynamic::object()),
352 subErrors_({&errors_}) {}
353
354 // Not copiable or movable due to numerous internal pointers
355 ParserStack(const ParserStack&) = delete;
356 ParserStack& operator=(const ParserStack&) = delete;
357 ParserStack(ParserStack&&) = delete;
358 ParserStack& operator=(ParserStack&&) = delete;
359
360 // Lets user code nest parser calls by recording current key+value and
361 // returning an RAII guard to restore the old one. `noexcept` since it
362 // is used unwrapped.
363 PopGuard push(const folly::dynamic& k, const folly::dynamic& v) noexcept;
364
365 // Throws DynamicParserLogicError if used outside of a parsing function.
366 inline const folly::dynamic& key() const;
367 // Throws DynamicParserLogicError if used after releaseErrors().
368 inline const folly::dynamic& value() const;
369
370 // Lazily creates new "nested" sub-objects in errors_.
371 folly::dynamic& errors(bool allow_non_string_keys) noexcept;
372
373 // The user invokes this at most once after the parse is done.
374 folly::dynamic releaseErrors();
375
376 // Invoked on error when using OnError::THROW.
377 [[noreturn]] void throwErrors();
378
379 private:
380 friend struct Pop;
381
382 folly::dynamic releaseErrorsImpl(); // for releaseErrors() & throwErrors()
383
384 // Null outside of a parsing function.
385 const folly::dynamic* key_{nullptr};
386 // Null on errors: when the input was nullptr, or after releaseErrors().
387 const folly::dynamic* value_;
388
389 // An object containing some of these keys:
390 // "key_errors" -- {"key": "description of error looking up said key"}
391 // "error" -- why did we fail to parse this value?
392 // "value" -- a copy of the input causing the error, and
393 // "nested" -- {"key" or integer for arrays: <another errors_ object>}
394 //
395 // "nested" will contain identically structured objects with keys (array
396 // indices) identifying the origin of the errors. Of course, "input"
397 // would no longer refer to the whole input, but to a part.
398 folly::dynamic errors_;
399 // We only materialize errors_ sub-objects when needed. This stores keys
400 // for unmaterialized errors, from outermost to innermost.
401 std::vector<const folly::dynamic*> unmaterializedSubErrorKeys_;
402 // Materialized errors, from outermost to innermost
403 std::vector<folly::dynamic*> subErrors_; // Point into errors_
404 };
405
406 OnError onError_;
407 ParserStack stack_;
408 bool allowNonStringKeyErrors_{false}; // See the setter's docblock.
409};
410
411} // namespace folly
412
413#include <folly/experimental/DynamicParser-inl.h>
414