1 | /* |
2 | * Copyright 2016-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | /* |
17 | * Copyright (c) 2015, Facebook, Inc. |
18 | * All rights reserved. |
19 | * |
20 | * This source code is licensed under the BSD-style license found in the |
21 | * LICENSE file in the root directory of this source tree. An additional grant |
22 | * of patent rights can be found in the PATENTS file in the same directory. |
23 | * |
24 | */ |
25 | #pragma once |
26 | |
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <folly/CPortability.h>
#include <folly/ScopeGuard.h>
#include <folly/dynamic.h>
30 | |
31 | namespace folly { |
32 | |
33 | /** |
34 | * DynamicParser provides a tiny DSL for easily, correctly, and losslessly |
35 | * parsing a folly::dynamic into any other representation. |
36 | * |
37 | * To make this concrete, this lets you take a JSON config that potentially |
38 | * contains user errors, and parse __all__ of its valid parts, while |
39 | * automatically and __reversibly__ recording any parts that cause errors: |
40 | * |
41 | * {"my values": { |
42 | * "an int": "THIS WILL BE RECORDED AS AN ERROR, BUT WE'LL PARSE THE REST", |
43 | * "a double": 3.1415, |
44 | * "keys & values": { |
45 | * "the sky is blue": true, |
46 | * "THIS WILL ALSO BE RECORDED AS AN ERROR": "cheese", |
47 | * "2+2=5": false, |
48 | * } |
49 | * }} |
50 | * |
 * To parse this JSON, you need no exception handling -- it is as easy as:
52 | * |
53 | * folly::dynamic d = ...; // Input |
54 | * int64_t integer; // Three outputs |
55 | * double real; |
56 | * std::map<std::string, bool> enabled_widgets; |
57 | * DynamicParser p(DynamicParser::OnError::RECORD, &d); |
58 | * p.required("my values", [&]() { |
59 | * p.optional("an int", [&](int64_t v) { integer = v; }); |
60 | * p.required("a double", [&](double v) { real = v; }); |
61 | * p.optional("keys & values", [&]() { |
62 | * p.objectItems([&](std::string widget, bool enabled) { |
63 | * enabled_widgets.emplace(widget, enabled); |
64 | * }); |
65 | * }); |
66 | * }); |
67 | * |
68 | * Your code in the lambdas can throw, and this will be reported just like |
69 | * missing key and type conversion errors, with precise context on what part |
70 | * of the folly::dynamic caused the error. No need to throw: |
71 | * std::runtime_error("Value X at key Y caused a flux capacitor overload") |
72 | * This will do: |
73 | * std::runtime_error("Flux capacitor overload") |
74 | * |
75 | * == Keys and values are auto-converted to match your callback == |
76 | * |
77 | * DynamicParser's optional(), required(), objectItems(), and |
78 | * arrayItems() automatically convert the current key and value to match the |
79 | * signature of the provided callback. parser.key() and parser.value() can |
80 | * be used to access the same data without conversion. |
81 | * |
82 | * The following types are supported -- you should generally take arguments |
83 | * by-value, or by-const-reference for dynamics & strings you do not copy. |
84 | * |
85 | * Key: folly::dynamic (no conversion), std::string, int64_t |
86 | * Value: folly::dynamic (no conversion), int64_t, bool, double, std::string |
87 | * |
88 | * There are 21 supported callback signatures, of three kinds: |
89 | * |
90 | * 1: No arguments -- useful if you will just call more parser methods. |
91 | * |
92 | * 5: The value alone -- the common case for optional() and required(). |
93 | * [&](whatever_t value) {} |
94 | * |
95 | * 15: Both the key and the value converted according to the rules above: |
96 | * [&](whatever_t key, whatever_t) {} |
97 | * |
98 | * NB: The key alone should be rarely needed, but these callback styles |
99 | * provide it with no conversion overhead, and only minimal verbosity: |
100 | * [&](const std::string& k, const folly::dynamic&) {} |
101 | * [&]() { auto k = p.key().asString(); } |
102 | * |
103 | * == How `releaseErrors()` can make your parse lossless == |
104 | * |
105 | * If you write parsing code by hand, you usually end up with error-handling |
106 | * resembling that of OnError::THROW -- the first error you hit aborts the |
107 | * whole parse, and you report it. |
108 | * |
109 | * OnError::RECORD offers a more user-friendly alternative for "parse, |
110 | * serialize, re-parse" pipelines, akin to what web-forms do. All |
111 | * exception-causing parts are losslessly recorded in a parallel |
112 | * folly::dynamic, available via releaseErrors() at the end of the parse. |
113 | * |
114 | * Suppose we fail to look up "key1" at the root, and hit a value error in |
115 | * "key2": {"subkey2": ...}. The error report will have the form: |
116 | * |
 *   {
 *     "key_errors": {"key1": "explanatory message"},
 *     "value": <whole input>,
 *     "nested": { "key2": { "nested": {
 *       "subkey2": {"value": <original value>, "error": "message"}
 *     } } }
 *   }
124 | * |
125 | * Errors in array items are handled just the same, but using integer keys. |
126 | * |
127 | * The advantage of this approach is that your parsing can throw wherever, |
128 | * and DynamicParser isolates it, allowing the good parts to parse. |
129 | * |
130 | * Put another way, this makes it easy to implement a transformation that |
131 | * splits a `folly::dynamic` into a "parsed" part (which might be your |
132 | * struct meant for runtime use), and a matching "errors" part. As long as |
133 | * your successful parses are lossless, you can always reconstruct the |
134 | * original input from the parse output and the recorded "errors". |
135 | * |
136 | * == Limitations == |
137 | * |
138 | * - The input dynamic should be an object or array. wrapError() could be |
139 | * exposed to allow parsing single scalars, but this would not be a |
140 | * significant usability improvement over try-catch. |
141 | * |
142 | * - Do NOT try to parse the same part of the input dynamic twice. You |
143 | * might report multiple value errors, which is currently unsupported. |
144 | * |
145 | * - optional() does not support defaulting. This is unavoidable, since |
146 | * DynamicParser does not dictate how you record parsed data. If your |
147 | * parse writes into an output struct, then it ought to be initialized at |
148 | * construction time. If your output is initialized to default values, |
149 | * then you need no "default" feature. If it is not initialized, you are |
150 | * in trouble anyway. Suppose your optional() parse hits an error. What |
151 | * does your output contain? |
152 | * - Uninitialized data :( |
153 | * - You rely on an optional() feature to fall back to parsing some |
154 | * default dynamic. Sadly, the default hits a parse error. Now what? |
155 | * Since there is no good way to default, DynamicParser leaves it out. |
156 | * |
157 | * == Future: un-parsed items == |
158 | * |
 * DynamicParser could support erroring on un-parsed items -- the parts of
 * the folly::dynamic that were never asked for.  Here is an ok design:
161 | * |
162 | * (i) At the start of parsing any value, the user may call: |
163 | * parser.recursivelyForbidUnparsed(); |
164 | * parser.recursivelyAllowUnparsed(); |
165 | * parser.locallyForbidUnparsed(); |
166 | * parser.locallyAllowUnparsed(); |
167 | * |
168 | * (ii) At the end of the parse, any unparsed items are dumped to "errors". |
169 | * For example, failing to parse index 1 out of ["v1", "v2", "v3"] yields: |
170 | * "nested": {1: {"unparsed": "v2"}} |
171 | * or perhaps more verbosely: |
172 | * "nested": {1: {"error": "unparsed value", "value": "v2"}} |
173 | * |
174 | * By default, unparsed items are allowed. Calling a "forbid" function after |
175 | * some keys have already been parsed is allowed to fail (this permits a |
176 | * lazy implementation, which has minimal overhead when "forbid" is not |
177 | * requested). |
178 | * |
179 | * == Future: multiple value errors == |
180 | * |
181 | * The present contract is that exactly one value error is reported per |
182 | * location in the input (multiple key lookup errors are, of course, |
183 | * supported). If the need arises, multiple value errors could easily be |
184 | * supported by replacing the "error" string with an "errors" array. |
185 | */ |
186 | |
187 | namespace detail { |
188 | // Why do DynamicParser error messages use folly::dynamic pseudo-JSON? |
189 | // Firstly, the input dynamic need not correspond to valid JSON. Secondly, |
190 | // wrapError() uses integer-keyed objects to report arrary-indexing errors. |
191 | std::string toPseudoJson(const folly::dynamic& d); |
192 | } // namespace detail |
193 | |
194 | /** |
195 | * With DynamicParser::OnError::THROW, reports the first error. |
196 | * It is forbidden to call releaseErrors() if you catch this. |
197 | */ |
198 | struct FOLLY_EXPORT DynamicParserParseError : public std::runtime_error { |
199 | explicit DynamicParserParseError(folly::dynamic error) |
200 | : std::runtime_error(folly::to<std::string>( |
201 | "DynamicParserParseError: " , |
202 | detail::toPseudoJson(error))), |
203 | error_(std::move(error)) {} |
204 | /** |
205 | * Structured just like releaseErrors(), but with only 1 error inside: |
206 | * {"nested": {"key1": {"nested": {"key2": {"error": "err", "value": 5}}}}} |
207 | * or: |
208 | * {"nested": {"key1": {"key_errors": {"key3": "err"}, "value": 7}}} |
209 | */ |
210 | const folly::dynamic& error() const { |
211 | return error_; |
212 | } |
213 | |
214 | private: |
215 | folly::dynamic error_; |
216 | }; |
217 | |
218 | /** |
219 | * When DynamicParser is used incorrectly, it will throw this exception |
220 | * instead of reporting an error via releaseErrors(). It is unsafe to call |
221 | * any parser methods after catching a LogicError. |
222 | */ |
223 | struct FOLLY_EXPORT DynamicParserLogicError : public std::logic_error { |
224 | template <typename... Args> |
225 | explicit DynamicParserLogicError(Args&&... args) |
226 | : std::logic_error(folly::to<std::string>(std::forward<Args>(args)...)) {} |
227 | }; |
228 | |
229 | class DynamicParser { |
230 | public: |
231 | enum class OnError { |
232 | // After parsing, releaseErrors() reports all parse errors. |
233 | // Throws DynamicParserLogicError on programmer errors. |
234 | RECORD, |
235 | // Throws DynamicParserParseError on the first parse error, or |
236 | // DynamicParserLogicError on programmer errors. |
237 | THROW, |
238 | }; |
239 | |
240 | // You MUST NOT destroy `d` before the parser. |
241 | DynamicParser(OnError on_error, const folly::dynamic* d) |
242 | : onError_(on_error), stack_(d) {} // Always access input through stack_ |
243 | |
244 | /** |
245 | * Once you finished the entire parse, returns a structured description of |
246 | * all parse errors (see top-of-file docblock). May ONLY be called once. |
247 | * May NOT be called if the parse threw any kind of exception. Returns an |
248 | * empty object for successful OnError::THROW parsers. |
249 | */ |
250 | folly::dynamic releaseErrors() { |
251 | return stack_.releaseErrors(); |
252 | } |
253 | |
254 | /** |
255 | * Error-wraps fn(auto-converted key & value) if d[key] is set. The |
256 | * top-of-file docblock explains the auto-conversion. |
257 | */ |
258 | template <typename Fn> |
259 | void optional(const folly::dynamic& key, Fn); |
260 | |
261 | // Like optional(), but reports an error if d[key] does not exist. |
262 | template <typename Fn> |
263 | void required(const folly::dynamic& key, Fn); |
264 | |
265 | /** |
266 | * Iterate over the current object's keys and values. Report each item's |
267 | * errors under its own key in a matching sub-object of "errors". |
268 | */ |
269 | template <typename Fn> |
270 | void objectItems(Fn); |
271 | |
272 | /** |
273 | * Like objectItems() -- arrays are treated identically to objects with |
274 | * integer keys from 0 to size() - 1. |
275 | */ |
276 | template <typename Fn> |
277 | void arrayItems(Fn); |
278 | |
279 | /** |
280 | * The key currently being parsed (integer if inside an array). Throws if |
281 | * called outside of a parser callback. |
282 | */ |
283 | inline const folly::dynamic& key() const { |
284 | return stack_.key(); |
285 | } |
286 | /** |
287 | * The value currently being parsed (initially, the input dynamic). |
288 | * Throws if parsing nullptr, or parsing after releaseErrors(). |
289 | */ |
290 | inline const folly::dynamic& value() const { |
291 | return stack_.value(); |
292 | } |
293 | |
294 | /** |
295 | * By default, DynamicParser's "nested" object coerces all keys to |
296 | * strings, whether from arrayItems() or from p.optional(some_int, ...), |
297 | * to allow errors be serialized to JSON. If you are parsing non-JSON |
298 | * dynamic objects with non-string keys, this is problematic. When set to |
299 | * true, "nested" objects will report integer keys for errors coming from |
300 | * inside arrays, or the original key type from inside values of objects. |
301 | */ |
302 | DynamicParser& setAllowNonStringKeyErrors(bool b) { |
303 | allowNonStringKeyErrors_ = b; |
304 | return *this; |
305 | } |
306 | |
307 | private: |
308 | /** |
309 | * If `fn` throws an exception, wrapError() catches it and inserts an |
310 | * enriched description into stack_.errors_. If lookup_key is non-null, |
311 | * reports a key lookup error in "key_errors", otherwise reportse a value |
312 | * error in "error". |
313 | * |
314 | * Not public because that would encourage users to report multiple errors |
315 | * per input part, which is currently unsupported. It does not currently |
316 | * seem like normal user code should need this. |
317 | */ |
318 | template <typename Fn> |
319 | void wrapError(const folly::dynamic* lookup_key, Fn); |
320 | |
321 | void reportError(const folly::dynamic* lookup_k, const std::exception& ex); |
322 | |
323 | template <typename Fn> |
324 | void parse(const folly::dynamic& key, const folly::dynamic& value, Fn fn); |
325 | |
326 | // All of the above business logic obtains the part of the folly::dynamic |
327 | // it is examining (and the location for reporting errors) via this class, |
328 | // which lets it correctly handle nesting. |
329 | struct ParserStack { |
330 | struct Pop { |
331 | explicit Pop(ParserStack* sp) |
332 | : key_(sp->key_), value_(sp->value_), stackPtr_(sp) {} |
333 | void operator()() noexcept; // ScopeGuard requires noexcept |
334 | private: |
335 | const folly::dynamic* key_; |
336 | const folly::dynamic* value_; |
337 | ParserStack* stackPtr_; |
338 | }; |
339 | struct PopGuard { |
340 | explicit PopGuard(ParserStack* sp) : pop_(in_place, sp) {} |
341 | ~PopGuard() { |
342 | pop_ && ((*pop_)(), true); |
343 | } |
344 | |
345 | private: |
346 | Optional<Pop> pop_; |
347 | }; |
348 | |
349 | explicit ParserStack(const folly::dynamic* input) |
350 | : value_(input), |
351 | errors_(folly::dynamic::object()), |
352 | subErrors_({&errors_}) {} |
353 | |
354 | // Not copiable or movable due to numerous internal pointers |
355 | ParserStack(const ParserStack&) = delete; |
356 | ParserStack& operator=(const ParserStack&) = delete; |
357 | ParserStack(ParserStack&&) = delete; |
358 | ParserStack& operator=(ParserStack&&) = delete; |
359 | |
360 | // Lets user code nest parser calls by recording current key+value and |
361 | // returning an RAII guard to restore the old one. `noexcept` since it |
362 | // is used unwrapped. |
363 | PopGuard push(const folly::dynamic& k, const folly::dynamic& v) noexcept; |
364 | |
365 | // Throws DynamicParserLogicError if used outside of a parsing function. |
366 | inline const folly::dynamic& key() const; |
367 | // Throws DynamicParserLogicError if used after releaseErrors(). |
368 | inline const folly::dynamic& value() const; |
369 | |
370 | // Lazily creates new "nested" sub-objects in errors_. |
371 | folly::dynamic& errors(bool allow_non_string_keys) noexcept; |
372 | |
373 | // The user invokes this at most once after the parse is done. |
374 | folly::dynamic releaseErrors(); |
375 | |
376 | // Invoked on error when using OnError::THROW. |
377 | [[noreturn]] void throwErrors(); |
378 | |
379 | private: |
380 | friend struct Pop; |
381 | |
382 | folly::dynamic releaseErrorsImpl(); // for releaseErrors() & throwErrors() |
383 | |
384 | // Null outside of a parsing function. |
385 | const folly::dynamic* key_{nullptr}; |
386 | // Null on errors: when the input was nullptr, or after releaseErrors(). |
387 | const folly::dynamic* value_; |
388 | |
389 | // An object containing some of these keys: |
390 | // "key_errors" -- {"key": "description of error looking up said key"} |
391 | // "error" -- why did we fail to parse this value? |
392 | // "value" -- a copy of the input causing the error, and |
393 | // "nested" -- {"key" or integer for arrays: <another errors_ object>} |
394 | // |
395 | // "nested" will contain identically structured objects with keys (array |
396 | // indices) identifying the origin of the errors. Of course, "input" |
397 | // would no longer refer to the whole input, but to a part. |
398 | folly::dynamic errors_; |
399 | // We only materialize errors_ sub-objects when needed. This stores keys |
400 | // for unmaterialized errors, from outermost to innermost. |
401 | std::vector<const folly::dynamic*> unmaterializedSubErrorKeys_; |
402 | // Materialized errors, from outermost to innermost |
403 | std::vector<folly::dynamic*> subErrors_; // Point into errors_ |
404 | }; |
405 | |
406 | OnError onError_; |
407 | ParserStack stack_; |
408 | bool allowNonStringKeyErrors_{false}; // See the setter's docblock. |
409 | }; |
410 | |
411 | } // namespace folly |
412 | |
413 | #include <folly/experimental/DynamicParser-inl.h> |
414 | |