1 | #pragma once |
2 | |
3 | #include <cassert> // assert |
4 | #include <cstddef> |
5 | #include <string> // string |
6 | #include <utility> // move |
7 | #include <vector> // vector |
8 | |
9 | #include <nlohmann/detail/exceptions.hpp> |
10 | #include <nlohmann/detail/macro_scope.hpp> |
11 | |
12 | namespace nlohmann |
13 | { |
14 | |
15 | /*! |
16 | @brief SAX interface |
17 | |
18 | This class describes the SAX interface used by @ref nlohmann::json::sax_parse. |
19 | Each function is called in different situations while the input is parsed. The |
20 | boolean return value informs the parser whether to continue processing the |
21 | input. |
22 | */ |
23 | template<typename BasicJsonType> |
24 | struct json_sax |
25 | { |
26 | /// type for (signed) integers |
27 | using number_integer_t = typename BasicJsonType::number_integer_t; |
28 | /// type for unsigned integers |
29 | using number_unsigned_t = typename BasicJsonType::number_unsigned_t; |
30 | /// type for floating-point numbers |
31 | using number_float_t = typename BasicJsonType::number_float_t; |
32 | /// type for strings |
33 | using string_t = typename BasicJsonType::string_t; |
34 | |
35 | /*! |
36 | @brief a null value was read |
37 | @return whether parsing should proceed |
38 | */ |
39 | virtual bool null() = 0; |
40 | |
41 | /*! |
42 | @brief a boolean value was read |
43 | @param[in] val boolean value |
44 | @return whether parsing should proceed |
45 | */ |
46 | virtual bool boolean(bool val) = 0; |
47 | |
48 | /*! |
49 | @brief an integer number was read |
50 | @param[in] val integer value |
51 | @return whether parsing should proceed |
52 | */ |
53 | virtual bool number_integer(number_integer_t val) = 0; |
54 | |
55 | /*! |
56 | @brief an unsigned integer number was read |
57 | @param[in] val unsigned integer value |
58 | @return whether parsing should proceed |
59 | */ |
60 | virtual bool number_unsigned(number_unsigned_t val) = 0; |
61 | |
62 | /*! |
63 | @brief an floating-point number was read |
64 | @param[in] val floating-point value |
65 | @param[in] s raw token value |
66 | @return whether parsing should proceed |
67 | */ |
68 | virtual bool number_float(number_float_t val, const string_t& s) = 0; |
69 | |
70 | /*! |
71 | @brief a string was read |
72 | @param[in] val string value |
73 | @return whether parsing should proceed |
74 | @note It is safe to move the passed string. |
75 | */ |
76 | virtual bool string(string_t& val) = 0; |
77 | |
78 | /*! |
79 | @brief the beginning of an object was read |
80 | @param[in] elements number of object elements or -1 if unknown |
81 | @return whether parsing should proceed |
82 | @note binary formats may report the number of elements |
83 | */ |
84 | virtual bool start_object(std::size_t elements) = 0; |
85 | |
86 | /*! |
87 | @brief an object key was read |
88 | @param[in] val object key |
89 | @return whether parsing should proceed |
90 | @note It is safe to move the passed string. |
91 | */ |
92 | virtual bool key(string_t& val) = 0; |
93 | |
94 | /*! |
95 | @brief the end of an object was read |
96 | @return whether parsing should proceed |
97 | */ |
98 | virtual bool end_object() = 0; |
99 | |
100 | /*! |
101 | @brief the beginning of an array was read |
102 | @param[in] elements number of array elements or -1 if unknown |
103 | @return whether parsing should proceed |
104 | @note binary formats may report the number of elements |
105 | */ |
106 | virtual bool start_array(std::size_t elements) = 0; |
107 | |
108 | /*! |
109 | @brief the end of an array was read |
110 | @return whether parsing should proceed |
111 | */ |
112 | virtual bool end_array() = 0; |
113 | |
114 | /*! |
115 | @brief a parse error occurred |
116 | @param[in] position the position in the input where the error occurs |
117 | @param[in] last_token the last read token |
118 | @param[in] ex an exception object describing the error |
119 | @return whether parsing should proceed (must return false) |
120 | */ |
121 | virtual bool parse_error(std::size_t position, |
122 | const std::string& last_token, |
123 | const detail::exception& ex) = 0; |
124 | |
125 | virtual ~json_sax() = default; |
126 | }; |
127 | |
128 | |
129 | namespace detail |
130 | { |
131 | /*! |
132 | @brief SAX implementation to create a JSON value from SAX events |
133 | |
134 | This class implements the @ref json_sax interface and processes the SAX events |
135 | to create a JSON value which makes it basically a DOM parser. The structure or |
136 | hierarchy of the JSON value is managed by the stack `ref_stack` which contains |
137 | a pointer to the respective array or object for each recursion depth. |
138 | |
139 | After successful parsing, the value that is passed by reference to the |
140 | constructor contains the parsed value. |
141 | |
142 | @tparam BasicJsonType the JSON type |
143 | */ |
144 | template<typename BasicJsonType> |
145 | class json_sax_dom_parser |
146 | { |
147 | public: |
148 | using number_integer_t = typename BasicJsonType::number_integer_t; |
149 | using number_unsigned_t = typename BasicJsonType::number_unsigned_t; |
150 | using number_float_t = typename BasicJsonType::number_float_t; |
151 | using string_t = typename BasicJsonType::string_t; |
152 | |
153 | /*! |
154 | @param[in, out] r reference to a JSON value that is manipulated while |
155 | parsing |
156 | @param[in] allow_exceptions_ whether parse errors yield exceptions |
157 | */ |
158 | explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) |
159 | : root(r), allow_exceptions(allow_exceptions_) |
160 | {} |
161 | |
162 | // make class move-only |
163 | json_sax_dom_parser(const json_sax_dom_parser&) = delete; |
164 | json_sax_dom_parser(json_sax_dom_parser&&) = default; |
165 | json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; |
166 | json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; |
167 | ~json_sax_dom_parser() = default; |
168 | |
169 | bool null() |
170 | { |
171 | handle_value(nullptr); |
172 | return true; |
173 | } |
174 | |
175 | bool boolean(bool val) |
176 | { |
177 | handle_value(val); |
178 | return true; |
179 | } |
180 | |
181 | bool number_integer(number_integer_t val) |
182 | { |
183 | handle_value(val); |
184 | return true; |
185 | } |
186 | |
187 | bool number_unsigned(number_unsigned_t val) |
188 | { |
189 | handle_value(val); |
190 | return true; |
191 | } |
192 | |
193 | bool number_float(number_float_t val, const string_t& /*unused*/) |
194 | { |
195 | handle_value(val); |
196 | return true; |
197 | } |
198 | |
199 | bool string(string_t& val) |
200 | { |
201 | handle_value(val); |
202 | return true; |
203 | } |
204 | |
205 | bool start_object(std::size_t len) |
206 | { |
207 | ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); |
208 | |
209 | if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) |
210 | { |
211 | JSON_THROW(out_of_range::create(408, |
212 | "excessive object size: " + std::to_string(len))); |
213 | } |
214 | |
215 | return true; |
216 | } |
217 | |
218 | bool key(string_t& val) |
219 | { |
220 | // add null at given key and store the reference for later |
221 | object_element = &(ref_stack.back()->m_value.object->operator[](val)); |
222 | return true; |
223 | } |
224 | |
225 | bool end_object() |
226 | { |
227 | ref_stack.pop_back(); |
228 | return true; |
229 | } |
230 | |
231 | bool start_array(std::size_t len) |
232 | { |
233 | ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); |
234 | |
235 | if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) |
236 | { |
237 | JSON_THROW(out_of_range::create(408, |
238 | "excessive array size: " + std::to_string(len))); |
239 | } |
240 | |
241 | return true; |
242 | } |
243 | |
244 | bool end_array() |
245 | { |
246 | ref_stack.pop_back(); |
247 | return true; |
248 | } |
249 | |
250 | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, |
251 | const detail::exception& ex) |
252 | { |
253 | errored = true; |
254 | if (allow_exceptions) |
255 | { |
256 | // determine the proper exception type from the id |
257 | switch ((ex.id / 100) % 100) |
258 | { |
259 | case 1: |
260 | JSON_THROW(*static_cast<const detail::parse_error*>(&ex)); |
261 | case 4: |
262 | JSON_THROW(*static_cast<const detail::out_of_range*>(&ex)); |
263 | // LCOV_EXCL_START |
264 | case 2: |
265 | JSON_THROW(*static_cast<const detail::invalid_iterator*>(&ex)); |
266 | case 3: |
267 | JSON_THROW(*static_cast<const detail::type_error*>(&ex)); |
268 | case 5: |
269 | JSON_THROW(*static_cast<const detail::other_error*>(&ex)); |
270 | default: |
271 | assert(false); |
272 | // LCOV_EXCL_STOP |
273 | } |
274 | } |
275 | return false; |
276 | } |
277 | |
278 | constexpr bool is_errored() const |
279 | { |
280 | return errored; |
281 | } |
282 | |
283 | private: |
284 | /*! |
285 | @invariant If the ref stack is empty, then the passed value will be the new |
286 | root. |
287 | @invariant If the ref stack contains a value, then it is an array or an |
288 | object to which we can add elements |
289 | */ |
290 | template<typename Value> |
291 | JSON_HEDLEY_RETURNS_NON_NULL |
292 | BasicJsonType* handle_value(Value&& v) |
293 | { |
294 | if (ref_stack.empty()) |
295 | { |
296 | root = BasicJsonType(std::forward<Value>(v)); |
297 | return &root; |
298 | } |
299 | |
300 | assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); |
301 | |
302 | if (ref_stack.back()->is_array()) |
303 | { |
304 | ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v)); |
305 | return &(ref_stack.back()->m_value.array->back()); |
306 | } |
307 | |
308 | assert(ref_stack.back()->is_object()); |
309 | assert(object_element); |
310 | *object_element = BasicJsonType(std::forward<Value>(v)); |
311 | return object_element; |
312 | } |
313 | |
314 | /// the parsed JSON value |
315 | BasicJsonType& root; |
316 | /// stack to model hierarchy of values |
317 | std::vector<BasicJsonType*> ref_stack {}; |
318 | /// helper to hold the reference for the next object element |
319 | BasicJsonType* object_element = nullptr; |
320 | /// whether a syntax error occurred |
321 | bool errored = false; |
322 | /// whether to throw exceptions in case of errors |
323 | const bool allow_exceptions = true; |
324 | }; |
325 | |
326 | template<typename BasicJsonType> |
327 | class json_sax_dom_callback_parser |
328 | { |
329 | public: |
330 | using number_integer_t = typename BasicJsonType::number_integer_t; |
331 | using number_unsigned_t = typename BasicJsonType::number_unsigned_t; |
332 | using number_float_t = typename BasicJsonType::number_float_t; |
333 | using string_t = typename BasicJsonType::string_t; |
334 | using parser_callback_t = typename BasicJsonType::parser_callback_t; |
335 | using parse_event_t = typename BasicJsonType::parse_event_t; |
336 | |
337 | json_sax_dom_callback_parser(BasicJsonType& r, |
338 | const parser_callback_t cb, |
339 | const bool allow_exceptions_ = true) |
340 | : root(r), callback(cb), allow_exceptions(allow_exceptions_) |
341 | { |
342 | keep_stack.push_back(true); |
343 | } |
344 | |
345 | // make class move-only |
346 | json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; |
347 | json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; |
348 | json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; |
349 | json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; |
350 | ~json_sax_dom_callback_parser() = default; |
351 | |
352 | bool null() |
353 | { |
354 | handle_value(nullptr); |
355 | return true; |
356 | } |
357 | |
358 | bool boolean(bool val) |
359 | { |
360 | handle_value(val); |
361 | return true; |
362 | } |
363 | |
364 | bool number_integer(number_integer_t val) |
365 | { |
366 | handle_value(val); |
367 | return true; |
368 | } |
369 | |
370 | bool number_unsigned(number_unsigned_t val) |
371 | { |
372 | handle_value(val); |
373 | return true; |
374 | } |
375 | |
376 | bool number_float(number_float_t val, const string_t& /*unused*/) |
377 | { |
378 | handle_value(val); |
379 | return true; |
380 | } |
381 | |
382 | bool string(string_t& val) |
383 | { |
384 | handle_value(val); |
385 | return true; |
386 | } |
387 | |
388 | bool start_object(std::size_t len) |
389 | { |
390 | // check callback for object start |
391 | const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::object_start, discarded); |
392 | keep_stack.push_back(keep); |
393 | |
394 | auto val = handle_value(BasicJsonType::value_t::object, true); |
395 | ref_stack.push_back(val.second); |
396 | |
397 | // check object limit |
398 | if (ref_stack.back() and JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) |
399 | { |
400 | JSON_THROW(out_of_range::create(408, "excessive object size: " + std::to_string(len))); |
401 | } |
402 | |
403 | return true; |
404 | } |
405 | |
406 | bool key(string_t& val) |
407 | { |
408 | BasicJsonType k = BasicJsonType(val); |
409 | |
410 | // check callback for key |
411 | const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::key, k); |
412 | key_keep_stack.push_back(keep); |
413 | |
414 | // add discarded value at given key and store the reference for later |
415 | if (keep and ref_stack.back()) |
416 | { |
417 | object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded); |
418 | } |
419 | |
420 | return true; |
421 | } |
422 | |
423 | bool end_object() |
424 | { |
425 | if (ref_stack.back() and not callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) |
426 | { |
427 | // discard object |
428 | *ref_stack.back() = discarded; |
429 | } |
430 | |
431 | assert(not ref_stack.empty()); |
432 | assert(not keep_stack.empty()); |
433 | ref_stack.pop_back(); |
434 | keep_stack.pop_back(); |
435 | |
436 | if (not ref_stack.empty() and ref_stack.back() and ref_stack.back()->is_object()) |
437 | { |
438 | // remove discarded value |
439 | for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) |
440 | { |
441 | if (it->is_discarded()) |
442 | { |
443 | ref_stack.back()->erase(it); |
444 | break; |
445 | } |
446 | } |
447 | } |
448 | |
449 | return true; |
450 | } |
451 | |
452 | bool start_array(std::size_t len) |
453 | { |
454 | const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::array_start, discarded); |
455 | keep_stack.push_back(keep); |
456 | |
457 | auto val = handle_value(BasicJsonType::value_t::array, true); |
458 | ref_stack.push_back(val.second); |
459 | |
460 | // check array limit |
461 | if (ref_stack.back() and JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) |
462 | { |
463 | JSON_THROW(out_of_range::create(408, "excessive array size: " + std::to_string(len))); |
464 | } |
465 | |
466 | return true; |
467 | } |
468 | |
469 | bool end_array() |
470 | { |
471 | bool keep = true; |
472 | |
473 | if (ref_stack.back()) |
474 | { |
475 | keep = callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); |
476 | if (not keep) |
477 | { |
478 | // discard array |
479 | *ref_stack.back() = discarded; |
480 | } |
481 | } |
482 | |
483 | assert(not ref_stack.empty()); |
484 | assert(not keep_stack.empty()); |
485 | ref_stack.pop_back(); |
486 | keep_stack.pop_back(); |
487 | |
488 | // remove discarded value |
489 | if (not keep and not ref_stack.empty() and ref_stack.back()->is_array()) |
490 | { |
491 | ref_stack.back()->m_value.array->pop_back(); |
492 | } |
493 | |
494 | return true; |
495 | } |
496 | |
497 | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, |
498 | const detail::exception& ex) |
499 | { |
500 | errored = true; |
501 | if (allow_exceptions) |
502 | { |
503 | // determine the proper exception type from the id |
504 | switch ((ex.id / 100) % 100) |
505 | { |
506 | case 1: |
507 | JSON_THROW(*static_cast<const detail::parse_error*>(&ex)); |
508 | case 4: |
509 | JSON_THROW(*static_cast<const detail::out_of_range*>(&ex)); |
510 | // LCOV_EXCL_START |
511 | case 2: |
512 | JSON_THROW(*static_cast<const detail::invalid_iterator*>(&ex)); |
513 | case 3: |
514 | JSON_THROW(*static_cast<const detail::type_error*>(&ex)); |
515 | case 5: |
516 | JSON_THROW(*static_cast<const detail::other_error*>(&ex)); |
517 | default: |
518 | assert(false); |
519 | // LCOV_EXCL_STOP |
520 | } |
521 | } |
522 | return false; |
523 | } |
524 | |
525 | constexpr bool is_errored() const |
526 | { |
527 | return errored; |
528 | } |
529 | |
530 | private: |
531 | /*! |
532 | @param[in] v value to add to the JSON value we build during parsing |
533 | @param[in] skip_callback whether we should skip calling the callback |
534 | function; this is required after start_array() and |
535 | start_object() SAX events, because otherwise we would call the |
536 | callback function with an empty array or object, respectively. |
537 | |
538 | @invariant If the ref stack is empty, then the passed value will be the new |
539 | root. |
540 | @invariant If the ref stack contains a value, then it is an array or an |
541 | object to which we can add elements |
542 | |
543 | @return pair of boolean (whether value should be kept) and pointer (to the |
544 | passed value in the ref_stack hierarchy; nullptr if not kept) |
545 | */ |
546 | template<typename Value> |
547 | std::pair<bool, BasicJsonType*> handle_value(Value&& v, const bool skip_callback = false) |
548 | { |
549 | assert(not keep_stack.empty()); |
550 | |
551 | // do not handle this value if we know it would be added to a discarded |
552 | // container |
553 | if (not keep_stack.back()) |
554 | { |
555 | return {false, nullptr}; |
556 | } |
557 | |
558 | // create value |
559 | auto value = BasicJsonType(std::forward<Value>(v)); |
560 | |
561 | // check callback |
562 | const bool keep = skip_callback or callback(static_cast<int>(ref_stack.size()), parse_event_t::value, value); |
563 | |
564 | // do not handle this value if we just learnt it shall be discarded |
565 | if (not keep) |
566 | { |
567 | return {false, nullptr}; |
568 | } |
569 | |
570 | if (ref_stack.empty()) |
571 | { |
572 | root = std::move(value); |
573 | return {true, &root}; |
574 | } |
575 | |
576 | // skip this value if we already decided to skip the parent |
577 | // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) |
578 | if (not ref_stack.back()) |
579 | { |
580 | return {false, nullptr}; |
581 | } |
582 | |
583 | // we now only expect arrays and objects |
584 | assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); |
585 | |
586 | // array |
587 | if (ref_stack.back()->is_array()) |
588 | { |
589 | ref_stack.back()->m_value.array->push_back(std::move(value)); |
590 | return {true, &(ref_stack.back()->m_value.array->back())}; |
591 | } |
592 | |
593 | // object |
594 | assert(ref_stack.back()->is_object()); |
595 | // check if we should store an element for the current key |
596 | assert(not key_keep_stack.empty()); |
597 | const bool store_element = key_keep_stack.back(); |
598 | key_keep_stack.pop_back(); |
599 | |
600 | if (not store_element) |
601 | { |
602 | return {false, nullptr}; |
603 | } |
604 | |
605 | assert(object_element); |
606 | *object_element = std::move(value); |
607 | return {true, object_element}; |
608 | } |
609 | |
610 | /// the parsed JSON value |
611 | BasicJsonType& root; |
612 | /// stack to model hierarchy of values |
613 | std::vector<BasicJsonType*> ref_stack {}; |
614 | /// stack to manage which values to keep |
615 | std::vector<bool> keep_stack {}; |
616 | /// stack to manage which object keys to keep |
617 | std::vector<bool> key_keep_stack {}; |
618 | /// helper to hold the reference for the next object element |
619 | BasicJsonType* object_element = nullptr; |
620 | /// whether a syntax error occurred |
621 | bool errored = false; |
622 | /// callback function |
623 | const parser_callback_t callback = nullptr; |
624 | /// whether to throw exceptions in case of errors |
625 | const bool allow_exceptions = true; |
626 | /// a discarded value for the callback |
627 | BasicJsonType discarded = BasicJsonType::value_t::discarded; |
628 | }; |
629 | |
630 | template<typename BasicJsonType> |
631 | class json_sax_acceptor |
632 | { |
633 | public: |
634 | using number_integer_t = typename BasicJsonType::number_integer_t; |
635 | using number_unsigned_t = typename BasicJsonType::number_unsigned_t; |
636 | using number_float_t = typename BasicJsonType::number_float_t; |
637 | using string_t = typename BasicJsonType::string_t; |
638 | |
639 | bool null() |
640 | { |
641 | return true; |
642 | } |
643 | |
644 | bool boolean(bool /*unused*/) |
645 | { |
646 | return true; |
647 | } |
648 | |
649 | bool number_integer(number_integer_t /*unused*/) |
650 | { |
651 | return true; |
652 | } |
653 | |
654 | bool number_unsigned(number_unsigned_t /*unused*/) |
655 | { |
656 | return true; |
657 | } |
658 | |
659 | bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) |
660 | { |
661 | return true; |
662 | } |
663 | |
664 | bool string(string_t& /*unused*/) |
665 | { |
666 | return true; |
667 | } |
668 | |
669 | bool start_object(std::size_t /*unused*/ = std::size_t(-1)) |
670 | { |
671 | return true; |
672 | } |
673 | |
674 | bool key(string_t& /*unused*/) |
675 | { |
676 | return true; |
677 | } |
678 | |
679 | bool end_object() |
680 | { |
681 | return true; |
682 | } |
683 | |
684 | bool start_array(std::size_t /*unused*/ = std::size_t(-1)) |
685 | { |
686 | return true; |
687 | } |
688 | |
689 | bool end_array() |
690 | { |
691 | return true; |
692 | } |
693 | |
694 | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) |
695 | { |
696 | return false; |
697 | } |
698 | }; |
699 | } // namespace detail |
700 | |
701 | } // namespace nlohmann |
702 | |