1#pragma once
2
3#include <algorithm> // generate_n
4#include <array> // array
5#include <cassert> // assert
6#include <cmath> // ldexp
7#include <cstddef> // size_t
8#include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
9#include <cstdio> // snprintf
10#include <cstring> // memcpy
11#include <iterator> // back_inserter
12#include <limits> // numeric_limits
13#include <string> // char_traits, string
14#include <utility> // make_pair, move
15
16#include <nlohmann/detail/exceptions.hpp>
17#include <nlohmann/detail/input/input_adapters.hpp>
18#include <nlohmann/detail/input/json_sax.hpp>
19#include <nlohmann/detail/macro_scope.hpp>
20#include <nlohmann/detail/meta/is_sax.hpp>
21#include <nlohmann/detail/value_t.hpp>
22
23namespace nlohmann
24{
25namespace detail
26{
27///////////////////
28// binary reader //
29///////////////////
30
/*!
@brief deserialization of BSON, CBOR, MessagePack, and UBJSON values
*/
34template<typename BasicJsonType, typename SAX = json_sax_dom_parser<BasicJsonType>>
35class binary_reader
36{
// shorthands for the number and string types configured on BasicJsonType
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t;
using string_t = typename BasicJsonType::string_t;
// type of the SAX event processor handed to sax_parse()
using json_sax_t = SAX;
42
43 public:
/*!
@brief create a binary reader

The adapter is taken by value and moved into the reader.

@param[in] adapter  input adapter to read from; it must evaluate to true
                    (checked with an assertion)
*/
explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter))
{
    // NOTE(review): instantiating this helper presumably triggers the
    // compile-time checks that SAX offers the expected interface -- confirm
    // against nlohmann/detail/meta/is_sax.hpp
    (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
    assert(ia);
}

// make class move-only: copying is disabled, moving and destruction use the
// compiler-generated defaults
binary_reader(const binary_reader&) = delete;
binary_reader(binary_reader&&) = default;
binary_reader& operator=(const binary_reader&) = delete;
binary_reader& operator=(binary_reader&&) = default;
~binary_reader() = default;
61
62 /*!
63 @param[in] format the binary format to parse
64 @param[in] sax_ a SAX event processor
65 @param[in] strict whether to expect the input to be consumed completed
66
67 @return
68 */
69 JSON_HEDLEY_NON_NULL(3)
70 bool sax_parse(const input_format_t format,
71 json_sax_t* sax_,
72 const bool strict = true)
73 {
74 sax = sax_;
75 bool result = false;
76
77 switch (format)
78 {
79 case input_format_t::bson:
80 result = parse_bson_internal();
81 break;
82
83 case input_format_t::cbor:
84 result = parse_cbor_internal();
85 break;
86
87 case input_format_t::msgpack:
88 result = parse_msgpack_internal();
89 break;
90
91 case input_format_t::ubjson:
92 result = parse_ubjson_internal();
93 break;
94
95 default: // LCOV_EXCL_LINE
96 assert(false); // LCOV_EXCL_LINE
97 }
98
99 // strict mode: next byte must be EOF
100 if (result and strict)
101 {
102 if (format == input_format_t::ubjson)
103 {
104 get_ignore_noop();
105 }
106 else
107 {
108 get();
109 }
110
111 if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char>::eof()))
112 {
113 return sax->parse_error(chars_read, get_token_string(),
114 parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value")));
115 }
116 }
117
118 return result;
119 }
120
/*!
@brief determine system byte order

The first byte of @a num as stored in memory is inspected: on a little
endian system the least significant byte comes first, so the default probe
value 1 yields a first byte of 1.

@param[in] num  probe value whose first byte is inspected (default: 1)

@return true if and only if system's byte order is little endian

@note from http://stackoverflow.com/a/1001328/266378
@note The function is intentionally not `constexpr`: a `reinterpret_cast`
      is never permitted in a constant expression, so a `constexpr`
      specifier could never be honoured and makes the definition
      ill-formed for compilers that diagnose it.
*/
static bool little_endianess(int num = 1) noexcept
{
    return *reinterpret_cast<char*>(&num) == 1;
}
132
133 private:
134 //////////
135 // BSON //
136 //////////
137
138 /*!
139 @brief Reads in a BSON-object and passes it to the SAX-parser.
140 @return whether a valid BSON-value was passed to the SAX parser
141 */
142 bool parse_bson_internal()
143 {
144 std::int32_t document_size;
145 get_number<std::int32_t, true>(input_format_t::bson, document_size);
146
147 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
148 {
149 return false;
150 }
151
152 if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
153 {
154 return false;
155 }
156
157 return sax->end_object();
158 }
159
160 /*!
161 @brief Parses a C-style string from the BSON input.
162 @param[in, out] result A reference to the string variable where the read
163 string is to be stored.
164 @return `true` if the \x00-byte indicating the end of the string was
165 encountered before the EOF; false` indicates an unexpected EOF.
166 */
167 bool get_bson_cstr(string_t& result)
168 {
169 auto out = std::back_inserter(result);
170 while (true)
171 {
172 get();
173 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring")))
174 {
175 return false;
176 }
177 if (current == 0x00)
178 {
179 return true;
180 }
181 *out++ = static_cast<char>(current);
182 }
183
184 return true;
185 }
186
187 /*!
188 @brief Parses a zero-terminated string of length @a len from the BSON
189 input.
190 @param[in] len The length (including the zero-byte at the end) of the
191 string to be read.
192 @param[in, out] result A reference to the string variable where the read
193 string is to be stored.
194 @tparam NumberType The type of the length @a len
195 @pre len >= 1
196 @return `true` if the string was successfully parsed
197 */
198 template<typename NumberType>
199 bool get_bson_string(const NumberType len, string_t& result)
200 {
201 if (JSON_HEDLEY_UNLIKELY(len < 1))
202 {
203 auto last_token = get_token_string();
204 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string")));
205 }
206
207 return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof();
208 }
209
210 /*!
211 @brief Read a BSON document element of the given @a element_type.
212 @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
213 @param[in] element_type_parse_position The position in the input stream,
214 where the `element_type` was read.
215 @warning Not all BSON element types are supported yet. An unsupported
216 @a element_type will give rise to a parse_error.114:
217 Unsupported BSON record type 0x...
218 @return whether a valid BSON-object/array was passed to the SAX parser
219 */
220 bool parse_bson_element_internal(const int element_type,
221 const std::size_t element_type_parse_position)
222 {
223 switch (element_type)
224 {
225 case 0x01: // double
226 {
227 double number;
228 return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), "");
229 }
230
231 case 0x02: // string
232 {
233 std::int32_t len;
234 string_t value;
235 return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value);
236 }
237
238 case 0x03: // object
239 {
240 return parse_bson_internal();
241 }
242
243 case 0x04: // array
244 {
245 return parse_bson_array();
246 }
247
248 case 0x08: // boolean
249 {
250 return sax->boolean(get() != 0);
251 }
252
253 case 0x0A: // null
254 {
255 return sax->null();
256 }
257
258 case 0x10: // int32
259 {
260 std::int32_t value;
261 return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value);
262 }
263
264 case 0x12: // int64
265 {
266 std::int64_t value;
267 return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value);
268 }
269
270 default: // anything else not supported (yet)
271 {
272 std::array<char, 3> cr{{}};
273 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type));
274 return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data())));
275 }
276 }
277 }
278
279 /*!
280 @brief Read a BSON element list (as specified in the BSON-spec)
281
282 The same binary layout is used for objects and arrays, hence it must be
283 indicated with the argument @a is_array which one is expected
284 (true --> array, false --> object).
285
286 @param[in] is_array Determines if the element list being read is to be
287 treated as an object (@a is_array == false), or as an
288 array (@a is_array == true).
289 @return whether a valid BSON-object/array was passed to the SAX parser
290 */
291 bool parse_bson_element_list(const bool is_array)
292 {
293 string_t key;
294 while (int element_type = get())
295 {
296 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
297 {
298 return false;
299 }
300
301 const std::size_t element_type_parse_position = chars_read;
302 if (JSON_HEDLEY_UNLIKELY(not get_bson_cstr(key)))
303 {
304 return false;
305 }
306
307 if (not is_array and not sax->key(key))
308 {
309 return false;
310 }
311
312 if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position)))
313 {
314 return false;
315 }
316
317 // get_bson_cstr only appends
318 key.clear();
319 }
320
321 return true;
322 }
323
324 /*!
325 @brief Reads an array from the BSON input and passes it to the SAX-parser.
326 @return whether a valid BSON-array was passed to the SAX parser
327 */
328 bool parse_bson_array()
329 {
330 std::int32_t document_size;
331 get_number<std::int32_t, true>(input_format_t::bson, document_size);
332
333 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
334 {
335 return false;
336 }
337
338 if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_list(/*is_array*/true)))
339 {
340 return false;
341 }
342
343 return sax->end_array();
344 }
345
346 //////////
347 // CBOR //
348 //////////
349
350 /*!
351 @param[in] get_char whether a new character should be retrieved from the
352 input (true, default) or whether the last read
353 character should be considered instead
354
355 @return whether a valid CBOR value was passed to the SAX parser
356 */
357 bool parse_cbor_internal(const bool get_char = true)
358 {
359 switch (get_char ? get() : current)
360 {
361 // EOF
362 case std::char_traits<char>::eof():
363 return unexpect_eof(input_format_t::cbor, "value");
364
365 // Integer 0x00..0x17 (0..23)
366 case 0x00:
367 case 0x01:
368 case 0x02:
369 case 0x03:
370 case 0x04:
371 case 0x05:
372 case 0x06:
373 case 0x07:
374 case 0x08:
375 case 0x09:
376 case 0x0A:
377 case 0x0B:
378 case 0x0C:
379 case 0x0D:
380 case 0x0E:
381 case 0x0F:
382 case 0x10:
383 case 0x11:
384 case 0x12:
385 case 0x13:
386 case 0x14:
387 case 0x15:
388 case 0x16:
389 case 0x17:
390 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
391
392 case 0x18: // Unsigned integer (one-byte uint8_t follows)
393 {
394 std::uint8_t number;
395 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
396 }
397
398 case 0x19: // Unsigned integer (two-byte uint16_t follows)
399 {
400 std::uint16_t number;
401 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
402 }
403
404 case 0x1A: // Unsigned integer (four-byte uint32_t follows)
405 {
406 std::uint32_t number;
407 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
408 }
409
410 case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
411 {
412 std::uint64_t number;
413 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
414 }
415
416 // Negative integer -1-0x00..-1-0x17 (-1..-24)
417 case 0x20:
418 case 0x21:
419 case 0x22:
420 case 0x23:
421 case 0x24:
422 case 0x25:
423 case 0x26:
424 case 0x27:
425 case 0x28:
426 case 0x29:
427 case 0x2A:
428 case 0x2B:
429 case 0x2C:
430 case 0x2D:
431 case 0x2E:
432 case 0x2F:
433 case 0x30:
434 case 0x31:
435 case 0x32:
436 case 0x33:
437 case 0x34:
438 case 0x35:
439 case 0x36:
440 case 0x37:
441 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
442
443 case 0x38: // Negative integer (one-byte uint8_t follows)
444 {
445 std::uint8_t number;
446 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number);
447 }
448
449 case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
450 {
451 std::uint16_t number;
452 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number);
453 }
454
455 case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
456 {
457 std::uint32_t number;
458 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number);
459 }
460
461 case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
462 {
463 std::uint64_t number;
464 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1)
465 - static_cast<number_integer_t>(number));
466 }
467
468 // UTF-8 string (0x00..0x17 bytes follow)
469 case 0x60:
470 case 0x61:
471 case 0x62:
472 case 0x63:
473 case 0x64:
474 case 0x65:
475 case 0x66:
476 case 0x67:
477 case 0x68:
478 case 0x69:
479 case 0x6A:
480 case 0x6B:
481 case 0x6C:
482 case 0x6D:
483 case 0x6E:
484 case 0x6F:
485 case 0x70:
486 case 0x71:
487 case 0x72:
488 case 0x73:
489 case 0x74:
490 case 0x75:
491 case 0x76:
492 case 0x77:
493 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
494 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
495 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
496 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
497 case 0x7F: // UTF-8 string (indefinite length)
498 {
499 string_t s;
500 return get_cbor_string(s) and sax->string(s);
501 }
502
503 // array (0x00..0x17 data items follow)
504 case 0x80:
505 case 0x81:
506 case 0x82:
507 case 0x83:
508 case 0x84:
509 case 0x85:
510 case 0x86:
511 case 0x87:
512 case 0x88:
513 case 0x89:
514 case 0x8A:
515 case 0x8B:
516 case 0x8C:
517 case 0x8D:
518 case 0x8E:
519 case 0x8F:
520 case 0x90:
521 case 0x91:
522 case 0x92:
523 case 0x93:
524 case 0x94:
525 case 0x95:
526 case 0x96:
527 case 0x97:
528 return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu));
529
530 case 0x98: // array (one-byte uint8_t for n follows)
531 {
532 std::uint8_t len;
533 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
534 }
535
536 case 0x99: // array (two-byte uint16_t for n follow)
537 {
538 std::uint16_t len;
539 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
540 }
541
542 case 0x9A: // array (four-byte uint32_t for n follow)
543 {
544 std::uint32_t len;
545 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
546 }
547
548 case 0x9B: // array (eight-byte uint64_t for n follow)
549 {
550 std::uint64_t len;
551 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
552 }
553
554 case 0x9F: // array (indefinite length)
555 return get_cbor_array(std::size_t(-1));
556
557 // map (0x00..0x17 pairs of data items follow)
558 case 0xA0:
559 case 0xA1:
560 case 0xA2:
561 case 0xA3:
562 case 0xA4:
563 case 0xA5:
564 case 0xA6:
565 case 0xA7:
566 case 0xA8:
567 case 0xA9:
568 case 0xAA:
569 case 0xAB:
570 case 0xAC:
571 case 0xAD:
572 case 0xAE:
573 case 0xAF:
574 case 0xB0:
575 case 0xB1:
576 case 0xB2:
577 case 0xB3:
578 case 0xB4:
579 case 0xB5:
580 case 0xB6:
581 case 0xB7:
582 return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu));
583
584 case 0xB8: // map (one-byte uint8_t for n follows)
585 {
586 std::uint8_t len;
587 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
588 }
589
590 case 0xB9: // map (two-byte uint16_t for n follow)
591 {
592 std::uint16_t len;
593 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
594 }
595
596 case 0xBA: // map (four-byte uint32_t for n follow)
597 {
598 std::uint32_t len;
599 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
600 }
601
602 case 0xBB: // map (eight-byte uint64_t for n follow)
603 {
604 std::uint64_t len;
605 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
606 }
607
608 case 0xBF: // map (indefinite length)
609 return get_cbor_object(std::size_t(-1));
610
611 case 0xF4: // false
612 return sax->boolean(false);
613
614 case 0xF5: // true
615 return sax->boolean(true);
616
617 case 0xF6: // null
618 return sax->null();
619
620 case 0xF9: // Half-Precision Float (two-byte IEEE 754)
621 {
622 const int byte1_raw = get();
623 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number")))
624 {
625 return false;
626 }
627 const int byte2_raw = get();
628 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number")))
629 {
630 return false;
631 }
632
633 const auto byte1 = static_cast<unsigned char>(byte1_raw);
634 const auto byte2 = static_cast<unsigned char>(byte2_raw);
635
636 // code from RFC 7049, Appendix D, Figure 3:
637 // As half-precision floating-point numbers were only added
638 // to IEEE 754 in 2008, today's programming platforms often
639 // still only have limited support for them. It is very
640 // easy to include at least decoding support for them even
641 // without such support. An example of a small decoder for
642 // half-precision floating-point numbers in the C language
643 // is shown in Fig. 3.
644 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
645 const double val = [&half]
646 {
647 const int exp = (half >> 10u) & 0x1Fu;
648 const unsigned int mant = half & 0x3FFu;
649 assert(0 <= exp and exp <= 32);
650 assert(mant <= 1024);
651 switch (exp)
652 {
653 case 0:
654 return std::ldexp(mant, -24);
655 case 31:
656 return (mant == 0)
657 ? std::numeric_limits<double>::infinity()
658 : std::numeric_limits<double>::quiet_NaN();
659 default:
660 return std::ldexp(mant + 1024, exp - 25);
661 }
662 }();
663 return sax->number_float((half & 0x8000u) != 0
664 ? static_cast<number_float_t>(-val)
665 : static_cast<number_float_t>(val), "");
666 }
667
668 case 0xFA: // Single-Precision Float (four-byte IEEE 754)
669 {
670 float number;
671 return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), "");
672 }
673
674 case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
675 {
676 double number;
677 return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), "");
678 }
679
680 default: // anything else (0xFF is handled inside the other types)
681 {
682 auto last_token = get_token_string();
683 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
684 }
685 }
686 }
687
688 /*!
689 @brief reads a CBOR string
690
691 This function first reads starting bytes to determine the expected
692 string length and then copies this number of bytes into a string.
693 Additionally, CBOR's strings with indefinite lengths are supported.
694
695 @param[out] result created string
696
697 @return whether string creation completed
698 */
699 bool get_cbor_string(string_t& result)
700 {
701 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
702 {
703 return false;
704 }
705
706 switch (current)
707 {
708 // UTF-8 string (0x00..0x17 bytes follow)
709 case 0x60:
710 case 0x61:
711 case 0x62:
712 case 0x63:
713 case 0x64:
714 case 0x65:
715 case 0x66:
716 case 0x67:
717 case 0x68:
718 case 0x69:
719 case 0x6A:
720 case 0x6B:
721 case 0x6C:
722 case 0x6D:
723 case 0x6E:
724 case 0x6F:
725 case 0x70:
726 case 0x71:
727 case 0x72:
728 case 0x73:
729 case 0x74:
730 case 0x75:
731 case 0x76:
732 case 0x77:
733 {
734 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
735 }
736
737 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
738 {
739 std::uint8_t len;
740 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
741 }
742
743 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
744 {
745 std::uint16_t len;
746 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
747 }
748
749 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
750 {
751 std::uint32_t len;
752 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
753 }
754
755 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
756 {
757 std::uint64_t len;
758 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
759 }
760
761 case 0x7F: // UTF-8 string (indefinite length)
762 {
763 while (get() != 0xFF)
764 {
765 string_t chunk;
766 if (not get_cbor_string(chunk))
767 {
768 return false;
769 }
770 result.append(chunk);
771 }
772 return true;
773 }
774
775 default:
776 {
777 auto last_token = get_token_string();
778 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
779 }
780 }
781 }
782
783 /*!
784 @param[in] len the length of the array or std::size_t(-1) for an
785 array of indefinite size
786 @return whether array creation completed
787 */
788 bool get_cbor_array(const std::size_t len)
789 {
790 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(len)))
791 {
792 return false;
793 }
794
795 if (len != std::size_t(-1))
796 {
797 for (std::size_t i = 0; i < len; ++i)
798 {
799 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal()))
800 {
801 return false;
802 }
803 }
804 }
805 else
806 {
807 while (get() != 0xFF)
808 {
809 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal(false)))
810 {
811 return false;
812 }
813 }
814 }
815
816 return sax->end_array();
817 }
818
819 /*!
820 @param[in] len the length of the object or std::size_t(-1) for an
821 object of indefinite size
822 @return whether object creation completed
823 */
824 bool get_cbor_object(const std::size_t len)
825 {
826 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(len)))
827 {
828 return false;
829 }
830
831 string_t key;
832 if (len != std::size_t(-1))
833 {
834 for (std::size_t i = 0; i < len; ++i)
835 {
836 get();
837 if (JSON_HEDLEY_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
838 {
839 return false;
840 }
841
842 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal()))
843 {
844 return false;
845 }
846 key.clear();
847 }
848 }
849 else
850 {
851 while (get() != 0xFF)
852 {
853 if (JSON_HEDLEY_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
854 {
855 return false;
856 }
857
858 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal()))
859 {
860 return false;
861 }
862 key.clear();
863 }
864 }
865
866 return sax->end_object();
867 }
868
869 /////////////
870 // MsgPack //
871 /////////////
872
873 /*!
874 @return whether a valid MessagePack value was passed to the SAX parser
875 */
876 bool parse_msgpack_internal()
877 {
878 switch (get())
879 {
880 // EOF
881 case std::char_traits<char>::eof():
882 return unexpect_eof(input_format_t::msgpack, "value");
883
884 // positive fixint
885 case 0x00:
886 case 0x01:
887 case 0x02:
888 case 0x03:
889 case 0x04:
890 case 0x05:
891 case 0x06:
892 case 0x07:
893 case 0x08:
894 case 0x09:
895 case 0x0A:
896 case 0x0B:
897 case 0x0C:
898 case 0x0D:
899 case 0x0E:
900 case 0x0F:
901 case 0x10:
902 case 0x11:
903 case 0x12:
904 case 0x13:
905 case 0x14:
906 case 0x15:
907 case 0x16:
908 case 0x17:
909 case 0x18:
910 case 0x19:
911 case 0x1A:
912 case 0x1B:
913 case 0x1C:
914 case 0x1D:
915 case 0x1E:
916 case 0x1F:
917 case 0x20:
918 case 0x21:
919 case 0x22:
920 case 0x23:
921 case 0x24:
922 case 0x25:
923 case 0x26:
924 case 0x27:
925 case 0x28:
926 case 0x29:
927 case 0x2A:
928 case 0x2B:
929 case 0x2C:
930 case 0x2D:
931 case 0x2E:
932 case 0x2F:
933 case 0x30:
934 case 0x31:
935 case 0x32:
936 case 0x33:
937 case 0x34:
938 case 0x35:
939 case 0x36:
940 case 0x37:
941 case 0x38:
942 case 0x39:
943 case 0x3A:
944 case 0x3B:
945 case 0x3C:
946 case 0x3D:
947 case 0x3E:
948 case 0x3F:
949 case 0x40:
950 case 0x41:
951 case 0x42:
952 case 0x43:
953 case 0x44:
954 case 0x45:
955 case 0x46:
956 case 0x47:
957 case 0x48:
958 case 0x49:
959 case 0x4A:
960 case 0x4B:
961 case 0x4C:
962 case 0x4D:
963 case 0x4E:
964 case 0x4F:
965 case 0x50:
966 case 0x51:
967 case 0x52:
968 case 0x53:
969 case 0x54:
970 case 0x55:
971 case 0x56:
972 case 0x57:
973 case 0x58:
974 case 0x59:
975 case 0x5A:
976 case 0x5B:
977 case 0x5C:
978 case 0x5D:
979 case 0x5E:
980 case 0x5F:
981 case 0x60:
982 case 0x61:
983 case 0x62:
984 case 0x63:
985 case 0x64:
986 case 0x65:
987 case 0x66:
988 case 0x67:
989 case 0x68:
990 case 0x69:
991 case 0x6A:
992 case 0x6B:
993 case 0x6C:
994 case 0x6D:
995 case 0x6E:
996 case 0x6F:
997 case 0x70:
998 case 0x71:
999 case 0x72:
1000 case 0x73:
1001 case 0x74:
1002 case 0x75:
1003 case 0x76:
1004 case 0x77:
1005 case 0x78:
1006 case 0x79:
1007 case 0x7A:
1008 case 0x7B:
1009 case 0x7C:
1010 case 0x7D:
1011 case 0x7E:
1012 case 0x7F:
1013 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1014
1015 // fixmap
1016 case 0x80:
1017 case 0x81:
1018 case 0x82:
1019 case 0x83:
1020 case 0x84:
1021 case 0x85:
1022 case 0x86:
1023 case 0x87:
1024 case 0x88:
1025 case 0x89:
1026 case 0x8A:
1027 case 0x8B:
1028 case 0x8C:
1029 case 0x8D:
1030 case 0x8E:
1031 case 0x8F:
1032 return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1033
1034 // fixarray
1035 case 0x90:
1036 case 0x91:
1037 case 0x92:
1038 case 0x93:
1039 case 0x94:
1040 case 0x95:
1041 case 0x96:
1042 case 0x97:
1043 case 0x98:
1044 case 0x99:
1045 case 0x9A:
1046 case 0x9B:
1047 case 0x9C:
1048 case 0x9D:
1049 case 0x9E:
1050 case 0x9F:
1051 return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1052
1053 // fixstr
1054 case 0xA0:
1055 case 0xA1:
1056 case 0xA2:
1057 case 0xA3:
1058 case 0xA4:
1059 case 0xA5:
1060 case 0xA6:
1061 case 0xA7:
1062 case 0xA8:
1063 case 0xA9:
1064 case 0xAA:
1065 case 0xAB:
1066 case 0xAC:
1067 case 0xAD:
1068 case 0xAE:
1069 case 0xAF:
1070 case 0xB0:
1071 case 0xB1:
1072 case 0xB2:
1073 case 0xB3:
1074 case 0xB4:
1075 case 0xB5:
1076 case 0xB6:
1077 case 0xB7:
1078 case 0xB8:
1079 case 0xB9:
1080 case 0xBA:
1081 case 0xBB:
1082 case 0xBC:
1083 case 0xBD:
1084 case 0xBE:
1085 case 0xBF:
1086 case 0xD9: // str 8
1087 case 0xDA: // str 16
1088 case 0xDB: // str 32
1089 {
1090 string_t s;
1091 return get_msgpack_string(s) and sax->string(s);
1092 }
1093
1094 case 0xC0: // nil
1095 return sax->null();
1096
1097 case 0xC2: // false
1098 return sax->boolean(false);
1099
1100 case 0xC3: // true
1101 return sax->boolean(true);
1102
1103 case 0xCA: // float 32
1104 {
1105 float number;
1106 return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
1107 }
1108
1109 case 0xCB: // float 64
1110 {
1111 double number;
1112 return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
1113 }
1114
1115 case 0xCC: // uint 8
1116 {
1117 std::uint8_t number;
1118 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1119 }
1120
1121 case 0xCD: // uint 16
1122 {
1123 std::uint16_t number;
1124 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1125 }
1126
1127 case 0xCE: // uint 32
1128 {
1129 std::uint32_t number;
1130 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1131 }
1132
1133 case 0xCF: // uint 64
1134 {
1135 std::uint64_t number;
1136 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1137 }
1138
1139 case 0xD0: // int 8
1140 {
1141 std::int8_t number;
1142 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1143 }
1144
1145 case 0xD1: // int 16
1146 {
1147 std::int16_t number;
1148 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1149 }
1150
1151 case 0xD2: // int 32
1152 {
1153 std::int32_t number;
1154 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1155 }
1156
1157 case 0xD3: // int 64
1158 {
1159 std::int64_t number;
1160 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1161 }
1162
1163 case 0xDC: // array 16
1164 {
1165 std::uint16_t len;
1166 return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
1167 }
1168
1169 case 0xDD: // array 32
1170 {
1171 std::uint32_t len;
1172 return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
1173 }
1174
1175 case 0xDE: // map 16
1176 {
1177 std::uint16_t len;
1178 return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
1179 }
1180
1181 case 0xDF: // map 32
1182 {
1183 std::uint32_t len;
1184 return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
1185 }
1186
1187 // negative fixint
1188 case 0xE0:
1189 case 0xE1:
1190 case 0xE2:
1191 case 0xE3:
1192 case 0xE4:
1193 case 0xE5:
1194 case 0xE6:
1195 case 0xE7:
1196 case 0xE8:
1197 case 0xE9:
1198 case 0xEA:
1199 case 0xEB:
1200 case 0xEC:
1201 case 0xED:
1202 case 0xEE:
1203 case 0xEF:
1204 case 0xF0:
1205 case 0xF1:
1206 case 0xF2:
1207 case 0xF3:
1208 case 0xF4:
1209 case 0xF5:
1210 case 0xF6:
1211 case 0xF7:
1212 case 0xF8:
1213 case 0xF9:
1214 case 0xFA:
1215 case 0xFB:
1216 case 0xFC:
1217 case 0xFD:
1218 case 0xFE:
1219 case 0xFF:
1220 return sax->number_integer(static_cast<std::int8_t>(current));
1221
1222 default: // anything else
1223 {
1224 auto last_token = get_token_string();
1225 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
1226 }
1227 }
1228 }
1229
1230 /*!
1231 @brief reads a MessagePack string
1232
1233 This function first reads starting bytes to determine the expected
1234 string length and then copies this number of bytes into a string.
1235
1236 @param[out] result created string
1237
1238 @return whether string creation completed
1239 */
1240 bool get_msgpack_string(string_t& result)
1241 {
1242 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string")))
1243 {
1244 return false;
1245 }
1246
1247 switch (current)
1248 {
1249 // fixstr
1250 case 0xA0:
1251 case 0xA1:
1252 case 0xA2:
1253 case 0xA3:
1254 case 0xA4:
1255 case 0xA5:
1256 case 0xA6:
1257 case 0xA7:
1258 case 0xA8:
1259 case 0xA9:
1260 case 0xAA:
1261 case 0xAB:
1262 case 0xAC:
1263 case 0xAD:
1264 case 0xAE:
1265 case 0xAF:
1266 case 0xB0:
1267 case 0xB1:
1268 case 0xB2:
1269 case 0xB3:
1270 case 0xB4:
1271 case 0xB5:
1272 case 0xB6:
1273 case 0xB7:
1274 case 0xB8:
1275 case 0xB9:
1276 case 0xBA:
1277 case 0xBB:
1278 case 0xBC:
1279 case 0xBD:
1280 case 0xBE:
1281 case 0xBF:
1282 {
1283 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1284 }
1285
1286 case 0xD9: // str 8
1287 {
1288 std::uint8_t len;
1289 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1290 }
1291
1292 case 0xDA: // str 16
1293 {
1294 std::uint16_t len;
1295 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1296 }
1297
1298 case 0xDB: // str 32
1299 {
1300 std::uint32_t len;
1301 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1302 }
1303
1304 default:
1305 {
1306 auto last_token = get_token_string();
1307 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string")));
1308 }
1309 }
1310 }
1311
1312 /*!
1313 @param[in] len the length of the array
1314 @return whether array creation completed
1315 */
1316 bool get_msgpack_array(const std::size_t len)
1317 {
1318 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(len)))
1319 {
1320 return false;
1321 }
1322
1323 for (std::size_t i = 0; i < len; ++i)
1324 {
1325 if (JSON_HEDLEY_UNLIKELY(not parse_msgpack_internal()))
1326 {
1327 return false;
1328 }
1329 }
1330
1331 return sax->end_array();
1332 }
1333
1334 /*!
1335 @param[in] len the length of the object
1336 @return whether object creation completed
1337 */
1338 bool get_msgpack_object(const std::size_t len)
1339 {
1340 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(len)))
1341 {
1342 return false;
1343 }
1344
1345 string_t key;
1346 for (std::size_t i = 0; i < len; ++i)
1347 {
1348 get();
1349 if (JSON_HEDLEY_UNLIKELY(not get_msgpack_string(key) or not sax->key(key)))
1350 {
1351 return false;
1352 }
1353
1354 if (JSON_HEDLEY_UNLIKELY(not parse_msgpack_internal()))
1355 {
1356 return false;
1357 }
1358 key.clear();
1359 }
1360
1361 return sax->end_object();
1362 }
1363
1364 ////////////
1365 // UBJSON //
1366 ////////////
1367
1368 /*!
1369 @param[in] get_char whether a new character should be retrieved from the
1370 input (true, default) or whether the last read
1371 character should be considered instead
1372
1373 @return whether a valid UBJSON value was passed to the SAX parser
1374 */
1375 bool parse_ubjson_internal(const bool get_char = true)
1376 {
1377 return get_ubjson_value(get_char ? get_ignore_noop() : current);
1378 }
1379
1380 /*!
1381 @brief reads a UBJSON string
1382
1383 This function is either called after reading the 'S' byte explicitly
1384 indicating a string, or in case of an object key where the 'S' byte can be
1385 left out.
1386
1387 @param[out] result created string
1388 @param[in] get_char whether a new character should be retrieved from the
1389 input (true, default) or whether the last read
1390 character should be considered instead
1391
1392 @return whether string creation completed
1393 */
1394 bool get_ubjson_string(string_t& result, const bool get_char = true)
1395 {
1396 if (get_char)
1397 {
1398 get(); // TODO(niels): may we ignore N here?
1399 }
1400
1401 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value")))
1402 {
1403 return false;
1404 }
1405
1406 switch (current)
1407 {
1408 case 'U':
1409 {
1410 std::uint8_t len;
1411 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1412 }
1413
1414 case 'i':
1415 {
1416 std::int8_t len;
1417 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1418 }
1419
1420 case 'I':
1421 {
1422 std::int16_t len;
1423 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1424 }
1425
1426 case 'l':
1427 {
1428 std::int32_t len;
1429 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1430 }
1431
1432 case 'L':
1433 {
1434 std::int64_t len;
1435 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1436 }
1437
1438 default:
1439 auto last_token = get_token_string();
1440 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string")));
1441 }
1442 }
1443
1444 /*!
1445 @param[out] result determined size
1446 @return whether size determination completed
1447 */
1448 bool get_ubjson_size_value(std::size_t& result)
1449 {
1450 switch (get_ignore_noop())
1451 {
1452 case 'U':
1453 {
1454 std::uint8_t number;
1455 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1456 {
1457 return false;
1458 }
1459 result = static_cast<std::size_t>(number);
1460 return true;
1461 }
1462
1463 case 'i':
1464 {
1465 std::int8_t number;
1466 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1467 {
1468 return false;
1469 }
1470 result = static_cast<std::size_t>(number);
1471 return true;
1472 }
1473
1474 case 'I':
1475 {
1476 std::int16_t number;
1477 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1478 {
1479 return false;
1480 }
1481 result = static_cast<std::size_t>(number);
1482 return true;
1483 }
1484
1485 case 'l':
1486 {
1487 std::int32_t number;
1488 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1489 {
1490 return false;
1491 }
1492 result = static_cast<std::size_t>(number);
1493 return true;
1494 }
1495
1496 case 'L':
1497 {
1498 std::int64_t number;
1499 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1500 {
1501 return false;
1502 }
1503 result = static_cast<std::size_t>(number);
1504 return true;
1505 }
1506
1507 default:
1508 {
1509 auto last_token = get_token_string();
1510 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size")));
1511 }
1512 }
1513 }
1514
1515 /*!
1516 @brief determine the type and size for a container
1517
1518 In the optimized UBJSON format, a type and a size can be provided to allow
1519 for a more compact representation.
1520
1521 @param[out] result pair of the size and the type
1522
1523 @return whether pair creation completed
1524 */
1525 bool get_ubjson_size_type(std::pair<std::size_t, int>& result)
1526 {
1527 result.first = string_t::npos; // size
1528 result.second = 0; // type
1529
1530 get_ignore_noop();
1531
1532 if (current == '$')
1533 {
1534 result.second = get(); // must not ignore 'N', because 'N' maybe the type
1535 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type")))
1536 {
1537 return false;
1538 }
1539
1540 get_ignore_noop();
1541 if (JSON_HEDLEY_UNLIKELY(current != '#'))
1542 {
1543 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value")))
1544 {
1545 return false;
1546 }
1547 auto last_token = get_token_string();
1548 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size")));
1549 }
1550
1551 return get_ubjson_size_value(result.first);
1552 }
1553
1554 if (current == '#')
1555 {
1556 return get_ubjson_size_value(result.first);
1557 }
1558
1559 return true;
1560 }
1561
1562 /*!
1563 @param prefix the previously read or set type prefix
1564 @return whether value creation completed
1565 */
1566 bool get_ubjson_value(const int prefix)
1567 {
1568 switch (prefix)
1569 {
1570 case std::char_traits<char>::eof(): // EOF
1571 return unexpect_eof(input_format_t::ubjson, "value");
1572
1573 case 'T': // true
1574 return sax->boolean(true);
1575 case 'F': // false
1576 return sax->boolean(false);
1577
1578 case 'Z': // null
1579 return sax->null();
1580
1581 case 'U':
1582 {
1583 std::uint8_t number;
1584 return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number);
1585 }
1586
1587 case 'i':
1588 {
1589 std::int8_t number;
1590 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1591 }
1592
1593 case 'I':
1594 {
1595 std::int16_t number;
1596 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1597 }
1598
1599 case 'l':
1600 {
1601 std::int32_t number;
1602 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1603 }
1604
1605 case 'L':
1606 {
1607 std::int64_t number;
1608 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1609 }
1610
1611 case 'd':
1612 {
1613 float number;
1614 return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), "");
1615 }
1616
1617 case 'D':
1618 {
1619 double number;
1620 return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), "");
1621 }
1622
1623 case 'C': // char
1624 {
1625 get();
1626 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char")))
1627 {
1628 return false;
1629 }
1630 if (JSON_HEDLEY_UNLIKELY(current > 127))
1631 {
1632 auto last_token = get_token_string();
1633 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char")));
1634 }
1635 string_t s(1, static_cast<char>(current));
1636 return sax->string(s);
1637 }
1638
1639 case 'S': // string
1640 {
1641 string_t s;
1642 return get_ubjson_string(s) and sax->string(s);
1643 }
1644
1645 case '[': // array
1646 return get_ubjson_array();
1647
1648 case '{': // object
1649 return get_ubjson_object();
1650
1651 default: // anything else
1652 {
1653 auto last_token = get_token_string();
1654 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value")));
1655 }
1656 }
1657 }
1658
1659 /*!
1660 @return whether array creation completed
1661 */
1662 bool get_ubjson_array()
1663 {
1664 std::pair<std::size_t, int> size_and_type;
1665 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_size_type(size_and_type)))
1666 {
1667 return false;
1668 }
1669
1670 if (size_and_type.first != string_t::npos)
1671 {
1672 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(size_and_type.first)))
1673 {
1674 return false;
1675 }
1676
1677 if (size_and_type.second != 0)
1678 {
1679 if (size_and_type.second != 'N')
1680 {
1681 for (std::size_t i = 0; i < size_and_type.first; ++i)
1682 {
1683 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_value(size_and_type.second)))
1684 {
1685 return false;
1686 }
1687 }
1688 }
1689 }
1690 else
1691 {
1692 for (std::size_t i = 0; i < size_and_type.first; ++i)
1693 {
1694 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal()))
1695 {
1696 return false;
1697 }
1698 }
1699 }
1700 }
1701 else
1702 {
1703 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
1704 {
1705 return false;
1706 }
1707
1708 while (current != ']')
1709 {
1710 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal(false)))
1711 {
1712 return false;
1713 }
1714 get_ignore_noop();
1715 }
1716 }
1717
1718 return sax->end_array();
1719 }
1720
1721 /*!
1722 @return whether object creation completed
1723 */
1724 bool get_ubjson_object()
1725 {
1726 std::pair<std::size_t, int> size_and_type;
1727 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_size_type(size_and_type)))
1728 {
1729 return false;
1730 }
1731
1732 string_t key;
1733 if (size_and_type.first != string_t::npos)
1734 {
1735 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(size_and_type.first)))
1736 {
1737 return false;
1738 }
1739
1740 if (size_and_type.second != 0)
1741 {
1742 for (std::size_t i = 0; i < size_and_type.first; ++i)
1743 {
1744 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key) or not sax->key(key)))
1745 {
1746 return false;
1747 }
1748 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_value(size_and_type.second)))
1749 {
1750 return false;
1751 }
1752 key.clear();
1753 }
1754 }
1755 else
1756 {
1757 for (std::size_t i = 0; i < size_and_type.first; ++i)
1758 {
1759 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key) or not sax->key(key)))
1760 {
1761 return false;
1762 }
1763 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal()))
1764 {
1765 return false;
1766 }
1767 key.clear();
1768 }
1769 }
1770 }
1771 else
1772 {
1773 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
1774 {
1775 return false;
1776 }
1777
1778 while (current != '}')
1779 {
1780 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key)))
1781 {
1782 return false;
1783 }
1784 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal()))
1785 {
1786 return false;
1787 }
1788 get_ignore_noop();
1789 key.clear();
1790 }
1791 }
1792
1793 return sax->end_object();
1794 }
1795
1796 ///////////////////////
1797 // Utility functions //
1798 ///////////////////////
1799
1800 /*!
1801 @brief get next character from the input
1802
1803 This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns the negative value
`std::char_traits<char>::eof()` in that case.
1806
1807 @return character read from the input
1808 */
1809 int get()
1810 {
1811 ++chars_read;
1812 return current = ia->get_character();
1813 }
1814
1815 /*!
1816 @return character read from the input after ignoring all 'N' entries
1817 */
1818 int get_ignore_noop()
1819 {
1820 do
1821 {
1822 get();
1823 }
1824 while (current == 'N');
1825
1826 return current;
1827 }
1828
1829 /*
1830 @brief read a number from the input
1831
1832 @tparam NumberType the type of the number
1833 @param[in] format the current format (for diagnostics)
1834 @param[out] result number of type @a NumberType
1835
1836 @return whether conversion completed
1837
@note This function needs to respect the system's endianness, because
1839 bytes in CBOR, MessagePack, and UBJSON are stored in network order
1840 (big endian) and therefore need reordering on little endian systems.
1841 */
1842 template<typename NumberType, bool InputIsLittleEndian = false>
1843 bool get_number(const input_format_t format, NumberType& result)
1844 {
1845 // step 1: read input into array with system's byte order
1846 std::array<std::uint8_t, sizeof(NumberType)> vec;
1847 for (std::size_t i = 0; i < sizeof(NumberType); ++i)
1848 {
1849 get();
1850 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format, "number")))
1851 {
1852 return false;
1853 }
1854
1855 // reverse byte order prior to conversion if necessary
1856 if (is_little_endian != InputIsLittleEndian)
1857 {
1858 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
1859 }
1860 else
1861 {
1862 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
1863 }
1864 }
1865
1866 // step 2: convert array into number of type T and return
1867 std::memcpy(&result, vec.data(), sizeof(NumberType));
1868 return true;
1869 }
1870
1871 /*!
1872 @brief create a string by reading characters from the input
1873
1874 @tparam NumberType the type of the number
1875 @param[in] format the current format (for diagnostics)
1876 @param[in] len number of characters to read
1877 @param[out] result string created by reading @a len bytes
1878
1879 @return whether string creation completed
1880
1881 @note We can not reserve @a len bytes for the result, because @a len
1882 may be too large. Usually, @ref unexpect_eof() detects the end of
1883 the input before we run out of string memory.
1884 */
1885 template<typename NumberType>
1886 bool get_string(const input_format_t format,
1887 const NumberType len,
1888 string_t& result)
1889 {
1890 bool success = true;
1891 std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
1892 {
1893 get();
1894 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format, "string")))
1895 {
1896 success = false;
1897 }
1898 return static_cast<char>(current);
1899 });
1900 return success;
1901 }
1902
1903 /*!
1904 @param[in] format the current format (for diagnostics)
1905 @param[in] context further context information (for diagnostics)
1906 @return whether the last read character is not EOF
1907 */
1908 JSON_HEDLEY_NON_NULL(3)
1909 bool unexpect_eof(const input_format_t format, const char* context) const
1910 {
1911 if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char>::eof()))
1912 {
1913 return sax->parse_error(chars_read, "<end of file>",
1914 parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context)));
1915 }
1916 return true;
1917 }
1918
1919 /*!
1920 @return a string representation of the last read byte
1921 */
1922 std::string get_token_string() const
1923 {
1924 std::array<char, 3> cr{{}};
1925 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current));
1926 return std::string{cr.data()};
1927 }
1928
1929 /*!
1930 @param[in] format the current format
1931 @param[in] detail a detailed error message
1932 @param[in] context further context information
1933 @return a message string to use in the parse_error exceptions
1934 */
1935 std::string exception_message(const input_format_t format,
1936 const std::string& detail,
1937 const std::string& context) const
1938 {
1939 std::string error_msg = "syntax error while parsing ";
1940
1941 switch (format)
1942 {
1943 case input_format_t::cbor:
1944 error_msg += "CBOR";
1945 break;
1946
1947 case input_format_t::msgpack:
1948 error_msg += "MessagePack";
1949 break;
1950
1951 case input_format_t::ubjson:
1952 error_msg += "UBJSON";
1953 break;
1954
1955 case input_format_t::bson:
1956 error_msg += "BSON";
1957 break;
1958
1959 default: // LCOV_EXCL_LINE
1960 assert(false); // LCOV_EXCL_LINE
1961 }
1962
1963 return error_msg + " " + context + ": " + detail;
1964 }
1965
1966 private:
1967 /// input adapter
1968 input_adapter_t ia = nullptr;
1969
1970 /// the current character
1971 int current = std::char_traits<char>::eof();
1972
1973 /// the number of characters read
1974 std::size_t chars_read = 0;
1975
/// whether we can assume little endianness
1977 const bool is_little_endian = little_endianess();
1978
1979 /// the SAX parser
1980 json_sax_t* sax = nullptr;
1981};
1982} // namespace detail
1983} // namespace nlohmann
1984