1 | #pragma once |
2 | |
3 | #include <algorithm> // generate_n |
4 | #include <array> // array |
5 | #include <cassert> // assert |
6 | #include <cmath> // ldexp |
7 | #include <cstddef> // size_t |
8 | #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t |
9 | #include <cstdio> // snprintf |
10 | #include <cstring> // memcpy |
11 | #include <iterator> // back_inserter |
12 | #include <limits> // numeric_limits |
13 | #include <string> // char_traits, string |
14 | #include <utility> // make_pair, move |
15 | |
16 | #include <nlohmann/detail/exceptions.hpp> |
17 | #include <nlohmann/detail/input/input_adapters.hpp> |
18 | #include <nlohmann/detail/input/json_sax.hpp> |
19 | #include <nlohmann/detail/macro_scope.hpp> |
20 | #include <nlohmann/detail/meta/is_sax.hpp> |
21 | #include <nlohmann/detail/value_t.hpp> |
22 | |
23 | namespace nlohmann |
24 | { |
25 | namespace detail |
26 | { |
27 | /////////////////// |
28 | // binary reader // |
29 | /////////////////// |
30 | |
31 | /*! |
@brief deserialization of BSON, CBOR, MessagePack, and UBJSON values
33 | */ |
34 | template<typename BasicJsonType, typename SAX = json_sax_dom_parser<BasicJsonType>> |
35 | class binary_reader |
36 | { |
// number and string types, taken from the BasicJsonType template parameter
using number_integer_t = typename BasicJsonType::number_integer_t;
using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
using number_float_t = typename BasicJsonType::number_float_t;
using string_t = typename BasicJsonType::string_t;
// type of the SAX event processor that parsed values are reported to
using json_sax_t = SAX;
42 | |
43 | public: |
44 | /*! |
45 | @brief create a binary reader |
46 | |
47 | @param[in] adapter input adapter to read from |
48 | */ |
49 | explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter)) |
50 | { |
51 | (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; |
52 | assert(ia); |
53 | } |
54 | |
55 | // make class move-only |
56 | binary_reader(const binary_reader&) = delete; |
57 | binary_reader(binary_reader&&) = default; |
58 | binary_reader& operator=(const binary_reader&) = delete; |
59 | binary_reader& operator=(binary_reader&&) = default; |
60 | ~binary_reader() = default; |
61 | |
62 | /*! |
63 | @param[in] format the binary format to parse |
64 | @param[in] sax_ a SAX event processor |
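@param[in] strict  whether to expect the input to be consumed completely

@return the result of the format-specific parser; if @a strict is set and
        input remains after the value, the result of the SAX parse_error call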
68 | */ |
69 | JSON_HEDLEY_NON_NULL(3) |
70 | bool sax_parse(const input_format_t format, |
71 | json_sax_t* sax_, |
72 | const bool strict = true) |
73 | { |
74 | sax = sax_; |
75 | bool result = false; |
76 | |
77 | switch (format) |
78 | { |
79 | case input_format_t::bson: |
80 | result = parse_bson_internal(); |
81 | break; |
82 | |
83 | case input_format_t::cbor: |
84 | result = parse_cbor_internal(); |
85 | break; |
86 | |
87 | case input_format_t::msgpack: |
88 | result = parse_msgpack_internal(); |
89 | break; |
90 | |
91 | case input_format_t::ubjson: |
92 | result = parse_ubjson_internal(); |
93 | break; |
94 | |
95 | default: // LCOV_EXCL_LINE |
96 | assert(false); // LCOV_EXCL_LINE |
97 | } |
98 | |
99 | // strict mode: next byte must be EOF |
100 | if (result and strict) |
101 | { |
102 | if (format == input_format_t::ubjson) |
103 | { |
104 | get_ignore_noop(); |
105 | } |
106 | else |
107 | { |
108 | get(); |
109 | } |
110 | |
111 | if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char>::eof())) |
112 | { |
113 | return sax->parse_error(chars_read, get_token_string(), |
114 | parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value" ))); |
115 | } |
116 | } |
117 | |
118 | return result; |
119 | } |
120 | |
121 | /*! |
122 | @brief determine system byte order |
123 | |
124 | @return true if and only if system's byte order is little endian |
125 | |
126 | @note from http://stackoverflow.com/a/1001328/266378 |
127 | */ |
static bool little_endianess(int num = 1) noexcept
{
    // Not constexpr: reinterpret_cast is never allowed in a constant
    // expression, so a constexpr qualifier here could never be honoured.
    //
    // Look at the byte stored at the lowest address of num. With the default
    // argument 1 that byte is 1 exactly when the least significant byte comes
    // first in memory, i.e. on a little endian system.
    return *reinterpret_cast<char*>(&num) == 1;
}
132 | |
133 | private: |
134 | ////////// |
135 | // BSON // |
136 | ////////// |
137 | |
138 | /*! |
139 | @brief Reads in a BSON-object and passes it to the SAX-parser. |
140 | @return whether a valid BSON-value was passed to the SAX parser |
141 | */ |
142 | bool parse_bson_internal() |
143 | { |
144 | std::int32_t document_size; |
145 | get_number<std::int32_t, true>(input_format_t::bson, document_size); |
146 | |
147 | if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1)))) |
148 | { |
149 | return false; |
150 | } |
151 | |
152 | if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) |
153 | { |
154 | return false; |
155 | } |
156 | |
157 | return sax->end_object(); |
158 | } |
159 | |
160 | /*! |
161 | @brief Parses a C-style string from the BSON input. |
162 | @param[in, out] result A reference to the string variable where the read |
163 | string is to be stored. |
@return `true` if the \x00-byte indicating the end of the string was
encountered before the EOF; `false` indicates an unexpected EOF.
166 | */ |
167 | bool get_bson_cstr(string_t& result) |
168 | { |
169 | auto out = std::back_inserter(result); |
170 | while (true) |
171 | { |
172 | get(); |
173 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring" ))) |
174 | { |
175 | return false; |
176 | } |
177 | if (current == 0x00) |
178 | { |
179 | return true; |
180 | } |
181 | *out++ = static_cast<char>(current); |
182 | } |
183 | |
184 | return true; |
185 | } |
186 | |
187 | /*! |
188 | @brief Parses a zero-terminated string of length @a len from the BSON |
189 | input. |
190 | @param[in] len The length (including the zero-byte at the end) of the |
191 | string to be read. |
192 | @param[in, out] result A reference to the string variable where the read |
193 | string is to be stored. |
194 | @tparam NumberType The type of the length @a len |
195 | @pre len >= 1 |
196 | @return `true` if the string was successfully parsed |
197 | */ |
198 | template<typename NumberType> |
199 | bool get_bson_string(const NumberType len, string_t& result) |
200 | { |
201 | if (JSON_HEDLEY_UNLIKELY(len < 1)) |
202 | { |
203 | auto last_token = get_token_string(); |
204 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string" ))); |
205 | } |
206 | |
207 | return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof(); |
208 | } |
209 | |
210 | /*! |
211 | @brief Read a BSON document element of the given @a element_type. |
212 | @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html |
213 | @param[in] element_type_parse_position The position in the input stream, |
214 | where the `element_type` was read. |
215 | @warning Not all BSON element types are supported yet. An unsupported |
216 | @a element_type will give rise to a parse_error.114: |
217 | Unsupported BSON record type 0x... |
218 | @return whether a valid BSON-object/array was passed to the SAX parser |
219 | */ |
220 | bool parse_bson_element_internal(const int element_type, |
221 | const std::size_t element_type_parse_position) |
222 | { |
223 | switch (element_type) |
224 | { |
225 | case 0x01: // double |
226 | { |
227 | double number; |
228 | return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), "" ); |
229 | } |
230 | |
231 | case 0x02: // string |
232 | { |
233 | std::int32_t len; |
234 | string_t value; |
235 | return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); |
236 | } |
237 | |
238 | case 0x03: // object |
239 | { |
240 | return parse_bson_internal(); |
241 | } |
242 | |
243 | case 0x04: // array |
244 | { |
245 | return parse_bson_array(); |
246 | } |
247 | |
248 | case 0x08: // boolean |
249 | { |
250 | return sax->boolean(get() != 0); |
251 | } |
252 | |
253 | case 0x0A: // null |
254 | { |
255 | return sax->null(); |
256 | } |
257 | |
258 | case 0x10: // int32 |
259 | { |
260 | std::int32_t value; |
261 | return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value); |
262 | } |
263 | |
264 | case 0x12: // int64 |
265 | { |
266 | std::int64_t value; |
267 | return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value); |
268 | } |
269 | |
270 | default: // anything else not supported (yet) |
271 | { |
272 | std::array<char, 3> cr{{}}; |
273 | (std::snprintf)(cr.data(), cr.size(), "%.2hhX" , static_cast<unsigned char>(element_type)); |
274 | return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data()))); |
275 | } |
276 | } |
277 | } |
278 | |
279 | /*! |
280 | @brief Read a BSON element list (as specified in the BSON-spec) |
281 | |
282 | The same binary layout is used for objects and arrays, hence it must be |
283 | indicated with the argument @a is_array which one is expected |
284 | (true --> array, false --> object). |
285 | |
286 | @param[in] is_array Determines if the element list being read is to be |
287 | treated as an object (@a is_array == false), or as an |
288 | array (@a is_array == true). |
289 | @return whether a valid BSON-object/array was passed to the SAX parser |
290 | */ |
291 | bool parse_bson_element_list(const bool is_array) |
292 | { |
293 | string_t key; |
294 | while (int element_type = get()) |
295 | { |
296 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list" ))) |
297 | { |
298 | return false; |
299 | } |
300 | |
301 | const std::size_t element_type_parse_position = chars_read; |
302 | if (JSON_HEDLEY_UNLIKELY(not get_bson_cstr(key))) |
303 | { |
304 | return false; |
305 | } |
306 | |
307 | if (not is_array and not sax->key(key)) |
308 | { |
309 | return false; |
310 | } |
311 | |
312 | if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) |
313 | { |
314 | return false; |
315 | } |
316 | |
317 | // get_bson_cstr only appends |
318 | key.clear(); |
319 | } |
320 | |
321 | return true; |
322 | } |
323 | |
324 | /*! |
325 | @brief Reads an array from the BSON input and passes it to the SAX-parser. |
326 | @return whether a valid BSON-array was passed to the SAX parser |
327 | */ |
328 | bool parse_bson_array() |
329 | { |
330 | std::int32_t document_size; |
331 | get_number<std::int32_t, true>(input_format_t::bson, document_size); |
332 | |
333 | if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1)))) |
334 | { |
335 | return false; |
336 | } |
337 | |
338 | if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) |
339 | { |
340 | return false; |
341 | } |
342 | |
343 | return sax->end_array(); |
344 | } |
345 | |
346 | ////////// |
347 | // CBOR // |
348 | ////////// |
349 | |
350 | /*! |
351 | @param[in] get_char whether a new character should be retrieved from the |
352 | input (true, default) or whether the last read |
353 | character should be considered instead |
354 | |
355 | @return whether a valid CBOR value was passed to the SAX parser |
356 | */ |
357 | bool parse_cbor_internal(const bool get_char = true) |
358 | { |
359 | switch (get_char ? get() : current) |
360 | { |
361 | // EOF |
362 | case std::char_traits<char>::eof(): |
363 | return unexpect_eof(input_format_t::cbor, "value" ); |
364 | |
365 | // Integer 0x00..0x17 (0..23) |
366 | case 0x00: |
367 | case 0x01: |
368 | case 0x02: |
369 | case 0x03: |
370 | case 0x04: |
371 | case 0x05: |
372 | case 0x06: |
373 | case 0x07: |
374 | case 0x08: |
375 | case 0x09: |
376 | case 0x0A: |
377 | case 0x0B: |
378 | case 0x0C: |
379 | case 0x0D: |
380 | case 0x0E: |
381 | case 0x0F: |
382 | case 0x10: |
383 | case 0x11: |
384 | case 0x12: |
385 | case 0x13: |
386 | case 0x14: |
387 | case 0x15: |
388 | case 0x16: |
389 | case 0x17: |
390 | return sax->number_unsigned(static_cast<number_unsigned_t>(current)); |
391 | |
392 | case 0x18: // Unsigned integer (one-byte uint8_t follows) |
393 | { |
394 | std::uint8_t number; |
395 | return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); |
396 | } |
397 | |
398 | case 0x19: // Unsigned integer (two-byte uint16_t follows) |
399 | { |
400 | std::uint16_t number; |
401 | return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); |
402 | } |
403 | |
404 | case 0x1A: // Unsigned integer (four-byte uint32_t follows) |
405 | { |
406 | std::uint32_t number; |
407 | return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); |
408 | } |
409 | |
410 | case 0x1B: // Unsigned integer (eight-byte uint64_t follows) |
411 | { |
412 | std::uint64_t number; |
413 | return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); |
414 | } |
415 | |
416 | // Negative integer -1-0x00..-1-0x17 (-1..-24) |
417 | case 0x20: |
418 | case 0x21: |
419 | case 0x22: |
420 | case 0x23: |
421 | case 0x24: |
422 | case 0x25: |
423 | case 0x26: |
424 | case 0x27: |
425 | case 0x28: |
426 | case 0x29: |
427 | case 0x2A: |
428 | case 0x2B: |
429 | case 0x2C: |
430 | case 0x2D: |
431 | case 0x2E: |
432 | case 0x2F: |
433 | case 0x30: |
434 | case 0x31: |
435 | case 0x32: |
436 | case 0x33: |
437 | case 0x34: |
438 | case 0x35: |
439 | case 0x36: |
440 | case 0x37: |
441 | return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current)); |
442 | |
443 | case 0x38: // Negative integer (one-byte uint8_t follows) |
444 | { |
445 | std::uint8_t number; |
446 | return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); |
447 | } |
448 | |
449 | case 0x39: // Negative integer -1-n (two-byte uint16_t follows) |
450 | { |
451 | std::uint16_t number; |
452 | return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); |
453 | } |
454 | |
455 | case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) |
456 | { |
457 | std::uint32_t number; |
458 | return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); |
459 | } |
460 | |
461 | case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) |
462 | { |
463 | std::uint64_t number; |
464 | return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) |
465 | - static_cast<number_integer_t>(number)); |
466 | } |
467 | |
468 | // UTF-8 string (0x00..0x17 bytes follow) |
469 | case 0x60: |
470 | case 0x61: |
471 | case 0x62: |
472 | case 0x63: |
473 | case 0x64: |
474 | case 0x65: |
475 | case 0x66: |
476 | case 0x67: |
477 | case 0x68: |
478 | case 0x69: |
479 | case 0x6A: |
480 | case 0x6B: |
481 | case 0x6C: |
482 | case 0x6D: |
483 | case 0x6E: |
484 | case 0x6F: |
485 | case 0x70: |
486 | case 0x71: |
487 | case 0x72: |
488 | case 0x73: |
489 | case 0x74: |
490 | case 0x75: |
491 | case 0x76: |
492 | case 0x77: |
493 | case 0x78: // UTF-8 string (one-byte uint8_t for n follows) |
494 | case 0x79: // UTF-8 string (two-byte uint16_t for n follow) |
495 | case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) |
496 | case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) |
497 | case 0x7F: // UTF-8 string (indefinite length) |
498 | { |
499 | string_t s; |
500 | return get_cbor_string(s) and sax->string(s); |
501 | } |
502 | |
503 | // array (0x00..0x17 data items follow) |
504 | case 0x80: |
505 | case 0x81: |
506 | case 0x82: |
507 | case 0x83: |
508 | case 0x84: |
509 | case 0x85: |
510 | case 0x86: |
511 | case 0x87: |
512 | case 0x88: |
513 | case 0x89: |
514 | case 0x8A: |
515 | case 0x8B: |
516 | case 0x8C: |
517 | case 0x8D: |
518 | case 0x8E: |
519 | case 0x8F: |
520 | case 0x90: |
521 | case 0x91: |
522 | case 0x92: |
523 | case 0x93: |
524 | case 0x94: |
525 | case 0x95: |
526 | case 0x96: |
527 | case 0x97: |
528 | return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu)); |
529 | |
530 | case 0x98: // array (one-byte uint8_t for n follows) |
531 | { |
532 | std::uint8_t len; |
533 | return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); |
534 | } |
535 | |
536 | case 0x99: // array (two-byte uint16_t for n follow) |
537 | { |
538 | std::uint16_t len; |
539 | return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); |
540 | } |
541 | |
542 | case 0x9A: // array (four-byte uint32_t for n follow) |
543 | { |
544 | std::uint32_t len; |
545 | return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); |
546 | } |
547 | |
548 | case 0x9B: // array (eight-byte uint64_t for n follow) |
549 | { |
550 | std::uint64_t len; |
551 | return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); |
552 | } |
553 | |
554 | case 0x9F: // array (indefinite length) |
555 | return get_cbor_array(std::size_t(-1)); |
556 | |
557 | // map (0x00..0x17 pairs of data items follow) |
558 | case 0xA0: |
559 | case 0xA1: |
560 | case 0xA2: |
561 | case 0xA3: |
562 | case 0xA4: |
563 | case 0xA5: |
564 | case 0xA6: |
565 | case 0xA7: |
566 | case 0xA8: |
567 | case 0xA9: |
568 | case 0xAA: |
569 | case 0xAB: |
570 | case 0xAC: |
571 | case 0xAD: |
572 | case 0xAE: |
573 | case 0xAF: |
574 | case 0xB0: |
575 | case 0xB1: |
576 | case 0xB2: |
577 | case 0xB3: |
578 | case 0xB4: |
579 | case 0xB5: |
580 | case 0xB6: |
581 | case 0xB7: |
582 | return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu)); |
583 | |
584 | case 0xB8: // map (one-byte uint8_t for n follows) |
585 | { |
586 | std::uint8_t len; |
587 | return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); |
588 | } |
589 | |
590 | case 0xB9: // map (two-byte uint16_t for n follow) |
591 | { |
592 | std::uint16_t len; |
593 | return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); |
594 | } |
595 | |
596 | case 0xBA: // map (four-byte uint32_t for n follow) |
597 | { |
598 | std::uint32_t len; |
599 | return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); |
600 | } |
601 | |
602 | case 0xBB: // map (eight-byte uint64_t for n follow) |
603 | { |
604 | std::uint64_t len; |
605 | return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); |
606 | } |
607 | |
608 | case 0xBF: // map (indefinite length) |
609 | return get_cbor_object(std::size_t(-1)); |
610 | |
611 | case 0xF4: // false |
612 | return sax->boolean(false); |
613 | |
614 | case 0xF5: // true |
615 | return sax->boolean(true); |
616 | |
617 | case 0xF6: // null |
618 | return sax->null(); |
619 | |
620 | case 0xF9: // Half-Precision Float (two-byte IEEE 754) |
621 | { |
622 | const int byte1_raw = get(); |
623 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number" ))) |
624 | { |
625 | return false; |
626 | } |
627 | const int byte2_raw = get(); |
628 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number" ))) |
629 | { |
630 | return false; |
631 | } |
632 | |
633 | const auto byte1 = static_cast<unsigned char>(byte1_raw); |
634 | const auto byte2 = static_cast<unsigned char>(byte2_raw); |
635 | |
636 | // code from RFC 7049, Appendix D, Figure 3: |
637 | // As half-precision floating-point numbers were only added |
638 | // to IEEE 754 in 2008, today's programming platforms often |
639 | // still only have limited support for them. It is very |
640 | // easy to include at least decoding support for them even |
641 | // without such support. An example of a small decoder for |
642 | // half-precision floating-point numbers in the C language |
643 | // is shown in Fig. 3. |
644 | const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2); |
645 | const double val = [&half] |
646 | { |
647 | const int exp = (half >> 10u) & 0x1Fu; |
648 | const unsigned int mant = half & 0x3FFu; |
649 | assert(0 <= exp and exp <= 32); |
650 | assert(mant <= 1024); |
651 | switch (exp) |
652 | { |
653 | case 0: |
654 | return std::ldexp(mant, -24); |
655 | case 31: |
656 | return (mant == 0) |
657 | ? std::numeric_limits<double>::infinity() |
658 | : std::numeric_limits<double>::quiet_NaN(); |
659 | default: |
660 | return std::ldexp(mant + 1024, exp - 25); |
661 | } |
662 | }(); |
663 | return sax->number_float((half & 0x8000u) != 0 |
664 | ? static_cast<number_float_t>(-val) |
665 | : static_cast<number_float_t>(val), "" ); |
666 | } |
667 | |
668 | case 0xFA: // Single-Precision Float (four-byte IEEE 754) |
669 | { |
670 | float number; |
671 | return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), "" ); |
672 | } |
673 | |
674 | case 0xFB: // Double-Precision Float (eight-byte IEEE 754) |
675 | { |
676 | double number; |
677 | return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), "" ); |
678 | } |
679 | |
680 | default: // anything else (0xFF is handled inside the other types) |
681 | { |
682 | auto last_token = get_token_string(); |
683 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value" ))); |
684 | } |
685 | } |
686 | } |
687 | |
688 | /*! |
689 | @brief reads a CBOR string |
690 | |
691 | This function first reads starting bytes to determine the expected |
692 | string length and then copies this number of bytes into a string. |
693 | Additionally, CBOR's strings with indefinite lengths are supported. |
694 | |
695 | @param[out] result created string |
696 | |
697 | @return whether string creation completed |
698 | */ |
699 | bool get_cbor_string(string_t& result) |
700 | { |
701 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string" ))) |
702 | { |
703 | return false; |
704 | } |
705 | |
706 | switch (current) |
707 | { |
708 | // UTF-8 string (0x00..0x17 bytes follow) |
709 | case 0x60: |
710 | case 0x61: |
711 | case 0x62: |
712 | case 0x63: |
713 | case 0x64: |
714 | case 0x65: |
715 | case 0x66: |
716 | case 0x67: |
717 | case 0x68: |
718 | case 0x69: |
719 | case 0x6A: |
720 | case 0x6B: |
721 | case 0x6C: |
722 | case 0x6D: |
723 | case 0x6E: |
724 | case 0x6F: |
725 | case 0x70: |
726 | case 0x71: |
727 | case 0x72: |
728 | case 0x73: |
729 | case 0x74: |
730 | case 0x75: |
731 | case 0x76: |
732 | case 0x77: |
733 | { |
734 | return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result); |
735 | } |
736 | |
737 | case 0x78: // UTF-8 string (one-byte uint8_t for n follows) |
738 | { |
739 | std::uint8_t len; |
740 | return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); |
741 | } |
742 | |
743 | case 0x79: // UTF-8 string (two-byte uint16_t for n follow) |
744 | { |
745 | std::uint16_t len; |
746 | return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); |
747 | } |
748 | |
749 | case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) |
750 | { |
751 | std::uint32_t len; |
752 | return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); |
753 | } |
754 | |
755 | case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) |
756 | { |
757 | std::uint64_t len; |
758 | return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); |
759 | } |
760 | |
761 | case 0x7F: // UTF-8 string (indefinite length) |
762 | { |
763 | while (get() != 0xFF) |
764 | { |
765 | string_t chunk; |
766 | if (not get_cbor_string(chunk)) |
767 | { |
768 | return false; |
769 | } |
770 | result.append(chunk); |
771 | } |
772 | return true; |
773 | } |
774 | |
775 | default: |
776 | { |
777 | auto last_token = get_token_string(); |
778 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string" ))); |
779 | } |
780 | } |
781 | } |
782 | |
783 | /*! |
784 | @param[in] len the length of the array or std::size_t(-1) for an |
785 | array of indefinite size |
786 | @return whether array creation completed |
787 | */ |
788 | bool get_cbor_array(const std::size_t len) |
789 | { |
790 | if (JSON_HEDLEY_UNLIKELY(not sax->start_array(len))) |
791 | { |
792 | return false; |
793 | } |
794 | |
795 | if (len != std::size_t(-1)) |
796 | { |
797 | for (std::size_t i = 0; i < len; ++i) |
798 | { |
799 | if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal())) |
800 | { |
801 | return false; |
802 | } |
803 | } |
804 | } |
805 | else |
806 | { |
807 | while (get() != 0xFF) |
808 | { |
809 | if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal(false))) |
810 | { |
811 | return false; |
812 | } |
813 | } |
814 | } |
815 | |
816 | return sax->end_array(); |
817 | } |
818 | |
819 | /*! |
820 | @param[in] len the length of the object or std::size_t(-1) for an |
821 | object of indefinite size |
822 | @return whether object creation completed |
823 | */ |
824 | bool get_cbor_object(const std::size_t len) |
825 | { |
826 | if (JSON_HEDLEY_UNLIKELY(not sax->start_object(len))) |
827 | { |
828 | return false; |
829 | } |
830 | |
831 | string_t key; |
832 | if (len != std::size_t(-1)) |
833 | { |
834 | for (std::size_t i = 0; i < len; ++i) |
835 | { |
836 | get(); |
837 | if (JSON_HEDLEY_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) |
838 | { |
839 | return false; |
840 | } |
841 | |
842 | if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal())) |
843 | { |
844 | return false; |
845 | } |
846 | key.clear(); |
847 | } |
848 | } |
849 | else |
850 | { |
851 | while (get() != 0xFF) |
852 | { |
853 | if (JSON_HEDLEY_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) |
854 | { |
855 | return false; |
856 | } |
857 | |
858 | if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal())) |
859 | { |
860 | return false; |
861 | } |
862 | key.clear(); |
863 | } |
864 | } |
865 | |
866 | return sax->end_object(); |
867 | } |
868 | |
869 | ///////////// |
870 | // MsgPack // |
871 | ///////////// |
872 | |
873 | /*! |
874 | @return whether a valid MessagePack value was passed to the SAX parser |
875 | */ |
876 | bool parse_msgpack_internal() |
877 | { |
878 | switch (get()) |
879 | { |
880 | // EOF |
881 | case std::char_traits<char>::eof(): |
882 | return unexpect_eof(input_format_t::msgpack, "value" ); |
883 | |
884 | // positive fixint |
885 | case 0x00: |
886 | case 0x01: |
887 | case 0x02: |
888 | case 0x03: |
889 | case 0x04: |
890 | case 0x05: |
891 | case 0x06: |
892 | case 0x07: |
893 | case 0x08: |
894 | case 0x09: |
895 | case 0x0A: |
896 | case 0x0B: |
897 | case 0x0C: |
898 | case 0x0D: |
899 | case 0x0E: |
900 | case 0x0F: |
901 | case 0x10: |
902 | case 0x11: |
903 | case 0x12: |
904 | case 0x13: |
905 | case 0x14: |
906 | case 0x15: |
907 | case 0x16: |
908 | case 0x17: |
909 | case 0x18: |
910 | case 0x19: |
911 | case 0x1A: |
912 | case 0x1B: |
913 | case 0x1C: |
914 | case 0x1D: |
915 | case 0x1E: |
916 | case 0x1F: |
917 | case 0x20: |
918 | case 0x21: |
919 | case 0x22: |
920 | case 0x23: |
921 | case 0x24: |
922 | case 0x25: |
923 | case 0x26: |
924 | case 0x27: |
925 | case 0x28: |
926 | case 0x29: |
927 | case 0x2A: |
928 | case 0x2B: |
929 | case 0x2C: |
930 | case 0x2D: |
931 | case 0x2E: |
932 | case 0x2F: |
933 | case 0x30: |
934 | case 0x31: |
935 | case 0x32: |
936 | case 0x33: |
937 | case 0x34: |
938 | case 0x35: |
939 | case 0x36: |
940 | case 0x37: |
941 | case 0x38: |
942 | case 0x39: |
943 | case 0x3A: |
944 | case 0x3B: |
945 | case 0x3C: |
946 | case 0x3D: |
947 | case 0x3E: |
948 | case 0x3F: |
949 | case 0x40: |
950 | case 0x41: |
951 | case 0x42: |
952 | case 0x43: |
953 | case 0x44: |
954 | case 0x45: |
955 | case 0x46: |
956 | case 0x47: |
957 | case 0x48: |
958 | case 0x49: |
959 | case 0x4A: |
960 | case 0x4B: |
961 | case 0x4C: |
962 | case 0x4D: |
963 | case 0x4E: |
964 | case 0x4F: |
965 | case 0x50: |
966 | case 0x51: |
967 | case 0x52: |
968 | case 0x53: |
969 | case 0x54: |
970 | case 0x55: |
971 | case 0x56: |
972 | case 0x57: |
973 | case 0x58: |
974 | case 0x59: |
975 | case 0x5A: |
976 | case 0x5B: |
977 | case 0x5C: |
978 | case 0x5D: |
979 | case 0x5E: |
980 | case 0x5F: |
981 | case 0x60: |
982 | case 0x61: |
983 | case 0x62: |
984 | case 0x63: |
985 | case 0x64: |
986 | case 0x65: |
987 | case 0x66: |
988 | case 0x67: |
989 | case 0x68: |
990 | case 0x69: |
991 | case 0x6A: |
992 | case 0x6B: |
993 | case 0x6C: |
994 | case 0x6D: |
995 | case 0x6E: |
996 | case 0x6F: |
997 | case 0x70: |
998 | case 0x71: |
999 | case 0x72: |
1000 | case 0x73: |
1001 | case 0x74: |
1002 | case 0x75: |
1003 | case 0x76: |
1004 | case 0x77: |
1005 | case 0x78: |
1006 | case 0x79: |
1007 | case 0x7A: |
1008 | case 0x7B: |
1009 | case 0x7C: |
1010 | case 0x7D: |
1011 | case 0x7E: |
1012 | case 0x7F: |
1013 | return sax->number_unsigned(static_cast<number_unsigned_t>(current)); |
1014 | |
1015 | // fixmap |
1016 | case 0x80: |
1017 | case 0x81: |
1018 | case 0x82: |
1019 | case 0x83: |
1020 | case 0x84: |
1021 | case 0x85: |
1022 | case 0x86: |
1023 | case 0x87: |
1024 | case 0x88: |
1025 | case 0x89: |
1026 | case 0x8A: |
1027 | case 0x8B: |
1028 | case 0x8C: |
1029 | case 0x8D: |
1030 | case 0x8E: |
1031 | case 0x8F: |
1032 | return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); |
1033 | |
1034 | // fixarray |
1035 | case 0x90: |
1036 | case 0x91: |
1037 | case 0x92: |
1038 | case 0x93: |
1039 | case 0x94: |
1040 | case 0x95: |
1041 | case 0x96: |
1042 | case 0x97: |
1043 | case 0x98: |
1044 | case 0x99: |
1045 | case 0x9A: |
1046 | case 0x9B: |
1047 | case 0x9C: |
1048 | case 0x9D: |
1049 | case 0x9E: |
1050 | case 0x9F: |
1051 | return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); |
1052 | |
1053 | // fixstr |
1054 | case 0xA0: |
1055 | case 0xA1: |
1056 | case 0xA2: |
1057 | case 0xA3: |
1058 | case 0xA4: |
1059 | case 0xA5: |
1060 | case 0xA6: |
1061 | case 0xA7: |
1062 | case 0xA8: |
1063 | case 0xA9: |
1064 | case 0xAA: |
1065 | case 0xAB: |
1066 | case 0xAC: |
1067 | case 0xAD: |
1068 | case 0xAE: |
1069 | case 0xAF: |
1070 | case 0xB0: |
1071 | case 0xB1: |
1072 | case 0xB2: |
1073 | case 0xB3: |
1074 | case 0xB4: |
1075 | case 0xB5: |
1076 | case 0xB6: |
1077 | case 0xB7: |
1078 | case 0xB8: |
1079 | case 0xB9: |
1080 | case 0xBA: |
1081 | case 0xBB: |
1082 | case 0xBC: |
1083 | case 0xBD: |
1084 | case 0xBE: |
1085 | case 0xBF: |
1086 | case 0xD9: // str 8 |
1087 | case 0xDA: // str 16 |
1088 | case 0xDB: // str 32 |
1089 | { |
1090 | string_t s; |
1091 | return get_msgpack_string(s) and sax->string(s); |
1092 | } |
1093 | |
1094 | case 0xC0: // nil |
1095 | return sax->null(); |
1096 | |
1097 | case 0xC2: // false |
1098 | return sax->boolean(false); |
1099 | |
1100 | case 0xC3: // true |
1101 | return sax->boolean(true); |
1102 | |
1103 | case 0xCA: // float 32 |
1104 | { |
1105 | float number; |
1106 | return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "" ); |
1107 | } |
1108 | |
1109 | case 0xCB: // float 64 |
1110 | { |
1111 | double number; |
1112 | return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "" ); |
1113 | } |
1114 | |
1115 | case 0xCC: // uint 8 |
1116 | { |
1117 | std::uint8_t number; |
1118 | return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); |
1119 | } |
1120 | |
1121 | case 0xCD: // uint 16 |
1122 | { |
1123 | std::uint16_t number; |
1124 | return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); |
1125 | } |
1126 | |
1127 | case 0xCE: // uint 32 |
1128 | { |
1129 | std::uint32_t number; |
1130 | return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); |
1131 | } |
1132 | |
1133 | case 0xCF: // uint 64 |
1134 | { |
1135 | std::uint64_t number; |
1136 | return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); |
1137 | } |
1138 | |
1139 | case 0xD0: // int 8 |
1140 | { |
1141 | std::int8_t number; |
1142 | return get_number(input_format_t::msgpack, number) and sax->number_integer(number); |
1143 | } |
1144 | |
1145 | case 0xD1: // int 16 |
1146 | { |
1147 | std::int16_t number; |
1148 | return get_number(input_format_t::msgpack, number) and sax->number_integer(number); |
1149 | } |
1150 | |
1151 | case 0xD2: // int 32 |
1152 | { |
1153 | std::int32_t number; |
1154 | return get_number(input_format_t::msgpack, number) and sax->number_integer(number); |
1155 | } |
1156 | |
1157 | case 0xD3: // int 64 |
1158 | { |
1159 | std::int64_t number; |
1160 | return get_number(input_format_t::msgpack, number) and sax->number_integer(number); |
1161 | } |
1162 | |
1163 | case 0xDC: // array 16 |
1164 | { |
1165 | std::uint16_t len; |
1166 | return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); |
1167 | } |
1168 | |
1169 | case 0xDD: // array 32 |
1170 | { |
1171 | std::uint32_t len; |
1172 | return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); |
1173 | } |
1174 | |
1175 | case 0xDE: // map 16 |
1176 | { |
1177 | std::uint16_t len; |
1178 | return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); |
1179 | } |
1180 | |
1181 | case 0xDF: // map 32 |
1182 | { |
1183 | std::uint32_t len; |
1184 | return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); |
1185 | } |
1186 | |
1187 | // negative fixint |
1188 | case 0xE0: |
1189 | case 0xE1: |
1190 | case 0xE2: |
1191 | case 0xE3: |
1192 | case 0xE4: |
1193 | case 0xE5: |
1194 | case 0xE6: |
1195 | case 0xE7: |
1196 | case 0xE8: |
1197 | case 0xE9: |
1198 | case 0xEA: |
1199 | case 0xEB: |
1200 | case 0xEC: |
1201 | case 0xED: |
1202 | case 0xEE: |
1203 | case 0xEF: |
1204 | case 0xF0: |
1205 | case 0xF1: |
1206 | case 0xF2: |
1207 | case 0xF3: |
1208 | case 0xF4: |
1209 | case 0xF5: |
1210 | case 0xF6: |
1211 | case 0xF7: |
1212 | case 0xF8: |
1213 | case 0xF9: |
1214 | case 0xFA: |
1215 | case 0xFB: |
1216 | case 0xFC: |
1217 | case 0xFD: |
1218 | case 0xFE: |
1219 | case 0xFF: |
1220 | return sax->number_integer(static_cast<std::int8_t>(current)); |
1221 | |
1222 | default: // anything else |
1223 | { |
1224 | auto last_token = get_token_string(); |
1225 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value" ))); |
1226 | } |
1227 | } |
1228 | } |
1229 | |
1230 | /*! |
1231 | @brief reads a MessagePack string |
1232 | |
1233 | This function first reads starting bytes to determine the expected |
1234 | string length and then copies this number of bytes into a string. |
1235 | |
1236 | @param[out] result created string |
1237 | |
1238 | @return whether string creation completed |
1239 | */ |
1240 | bool get_msgpack_string(string_t& result) |
1241 | { |
1242 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string" ))) |
1243 | { |
1244 | return false; |
1245 | } |
1246 | |
1247 | switch (current) |
1248 | { |
1249 | // fixstr |
1250 | case 0xA0: |
1251 | case 0xA1: |
1252 | case 0xA2: |
1253 | case 0xA3: |
1254 | case 0xA4: |
1255 | case 0xA5: |
1256 | case 0xA6: |
1257 | case 0xA7: |
1258 | case 0xA8: |
1259 | case 0xA9: |
1260 | case 0xAA: |
1261 | case 0xAB: |
1262 | case 0xAC: |
1263 | case 0xAD: |
1264 | case 0xAE: |
1265 | case 0xAF: |
1266 | case 0xB0: |
1267 | case 0xB1: |
1268 | case 0xB2: |
1269 | case 0xB3: |
1270 | case 0xB4: |
1271 | case 0xB5: |
1272 | case 0xB6: |
1273 | case 0xB7: |
1274 | case 0xB8: |
1275 | case 0xB9: |
1276 | case 0xBA: |
1277 | case 0xBB: |
1278 | case 0xBC: |
1279 | case 0xBD: |
1280 | case 0xBE: |
1281 | case 0xBF: |
1282 | { |
1283 | return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result); |
1284 | } |
1285 | |
1286 | case 0xD9: // str 8 |
1287 | { |
1288 | std::uint8_t len; |
1289 | return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); |
1290 | } |
1291 | |
1292 | case 0xDA: // str 16 |
1293 | { |
1294 | std::uint16_t len; |
1295 | return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); |
1296 | } |
1297 | |
1298 | case 0xDB: // str 32 |
1299 | { |
1300 | std::uint32_t len; |
1301 | return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); |
1302 | } |
1303 | |
1304 | default: |
1305 | { |
1306 | auto last_token = get_token_string(); |
1307 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string" ))); |
1308 | } |
1309 | } |
1310 | } |
1311 | |
1312 | /*! |
1313 | @param[in] len the length of the array |
1314 | @return whether array creation completed |
1315 | */ |
1316 | bool get_msgpack_array(const std::size_t len) |
1317 | { |
1318 | if (JSON_HEDLEY_UNLIKELY(not sax->start_array(len))) |
1319 | { |
1320 | return false; |
1321 | } |
1322 | |
1323 | for (std::size_t i = 0; i < len; ++i) |
1324 | { |
1325 | if (JSON_HEDLEY_UNLIKELY(not parse_msgpack_internal())) |
1326 | { |
1327 | return false; |
1328 | } |
1329 | } |
1330 | |
1331 | return sax->end_array(); |
1332 | } |
1333 | |
1334 | /*! |
1335 | @param[in] len the length of the object |
1336 | @return whether object creation completed |
1337 | */ |
1338 | bool get_msgpack_object(const std::size_t len) |
1339 | { |
1340 | if (JSON_HEDLEY_UNLIKELY(not sax->start_object(len))) |
1341 | { |
1342 | return false; |
1343 | } |
1344 | |
1345 | string_t key; |
1346 | for (std::size_t i = 0; i < len; ++i) |
1347 | { |
1348 | get(); |
1349 | if (JSON_HEDLEY_UNLIKELY(not get_msgpack_string(key) or not sax->key(key))) |
1350 | { |
1351 | return false; |
1352 | } |
1353 | |
1354 | if (JSON_HEDLEY_UNLIKELY(not parse_msgpack_internal())) |
1355 | { |
1356 | return false; |
1357 | } |
1358 | key.clear(); |
1359 | } |
1360 | |
1361 | return sax->end_object(); |
1362 | } |
1363 | |
1364 | //////////// |
1365 | // UBJSON // |
1366 | //////////// |
1367 | |
1368 | /*! |
1369 | @param[in] get_char whether a new character should be retrieved from the |
1370 | input (true, default) or whether the last read |
1371 | character should be considered instead |
1372 | |
1373 | @return whether a valid UBJSON value was passed to the SAX parser |
1374 | */ |
1375 | bool parse_ubjson_internal(const bool get_char = true) |
1376 | { |
1377 | return get_ubjson_value(get_char ? get_ignore_noop() : current); |
1378 | } |
1379 | |
1380 | /*! |
1381 | @brief reads a UBJSON string |
1382 | |
1383 | This function is either called after reading the 'S' byte explicitly |
1384 | indicating a string, or in case of an object key where the 'S' byte can be |
1385 | left out. |
1386 | |
1387 | @param[out] result created string |
1388 | @param[in] get_char whether a new character should be retrieved from the |
1389 | input (true, default) or whether the last read |
1390 | character should be considered instead |
1391 | |
1392 | @return whether string creation completed |
1393 | */ |
1394 | bool get_ubjson_string(string_t& result, const bool get_char = true) |
1395 | { |
1396 | if (get_char) |
1397 | { |
1398 | get(); // TODO(niels): may we ignore N here? |
1399 | } |
1400 | |
1401 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value" ))) |
1402 | { |
1403 | return false; |
1404 | } |
1405 | |
1406 | switch (current) |
1407 | { |
1408 | case 'U': |
1409 | { |
1410 | std::uint8_t len; |
1411 | return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); |
1412 | } |
1413 | |
1414 | case 'i': |
1415 | { |
1416 | std::int8_t len; |
1417 | return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); |
1418 | } |
1419 | |
1420 | case 'I': |
1421 | { |
1422 | std::int16_t len; |
1423 | return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); |
1424 | } |
1425 | |
1426 | case 'l': |
1427 | { |
1428 | std::int32_t len; |
1429 | return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); |
1430 | } |
1431 | |
1432 | case 'L': |
1433 | { |
1434 | std::int64_t len; |
1435 | return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); |
1436 | } |
1437 | |
1438 | default: |
1439 | auto last_token = get_token_string(); |
1440 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string" ))); |
1441 | } |
1442 | } |
1443 | |
1444 | /*! |
1445 | @param[out] result determined size |
1446 | @return whether size determination completed |
1447 | */ |
1448 | bool get_ubjson_size_value(std::size_t& result) |
1449 | { |
1450 | switch (get_ignore_noop()) |
1451 | { |
1452 | case 'U': |
1453 | { |
1454 | std::uint8_t number; |
1455 | if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number))) |
1456 | { |
1457 | return false; |
1458 | } |
1459 | result = static_cast<std::size_t>(number); |
1460 | return true; |
1461 | } |
1462 | |
1463 | case 'i': |
1464 | { |
1465 | std::int8_t number; |
1466 | if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number))) |
1467 | { |
1468 | return false; |
1469 | } |
1470 | result = static_cast<std::size_t>(number); |
1471 | return true; |
1472 | } |
1473 | |
1474 | case 'I': |
1475 | { |
1476 | std::int16_t number; |
1477 | if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number))) |
1478 | { |
1479 | return false; |
1480 | } |
1481 | result = static_cast<std::size_t>(number); |
1482 | return true; |
1483 | } |
1484 | |
1485 | case 'l': |
1486 | { |
1487 | std::int32_t number; |
1488 | if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number))) |
1489 | { |
1490 | return false; |
1491 | } |
1492 | result = static_cast<std::size_t>(number); |
1493 | return true; |
1494 | } |
1495 | |
1496 | case 'L': |
1497 | { |
1498 | std::int64_t number; |
1499 | if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number))) |
1500 | { |
1501 | return false; |
1502 | } |
1503 | result = static_cast<std::size_t>(number); |
1504 | return true; |
1505 | } |
1506 | |
1507 | default: |
1508 | { |
1509 | auto last_token = get_token_string(); |
1510 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size" ))); |
1511 | } |
1512 | } |
1513 | } |
1514 | |
1515 | /*! |
1516 | @brief determine the type and size for a container |
1517 | |
1518 | In the optimized UBJSON format, a type and a size can be provided to allow |
1519 | for a more compact representation. |
1520 | |
1521 | @param[out] result pair of the size and the type |
1522 | |
1523 | @return whether pair creation completed |
1524 | */ |
1525 | bool get_ubjson_size_type(std::pair<std::size_t, int>& result) |
1526 | { |
1527 | result.first = string_t::npos; // size |
1528 | result.second = 0; // type |
1529 | |
1530 | get_ignore_noop(); |
1531 | |
1532 | if (current == '$') |
1533 | { |
1534 | result.second = get(); // must not ignore 'N', because 'N' maybe the type |
1535 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type" ))) |
1536 | { |
1537 | return false; |
1538 | } |
1539 | |
1540 | get_ignore_noop(); |
1541 | if (JSON_HEDLEY_UNLIKELY(current != '#')) |
1542 | { |
1543 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value" ))) |
1544 | { |
1545 | return false; |
1546 | } |
1547 | auto last_token = get_token_string(); |
1548 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size" ))); |
1549 | } |
1550 | |
1551 | return get_ubjson_size_value(result.first); |
1552 | } |
1553 | |
1554 | if (current == '#') |
1555 | { |
1556 | return get_ubjson_size_value(result.first); |
1557 | } |
1558 | |
1559 | return true; |
1560 | } |
1561 | |
1562 | /*! |
1563 | @param prefix the previously read or set type prefix |
1564 | @return whether value creation completed |
1565 | */ |
1566 | bool get_ubjson_value(const int prefix) |
1567 | { |
1568 | switch (prefix) |
1569 | { |
1570 | case std::char_traits<char>::eof(): // EOF |
1571 | return unexpect_eof(input_format_t::ubjson, "value" ); |
1572 | |
1573 | case 'T': // true |
1574 | return sax->boolean(true); |
1575 | case 'F': // false |
1576 | return sax->boolean(false); |
1577 | |
1578 | case 'Z': // null |
1579 | return sax->null(); |
1580 | |
1581 | case 'U': |
1582 | { |
1583 | std::uint8_t number; |
1584 | return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number); |
1585 | } |
1586 | |
1587 | case 'i': |
1588 | { |
1589 | std::int8_t number; |
1590 | return get_number(input_format_t::ubjson, number) and sax->number_integer(number); |
1591 | } |
1592 | |
1593 | case 'I': |
1594 | { |
1595 | std::int16_t number; |
1596 | return get_number(input_format_t::ubjson, number) and sax->number_integer(number); |
1597 | } |
1598 | |
1599 | case 'l': |
1600 | { |
1601 | std::int32_t number; |
1602 | return get_number(input_format_t::ubjson, number) and sax->number_integer(number); |
1603 | } |
1604 | |
1605 | case 'L': |
1606 | { |
1607 | std::int64_t number; |
1608 | return get_number(input_format_t::ubjson, number) and sax->number_integer(number); |
1609 | } |
1610 | |
1611 | case 'd': |
1612 | { |
1613 | float number; |
1614 | return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), "" ); |
1615 | } |
1616 | |
1617 | case 'D': |
1618 | { |
1619 | double number; |
1620 | return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), "" ); |
1621 | } |
1622 | |
1623 | case 'C': // char |
1624 | { |
1625 | get(); |
1626 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char" ))) |
1627 | { |
1628 | return false; |
1629 | } |
1630 | if (JSON_HEDLEY_UNLIKELY(current > 127)) |
1631 | { |
1632 | auto last_token = get_token_string(); |
1633 | return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char" ))); |
1634 | } |
1635 | string_t s(1, static_cast<char>(current)); |
1636 | return sax->string(s); |
1637 | } |
1638 | |
1639 | case 'S': // string |
1640 | { |
1641 | string_t s; |
1642 | return get_ubjson_string(s) and sax->string(s); |
1643 | } |
1644 | |
1645 | case '[': // array |
1646 | return get_ubjson_array(); |
1647 | |
1648 | case '{': // object |
1649 | return get_ubjson_object(); |
1650 | |
1651 | default: // anything else |
1652 | { |
1653 | auto last_token = get_token_string(); |
1654 | return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value" ))); |
1655 | } |
1656 | } |
1657 | } |
1658 | |
1659 | /*! |
1660 | @return whether array creation completed |
1661 | */ |
1662 | bool get_ubjson_array() |
1663 | { |
1664 | std::pair<std::size_t, int> size_and_type; |
1665 | if (JSON_HEDLEY_UNLIKELY(not get_ubjson_size_type(size_and_type))) |
1666 | { |
1667 | return false; |
1668 | } |
1669 | |
1670 | if (size_and_type.first != string_t::npos) |
1671 | { |
1672 | if (JSON_HEDLEY_UNLIKELY(not sax->start_array(size_and_type.first))) |
1673 | { |
1674 | return false; |
1675 | } |
1676 | |
1677 | if (size_and_type.second != 0) |
1678 | { |
1679 | if (size_and_type.second != 'N') |
1680 | { |
1681 | for (std::size_t i = 0; i < size_and_type.first; ++i) |
1682 | { |
1683 | if (JSON_HEDLEY_UNLIKELY(not get_ubjson_value(size_and_type.second))) |
1684 | { |
1685 | return false; |
1686 | } |
1687 | } |
1688 | } |
1689 | } |
1690 | else |
1691 | { |
1692 | for (std::size_t i = 0; i < size_and_type.first; ++i) |
1693 | { |
1694 | if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal())) |
1695 | { |
1696 | return false; |
1697 | } |
1698 | } |
1699 | } |
1700 | } |
1701 | else |
1702 | { |
1703 | if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1)))) |
1704 | { |
1705 | return false; |
1706 | } |
1707 | |
1708 | while (current != ']') |
1709 | { |
1710 | if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal(false))) |
1711 | { |
1712 | return false; |
1713 | } |
1714 | get_ignore_noop(); |
1715 | } |
1716 | } |
1717 | |
1718 | return sax->end_array(); |
1719 | } |
1720 | |
1721 | /*! |
1722 | @return whether object creation completed |
1723 | */ |
1724 | bool get_ubjson_object() |
1725 | { |
1726 | std::pair<std::size_t, int> size_and_type; |
1727 | if (JSON_HEDLEY_UNLIKELY(not get_ubjson_size_type(size_and_type))) |
1728 | { |
1729 | return false; |
1730 | } |
1731 | |
1732 | string_t key; |
1733 | if (size_and_type.first != string_t::npos) |
1734 | { |
1735 | if (JSON_HEDLEY_UNLIKELY(not sax->start_object(size_and_type.first))) |
1736 | { |
1737 | return false; |
1738 | } |
1739 | |
1740 | if (size_and_type.second != 0) |
1741 | { |
1742 | for (std::size_t i = 0; i < size_and_type.first; ++i) |
1743 | { |
1744 | if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) |
1745 | { |
1746 | return false; |
1747 | } |
1748 | if (JSON_HEDLEY_UNLIKELY(not get_ubjson_value(size_and_type.second))) |
1749 | { |
1750 | return false; |
1751 | } |
1752 | key.clear(); |
1753 | } |
1754 | } |
1755 | else |
1756 | { |
1757 | for (std::size_t i = 0; i < size_and_type.first; ++i) |
1758 | { |
1759 | if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) |
1760 | { |
1761 | return false; |
1762 | } |
1763 | if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal())) |
1764 | { |
1765 | return false; |
1766 | } |
1767 | key.clear(); |
1768 | } |
1769 | } |
1770 | } |
1771 | else |
1772 | { |
1773 | if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1)))) |
1774 | { |
1775 | return false; |
1776 | } |
1777 | |
1778 | while (current != '}') |
1779 | { |
1780 | if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) |
1781 | { |
1782 | return false; |
1783 | } |
1784 | if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal())) |
1785 | { |
1786 | return false; |
1787 | } |
1788 | get_ignore_noop(); |
1789 | key.clear(); |
1790 | } |
1791 | } |
1792 | |
1793 | return sax->end_object(); |
1794 | } |
1795 | |
1796 | /////////////////////// |
1797 | // Utility functions // |
1798 | /////////////////////// |
1799 | |
1800 | /*! |
1801 | @brief get next character from the input |
1802 | |
1803 | This function provides the interface to the used input adapter. It does |
1804 | not throw in case the input reached EOF, but returns a -'ve valued |
1805 | `std::char_traits<char>::eof()` in that case. |
1806 | |
1807 | @return character read from the input |
1808 | */ |
1809 | int get() |
1810 | { |
1811 | ++chars_read; |
1812 | return current = ia->get_character(); |
1813 | } |
1814 | |
1815 | /*! |
1816 | @return character read from the input after ignoring all 'N' entries |
1817 | */ |
1818 | int get_ignore_noop() |
1819 | { |
1820 | do |
1821 | { |
1822 | get(); |
1823 | } |
1824 | while (current == 'N'); |
1825 | |
1826 | return current; |
1827 | } |
1828 | |
1829 | /* |
1830 | @brief read a number from the input |
1831 | |
1832 | @tparam NumberType the type of the number |
1833 | @param[in] format the current format (for diagnostics) |
1834 | @param[out] result number of type @a NumberType |
1835 | |
1836 | @return whether conversion completed |
1837 | |
1838 | @note This function needs to respect the system's endianess, because |
1839 | bytes in CBOR, MessagePack, and UBJSON are stored in network order |
1840 | (big endian) and therefore need reordering on little endian systems. |
1841 | */ |
1842 | template<typename NumberType, bool InputIsLittleEndian = false> |
1843 | bool get_number(const input_format_t format, NumberType& result) |
1844 | { |
1845 | // step 1: read input into array with system's byte order |
1846 | std::array<std::uint8_t, sizeof(NumberType)> vec; |
1847 | for (std::size_t i = 0; i < sizeof(NumberType); ++i) |
1848 | { |
1849 | get(); |
1850 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format, "number" ))) |
1851 | { |
1852 | return false; |
1853 | } |
1854 | |
1855 | // reverse byte order prior to conversion if necessary |
1856 | if (is_little_endian != InputIsLittleEndian) |
1857 | { |
1858 | vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current); |
1859 | } |
1860 | else |
1861 | { |
1862 | vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE |
1863 | } |
1864 | } |
1865 | |
1866 | // step 2: convert array into number of type T and return |
1867 | std::memcpy(&result, vec.data(), sizeof(NumberType)); |
1868 | return true; |
1869 | } |
1870 | |
1871 | /*! |
1872 | @brief create a string by reading characters from the input |
1873 | |
1874 | @tparam NumberType the type of the number |
1875 | @param[in] format the current format (for diagnostics) |
1876 | @param[in] len number of characters to read |
1877 | @param[out] result string created by reading @a len bytes |
1878 | |
1879 | @return whether string creation completed |
1880 | |
1881 | @note We can not reserve @a len bytes for the result, because @a len |
1882 | may be too large. Usually, @ref unexpect_eof() detects the end of |
1883 | the input before we run out of string memory. |
1884 | */ |
1885 | template<typename NumberType> |
1886 | bool get_string(const input_format_t format, |
1887 | const NumberType len, |
1888 | string_t& result) |
1889 | { |
1890 | bool success = true; |
1891 | std::generate_n(std::back_inserter(result), len, [this, &success, &format]() |
1892 | { |
1893 | get(); |
1894 | if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format, "string" ))) |
1895 | { |
1896 | success = false; |
1897 | } |
1898 | return static_cast<char>(current); |
1899 | }); |
1900 | return success; |
1901 | } |
1902 | |
1903 | /*! |
1904 | @param[in] format the current format (for diagnostics) |
1905 | @param[in] context further context information (for diagnostics) |
1906 | @return whether the last read character is not EOF |
1907 | */ |
1908 | JSON_HEDLEY_NON_NULL(3) |
1909 | bool unexpect_eof(const input_format_t format, const char* context) const |
1910 | { |
1911 | if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char>::eof())) |
1912 | { |
1913 | return sax->parse_error(chars_read, "<end of file>" , |
1914 | parse_error::create(110, chars_read, exception_message(format, "unexpected end of input" , context))); |
1915 | } |
1916 | return true; |
1917 | } |
1918 | |
1919 | /*! |
1920 | @return a string representation of the last read byte |
1921 | */ |
1922 | std::string get_token_string() const |
1923 | { |
1924 | std::array<char, 3> cr{{}}; |
1925 | (std::snprintf)(cr.data(), cr.size(), "%.2hhX" , static_cast<unsigned char>(current)); |
1926 | return std::string{cr.data()}; |
1927 | } |
1928 | |
1929 | /*! |
1930 | @param[in] format the current format |
1931 | @param[in] detail a detailed error message |
1932 | @param[in] context further context information |
1933 | @return a message string to use in the parse_error exceptions |
1934 | */ |
1935 | std::string exception_message(const input_format_t format, |
1936 | const std::string& detail, |
1937 | const std::string& context) const |
1938 | { |
1939 | std::string error_msg = "syntax error while parsing " ; |
1940 | |
1941 | switch (format) |
1942 | { |
1943 | case input_format_t::cbor: |
1944 | error_msg += "CBOR" ; |
1945 | break; |
1946 | |
1947 | case input_format_t::msgpack: |
1948 | error_msg += "MessagePack" ; |
1949 | break; |
1950 | |
1951 | case input_format_t::ubjson: |
1952 | error_msg += "UBJSON" ; |
1953 | break; |
1954 | |
1955 | case input_format_t::bson: |
1956 | error_msg += "BSON" ; |
1957 | break; |
1958 | |
1959 | default: // LCOV_EXCL_LINE |
1960 | assert(false); // LCOV_EXCL_LINE |
1961 | } |
1962 | |
1963 | return error_msg + " " + context + ": " + detail; |
1964 | } |
1965 | |
1966 | private: |
1967 | /// input adapter |
1968 | input_adapter_t ia = nullptr; |
1969 | |
1970 | /// the current character |
1971 | int current = std::char_traits<char>::eof(); |
1972 | |
1973 | /// the number of characters read |
1974 | std::size_t chars_read = 0; |
1975 | |
/// whether we can assume little endianness
1977 | const bool is_little_endian = little_endianess(); |
1978 | |
1979 | /// the SAX parser |
1980 | json_sax_t* sax = nullptr; |
1981 | }; |
1982 | } // namespace detail |
1983 | } // namespace nlohmann |
1984 | |