1 | #pragma once |
2 | |
3 | #include <array> // array |
4 | #include <clocale> // localeconv |
5 | #include <cstddef> // size_t |
6 | #include <cstdio> // snprintf |
7 | #include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull |
8 | #include <initializer_list> // initializer_list |
9 | #include <string> // char_traits, string |
10 | #include <utility> // move |
11 | #include <vector> // vector |
12 | |
13 | #include <nlohmann/detail/input/input_adapters.hpp> |
14 | #include <nlohmann/detail/input/position_t.hpp> |
15 | #include <nlohmann/detail/macro_scope.hpp> |
16 | |
17 | namespace nlohmann |
18 | { |
19 | namespace detail |
20 | { |
21 | /////////// |
22 | // lexer // |
23 | /////////// |
24 | |
25 | /*! |
26 | @brief lexical analysis |
27 | |
28 | This class organizes the lexical analysis during JSON deserialization. |
29 | */ |
30 | template<typename BasicJsonType> |
31 | class lexer |
32 | { |
33 | using number_integer_t = typename BasicJsonType::number_integer_t; |
34 | using number_unsigned_t = typename BasicJsonType::number_unsigned_t; |
35 | using number_float_t = typename BasicJsonType::number_float_t; |
36 | using string_t = typename BasicJsonType::string_t; |
37 | |
38 | public: |
/// kinds of tokens the lexer hands over to the parser
enum class token_type
{
    /// indicating the scanner is uninitialized
    uninitialized,
    /// the `true` literal
    literal_true,
    /// the `false` literal
    literal_false,
    /// the `null` literal
    literal_null,
    /// a string -- use get_string() for actual value
    value_string,
    /// an unsigned integer -- use get_number_unsigned() for actual value
    value_unsigned,
    /// a signed integer -- use get_number_integer() for actual value
    value_integer,
    /// a floating-point number -- use get_number_float() for actual value
    value_float,
    /// the character for array begin `[`
    begin_array,
    /// the character for object begin `{`
    begin_object,
    /// the character for array end `]`
    end_array,
    /// the character for object end `}`
    end_object,
    /// the name separator `:`
    name_separator,
    /// the value separator `,`
    value_separator,
    /// indicating a parse error
    parse_error,
    /// indicating the end of the input buffer
    end_of_input,
    /// a literal or the begin of a value (only for diagnostics)
    literal_or_value
};
60 | |
61 | /// return name of values of type token_type (only used for errors) |
62 | JSON_HEDLEY_RETURNS_NON_NULL |
63 | JSON_HEDLEY_CONST |
64 | static const char* token_type_name(const token_type t) noexcept |
65 | { |
66 | switch (t) |
67 | { |
68 | case token_type::uninitialized: |
69 | return "<uninitialized>" ; |
70 | case token_type::literal_true: |
71 | return "true literal" ; |
72 | case token_type::literal_false: |
73 | return "false literal" ; |
74 | case token_type::literal_null: |
75 | return "null literal" ; |
76 | case token_type::value_string: |
77 | return "string literal" ; |
78 | case lexer::token_type::value_unsigned: |
79 | case lexer::token_type::value_integer: |
80 | case lexer::token_type::value_float: |
81 | return "number literal" ; |
82 | case token_type::begin_array: |
83 | return "'['" ; |
84 | case token_type::begin_object: |
85 | return "'{'" ; |
86 | case token_type::end_array: |
87 | return "']'" ; |
88 | case token_type::end_object: |
89 | return "'}'" ; |
90 | case token_type::name_separator: |
91 | return "':'" ; |
92 | case token_type::value_separator: |
93 | return "','" ; |
94 | case token_type::parse_error: |
95 | return "<parse error>" ; |
96 | case token_type::end_of_input: |
97 | return "end of input" ; |
98 | case token_type::literal_or_value: |
99 | return "'[', '{', or a literal" ; |
100 | // LCOV_EXCL_START |
101 | default: // catch non-enum values |
102 | return "unknown token" ; |
103 | // LCOV_EXCL_STOP |
104 | } |
105 | } |
106 | |
107 | explicit lexer(detail::input_adapter_t&& adapter) |
108 | : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {} |
109 | |
110 | // delete because of pointer members |
111 | lexer(const lexer&) = delete; |
112 | lexer(lexer&&) = delete; |
113 | lexer& operator=(lexer&) = delete; |
114 | lexer& operator=(lexer&&) = delete; |
115 | ~lexer() = default; |
116 | |
117 | private: |
118 | ///////////////////// |
119 | // locales |
120 | ///////////////////// |
121 | |
122 | /// return the locale-dependent decimal point |
123 | JSON_HEDLEY_PURE |
124 | static char get_decimal_point() noexcept |
125 | { |
126 | const auto loc = localeconv(); |
127 | assert(loc != nullptr); |
128 | return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); |
129 | } |
130 | |
131 | ///////////////////// |
132 | // scan functions |
133 | ///////////////////// |
134 | |
135 | /*! |
136 | @brief get codepoint from 4 hex characters following `\u` |
137 | |
138 | For input "\u c1 c2 c3 c4" the codepoint is: |
139 | (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 |
140 | = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) |
141 | |
142 | Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' |
143 | must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The |
144 | conversion is done by subtracting the offset (0x30, 0x37, and 0x57) |
145 | between the ASCII value of the character and the desired integer value. |
146 | |
147 | @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or |
148 | non-hex character) |
149 | */ |
150 | int get_codepoint() |
151 | { |
152 | // this function only makes sense after reading `\u` |
153 | assert(current == 'u'); |
154 | int codepoint = 0; |
155 | |
156 | const auto factors = { 12u, 8u, 4u, 0u }; |
157 | for (const auto factor : factors) |
158 | { |
159 | get(); |
160 | |
161 | if (current >= '0' and current <= '9') |
162 | { |
163 | codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x30u) << factor); |
164 | } |
165 | else if (current >= 'A' and current <= 'F') |
166 | { |
167 | codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x37u) << factor); |
168 | } |
169 | else if (current >= 'a' and current <= 'f') |
170 | { |
171 | codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x57u) << factor); |
172 | } |
173 | else |
174 | { |
175 | return -1; |
176 | } |
177 | } |
178 | |
179 | assert(0x0000 <= codepoint and codepoint <= 0xFFFF); |
180 | return codepoint; |
181 | } |
182 | |
183 | /*! |
184 | @brief check if the next byte(s) are inside a given range |
185 | |
186 | Adds the current byte and, for each passed range, reads a new byte and |
187 | checks if it is inside the range. If a violation was detected, set up an |
188 | error message and return false. Otherwise, return true. |
189 | |
190 | @param[in] ranges list of integers; interpreted as list of pairs of |
191 | inclusive lower and upper bound, respectively |
192 | |
193 | @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, |
194 | 1, 2, or 3 pairs. This precondition is enforced by an assertion. |
195 | |
196 | @return true if and only if no range violation was detected |
197 | */ |
198 | bool next_byte_in_range(std::initializer_list<int> ranges) |
199 | { |
200 | assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6); |
201 | add(current); |
202 | |
203 | for (auto range = ranges.begin(); range != ranges.end(); ++range) |
204 | { |
205 | get(); |
206 | if (JSON_HEDLEY_LIKELY(*range <= current and current <= *(++range))) |
207 | { |
208 | add(current); |
209 | } |
210 | else |
211 | { |
212 | error_message = "invalid string: ill-formed UTF-8 byte" ; |
213 | return false; |
214 | } |
215 | } |
216 | |
217 | return true; |
218 | } |
219 | |
220 | /*! |
221 | @brief scan a string literal |
222 | |
223 | This function scans a string according to Sect. 7 of RFC 7159. While |
224 | scanning, bytes are escaped and copied into buffer token_buffer. Then the |
225 | function returns successfully, token_buffer is *not* null-terminated (as it |
226 | may contain \0 bytes), and token_buffer.size() is the number of bytes in the |
227 | string. |
228 | |
229 | @return token_type::value_string if string could be successfully scanned, |
230 | token_type::parse_error otherwise |
231 | |
232 | @note In case of errors, variable error_message contains a textual |
233 | description. |
234 | */ |
235 | token_type scan_string() |
236 | { |
237 | // reset token_buffer (ignore opening quote) |
238 | reset(); |
239 | |
240 | // we entered the function by reading an open quote |
241 | assert(current == '\"'); |
242 | |
243 | while (true) |
244 | { |
245 | // get next character |
246 | switch (get()) |
247 | { |
248 | // end of file while parsing string |
249 | case std::char_traits<char>::eof(): |
250 | { |
251 | error_message = "invalid string: missing closing quote" ; |
252 | return token_type::parse_error; |
253 | } |
254 | |
255 | // closing quote |
256 | case '\"': |
257 | { |
258 | return token_type::value_string; |
259 | } |
260 | |
261 | // escapes |
262 | case '\\': |
263 | { |
264 | switch (get()) |
265 | { |
266 | // quotation mark |
267 | case '\"': |
268 | add('\"'); |
269 | break; |
270 | // reverse solidus |
271 | case '\\': |
272 | add('\\'); |
273 | break; |
274 | // solidus |
275 | case '/': |
276 | add('/'); |
277 | break; |
278 | // backspace |
279 | case 'b': |
280 | add('\b'); |
281 | break; |
282 | // form feed |
283 | case 'f': |
284 | add('\f'); |
285 | break; |
286 | // line feed |
287 | case 'n': |
288 | add('\n'); |
289 | break; |
290 | // carriage return |
291 | case 'r': |
292 | add('\r'); |
293 | break; |
294 | // tab |
295 | case 't': |
296 | add('\t'); |
297 | break; |
298 | |
299 | // unicode escapes |
300 | case 'u': |
301 | { |
302 | const int codepoint1 = get_codepoint(); |
303 | int codepoint = codepoint1; // start with codepoint1 |
304 | |
305 | if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) |
306 | { |
307 | error_message = "invalid string: '\\u' must be followed by 4 hex digits" ; |
308 | return token_type::parse_error; |
309 | } |
310 | |
311 | // check if code point is a high surrogate |
312 | if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF) |
313 | { |
314 | // expect next \uxxxx entry |
315 | if (JSON_HEDLEY_LIKELY(get() == '\\' and get() == 'u')) |
316 | { |
317 | const int codepoint2 = get_codepoint(); |
318 | |
319 | if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) |
320 | { |
321 | error_message = "invalid string: '\\u' must be followed by 4 hex digits" ; |
322 | return token_type::parse_error; |
323 | } |
324 | |
325 | // check if codepoint2 is a low surrogate |
326 | if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF)) |
327 | { |
328 | // overwrite codepoint |
329 | codepoint = static_cast<int>( |
330 | // high surrogate occupies the most significant 22 bits |
331 | (static_cast<unsigned int>(codepoint1) << 10u) |
332 | // low surrogate occupies the least significant 15 bits |
333 | + static_cast<unsigned int>(codepoint2) |
334 | // there is still the 0xD800, 0xDC00 and 0x10000 noise |
335 | // in the result so we have to subtract with: |
336 | // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 |
337 | - 0x35FDC00u); |
338 | } |
339 | else |
340 | { |
341 | error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF" ; |
342 | return token_type::parse_error; |
343 | } |
344 | } |
345 | else |
346 | { |
347 | error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF" ; |
348 | return token_type::parse_error; |
349 | } |
350 | } |
351 | else |
352 | { |
353 | if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) |
354 | { |
355 | error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF" ; |
356 | return token_type::parse_error; |
357 | } |
358 | } |
359 | |
360 | // result of the above calculation yields a proper codepoint |
361 | assert(0x00 <= codepoint and codepoint <= 0x10FFFF); |
362 | |
363 | // translate codepoint into bytes |
364 | if (codepoint < 0x80) |
365 | { |
366 | // 1-byte characters: 0xxxxxxx (ASCII) |
367 | add(codepoint); |
368 | } |
369 | else if (codepoint <= 0x7FF) |
370 | { |
371 | // 2-byte characters: 110xxxxx 10xxxxxx |
372 | add(static_cast<int>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u))); |
373 | add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); |
374 | } |
375 | else if (codepoint <= 0xFFFF) |
376 | { |
377 | // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx |
378 | add(static_cast<int>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u))); |
379 | add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu))); |
380 | add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); |
381 | } |
382 | else |
383 | { |
384 | // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
385 | add(static_cast<int>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u))); |
386 | add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu))); |
387 | add(static_cast<int>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu))); |
388 | add(static_cast<int>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); |
389 | } |
390 | |
391 | break; |
392 | } |
393 | |
394 | // other characters after escape |
395 | default: |
396 | error_message = "invalid string: forbidden character after backslash" ; |
397 | return token_type::parse_error; |
398 | } |
399 | |
400 | break; |
401 | } |
402 | |
403 | // invalid control characters |
404 | case 0x00: |
405 | { |
406 | error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000" ; |
407 | return token_type::parse_error; |
408 | } |
409 | |
410 | case 0x01: |
411 | { |
412 | error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001" ; |
413 | return token_type::parse_error; |
414 | } |
415 | |
416 | case 0x02: |
417 | { |
418 | error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002" ; |
419 | return token_type::parse_error; |
420 | } |
421 | |
422 | case 0x03: |
423 | { |
424 | error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003" ; |
425 | return token_type::parse_error; |
426 | } |
427 | |
428 | case 0x04: |
429 | { |
430 | error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004" ; |
431 | return token_type::parse_error; |
432 | } |
433 | |
434 | case 0x05: |
435 | { |
436 | error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005" ; |
437 | return token_type::parse_error; |
438 | } |
439 | |
440 | case 0x06: |
441 | { |
442 | error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006" ; |
443 | return token_type::parse_error; |
444 | } |
445 | |
446 | case 0x07: |
447 | { |
448 | error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007" ; |
449 | return token_type::parse_error; |
450 | } |
451 | |
452 | case 0x08: |
453 | { |
454 | error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b" ; |
455 | return token_type::parse_error; |
456 | } |
457 | |
458 | case 0x09: |
459 | { |
460 | error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t" ; |
461 | return token_type::parse_error; |
462 | } |
463 | |
464 | case 0x0A: |
465 | { |
466 | error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n" ; |
467 | return token_type::parse_error; |
468 | } |
469 | |
470 | case 0x0B: |
471 | { |
472 | error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B" ; |
473 | return token_type::parse_error; |
474 | } |
475 | |
476 | case 0x0C: |
477 | { |
478 | error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f" ; |
479 | return token_type::parse_error; |
480 | } |
481 | |
482 | case 0x0D: |
483 | { |
484 | error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r" ; |
485 | return token_type::parse_error; |
486 | } |
487 | |
488 | case 0x0E: |
489 | { |
490 | error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E" ; |
491 | return token_type::parse_error; |
492 | } |
493 | |
494 | case 0x0F: |
495 | { |
496 | error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F" ; |
497 | return token_type::parse_error; |
498 | } |
499 | |
500 | case 0x10: |
501 | { |
502 | error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010" ; |
503 | return token_type::parse_error; |
504 | } |
505 | |
506 | case 0x11: |
507 | { |
508 | error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011" ; |
509 | return token_type::parse_error; |
510 | } |
511 | |
512 | case 0x12: |
513 | { |
514 | error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012" ; |
515 | return token_type::parse_error; |
516 | } |
517 | |
518 | case 0x13: |
519 | { |
520 | error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013" ; |
521 | return token_type::parse_error; |
522 | } |
523 | |
524 | case 0x14: |
525 | { |
526 | error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014" ; |
527 | return token_type::parse_error; |
528 | } |
529 | |
530 | case 0x15: |
531 | { |
532 | error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015" ; |
533 | return token_type::parse_error; |
534 | } |
535 | |
536 | case 0x16: |
537 | { |
538 | error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016" ; |
539 | return token_type::parse_error; |
540 | } |
541 | |
542 | case 0x17: |
543 | { |
544 | error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017" ; |
545 | return token_type::parse_error; |
546 | } |
547 | |
548 | case 0x18: |
549 | { |
550 | error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018" ; |
551 | return token_type::parse_error; |
552 | } |
553 | |
554 | case 0x19: |
555 | { |
556 | error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019" ; |
557 | return token_type::parse_error; |
558 | } |
559 | |
560 | case 0x1A: |
561 | { |
562 | error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A" ; |
563 | return token_type::parse_error; |
564 | } |
565 | |
566 | case 0x1B: |
567 | { |
568 | error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B" ; |
569 | return token_type::parse_error; |
570 | } |
571 | |
572 | case 0x1C: |
573 | { |
574 | error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C" ; |
575 | return token_type::parse_error; |
576 | } |
577 | |
578 | case 0x1D: |
579 | { |
580 | error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D" ; |
581 | return token_type::parse_error; |
582 | } |
583 | |
584 | case 0x1E: |
585 | { |
586 | error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E" ; |
587 | return token_type::parse_error; |
588 | } |
589 | |
590 | case 0x1F: |
591 | { |
592 | error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F" ; |
593 | return token_type::parse_error; |
594 | } |
595 | |
596 | // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) |
597 | case 0x20: |
598 | case 0x21: |
599 | case 0x23: |
600 | case 0x24: |
601 | case 0x25: |
602 | case 0x26: |
603 | case 0x27: |
604 | case 0x28: |
605 | case 0x29: |
606 | case 0x2A: |
607 | case 0x2B: |
608 | case 0x2C: |
609 | case 0x2D: |
610 | case 0x2E: |
611 | case 0x2F: |
612 | case 0x30: |
613 | case 0x31: |
614 | case 0x32: |
615 | case 0x33: |
616 | case 0x34: |
617 | case 0x35: |
618 | case 0x36: |
619 | case 0x37: |
620 | case 0x38: |
621 | case 0x39: |
622 | case 0x3A: |
623 | case 0x3B: |
624 | case 0x3C: |
625 | case 0x3D: |
626 | case 0x3E: |
627 | case 0x3F: |
628 | case 0x40: |
629 | case 0x41: |
630 | case 0x42: |
631 | case 0x43: |
632 | case 0x44: |
633 | case 0x45: |
634 | case 0x46: |
635 | case 0x47: |
636 | case 0x48: |
637 | case 0x49: |
638 | case 0x4A: |
639 | case 0x4B: |
640 | case 0x4C: |
641 | case 0x4D: |
642 | case 0x4E: |
643 | case 0x4F: |
644 | case 0x50: |
645 | case 0x51: |
646 | case 0x52: |
647 | case 0x53: |
648 | case 0x54: |
649 | case 0x55: |
650 | case 0x56: |
651 | case 0x57: |
652 | case 0x58: |
653 | case 0x59: |
654 | case 0x5A: |
655 | case 0x5B: |
656 | case 0x5D: |
657 | case 0x5E: |
658 | case 0x5F: |
659 | case 0x60: |
660 | case 0x61: |
661 | case 0x62: |
662 | case 0x63: |
663 | case 0x64: |
664 | case 0x65: |
665 | case 0x66: |
666 | case 0x67: |
667 | case 0x68: |
668 | case 0x69: |
669 | case 0x6A: |
670 | case 0x6B: |
671 | case 0x6C: |
672 | case 0x6D: |
673 | case 0x6E: |
674 | case 0x6F: |
675 | case 0x70: |
676 | case 0x71: |
677 | case 0x72: |
678 | case 0x73: |
679 | case 0x74: |
680 | case 0x75: |
681 | case 0x76: |
682 | case 0x77: |
683 | case 0x78: |
684 | case 0x79: |
685 | case 0x7A: |
686 | case 0x7B: |
687 | case 0x7C: |
688 | case 0x7D: |
689 | case 0x7E: |
690 | case 0x7F: |
691 | { |
692 | add(current); |
693 | break; |
694 | } |
695 | |
696 | // U+0080..U+07FF: bytes C2..DF 80..BF |
697 | case 0xC2: |
698 | case 0xC3: |
699 | case 0xC4: |
700 | case 0xC5: |
701 | case 0xC6: |
702 | case 0xC7: |
703 | case 0xC8: |
704 | case 0xC9: |
705 | case 0xCA: |
706 | case 0xCB: |
707 | case 0xCC: |
708 | case 0xCD: |
709 | case 0xCE: |
710 | case 0xCF: |
711 | case 0xD0: |
712 | case 0xD1: |
713 | case 0xD2: |
714 | case 0xD3: |
715 | case 0xD4: |
716 | case 0xD5: |
717 | case 0xD6: |
718 | case 0xD7: |
719 | case 0xD8: |
720 | case 0xD9: |
721 | case 0xDA: |
722 | case 0xDB: |
723 | case 0xDC: |
724 | case 0xDD: |
725 | case 0xDE: |
726 | case 0xDF: |
727 | { |
728 | if (JSON_HEDLEY_UNLIKELY(not next_byte_in_range({0x80, 0xBF}))) |
729 | { |
730 | return token_type::parse_error; |
731 | } |
732 | break; |
733 | } |
734 | |
735 | // U+0800..U+0FFF: bytes E0 A0..BF 80..BF |
736 | case 0xE0: |
737 | { |
738 | if (JSON_HEDLEY_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) |
739 | { |
740 | return token_type::parse_error; |
741 | } |
742 | break; |
743 | } |
744 | |
745 | // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF |
746 | // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF |
747 | case 0xE1: |
748 | case 0xE2: |
749 | case 0xE3: |
750 | case 0xE4: |
751 | case 0xE5: |
752 | case 0xE6: |
753 | case 0xE7: |
754 | case 0xE8: |
755 | case 0xE9: |
756 | case 0xEA: |
757 | case 0xEB: |
758 | case 0xEC: |
759 | case 0xEE: |
760 | case 0xEF: |
761 | { |
762 | if (JSON_HEDLEY_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) |
763 | { |
764 | return token_type::parse_error; |
765 | } |
766 | break; |
767 | } |
768 | |
769 | // U+D000..U+D7FF: bytes ED 80..9F 80..BF |
770 | case 0xED: |
771 | { |
772 | if (JSON_HEDLEY_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) |
773 | { |
774 | return token_type::parse_error; |
775 | } |
776 | break; |
777 | } |
778 | |
779 | // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF |
780 | case 0xF0: |
781 | { |
782 | if (JSON_HEDLEY_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) |
783 | { |
784 | return token_type::parse_error; |
785 | } |
786 | break; |
787 | } |
788 | |
789 | // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF |
790 | case 0xF1: |
791 | case 0xF2: |
792 | case 0xF3: |
793 | { |
794 | if (JSON_HEDLEY_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) |
795 | { |
796 | return token_type::parse_error; |
797 | } |
798 | break; |
799 | } |
800 | |
801 | // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF |
802 | case 0xF4: |
803 | { |
804 | if (JSON_HEDLEY_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) |
805 | { |
806 | return token_type::parse_error; |
807 | } |
808 | break; |
809 | } |
810 | |
811 | // remaining bytes (80..C1 and F5..FF) are ill-formed |
812 | default: |
813 | { |
814 | error_message = "invalid string: ill-formed UTF-8 byte" ; |
815 | return token_type::parse_error; |
816 | } |
817 | } |
818 | } |
819 | } |
820 | |
821 | JSON_HEDLEY_NON_NULL(2) |
822 | static void strtof(float& f, const char* str, char** endptr) noexcept |
823 | { |
824 | f = std::strtof(str, endptr); |
825 | } |
826 | |
827 | JSON_HEDLEY_NON_NULL(2) |
828 | static void strtof(double& f, const char* str, char** endptr) noexcept |
829 | { |
830 | f = std::strtod(str, endptr); |
831 | } |
832 | |
833 | JSON_HEDLEY_NON_NULL(2) |
834 | static void strtof(long double& f, const char* str, char** endptr) noexcept |
835 | { |
836 | f = std::strtold(str, endptr); |
837 | } |
838 | |
839 | /*! |
840 | @brief scan a number literal |
841 | |
842 | This function scans a string according to Sect. 6 of RFC 7159. |
843 | |
844 | The function is realized with a deterministic finite state machine derived |
845 | from the grammar described in RFC 7159. Starting in state "init", the |
input is read and used to determine the next state. Only state "done"
847 | accepts the number. State "error" is a trap state to model errors. In the |
848 | table below, "anything" means any character but the ones listed before. |
849 | |
850 | state | 0 | 1-9 | e E | + | - | . | anything |
851 | ---------|----------|----------|----------|---------|---------|----------|----------- |
852 | init | zero | any1 | [error] | [error] | minus | [error] | [error] |
853 | minus | zero | any1 | [error] | [error] | [error] | [error] | [error] |
854 | zero | done | done | exponent | done | done | decimal1 | done |
855 | any1 | any1 | any1 | exponent | done | done | decimal1 | done |
856 | decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] |
857 | decimal2 | decimal2 | decimal2 | exponent | done | done | done | done |
858 | exponent | any2 | any2 | [error] | sign | sign | [error] | [error] |
859 | sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] |
860 | any2 | any2 | any2 | done | done | done | done | done |
861 | |
862 | The state machine is realized with one label per state (prefixed with |
863 | "scan_number_") and `goto` statements between them. The state machine |
864 | contains cycles, but any cycle can be left when EOF is read. Therefore, |
865 | the function is guaranteed to terminate. |
866 | |
867 | During scanning, the read bytes are stored in token_buffer. This string is |
868 | then converted to a signed integer, an unsigned integer, or a |
869 | floating-point number. |
870 | |
871 | @return token_type::value_unsigned, token_type::value_integer, or |
872 | token_type::value_float if number could be successfully scanned, |
873 | token_type::parse_error otherwise |
874 | |
875 | @note The scanner is independent of the current locale. Internally, the |
876 | locale's decimal point is used instead of `.` to work with the |
877 | locale-dependent converters. |
878 | */ |
879 | token_type scan_number() // lgtm [cpp/use-of-goto] |
880 | { |
881 | // reset token_buffer to store the number's bytes |
882 | reset(); |
883 | |
884 | // the type of the parsed number; initially set to unsigned; will be |
885 | // changed if minus sign, decimal point or exponent is read |
886 | token_type number_type = token_type::value_unsigned; |
887 | |
888 | // state (init): we just found out we need to scan a number |
889 | switch (current) |
890 | { |
891 | case '-': |
892 | { |
893 | add(current); |
894 | goto scan_number_minus; |
895 | } |
896 | |
897 | case '0': |
898 | { |
899 | add(current); |
900 | goto scan_number_zero; |
901 | } |
902 | |
903 | case '1': |
904 | case '2': |
905 | case '3': |
906 | case '4': |
907 | case '5': |
908 | case '6': |
909 | case '7': |
910 | case '8': |
911 | case '9': |
912 | { |
913 | add(current); |
914 | goto scan_number_any1; |
915 | } |
916 | |
917 | // all other characters are rejected outside scan_number() |
918 | default: // LCOV_EXCL_LINE |
919 | assert(false); // LCOV_EXCL_LINE |
920 | } |
921 | |
922 | scan_number_minus: |
923 | // state: we just parsed a leading minus sign |
924 | number_type = token_type::value_integer; |
925 | switch (get()) |
926 | { |
927 | case '0': |
928 | { |
929 | add(current); |
930 | goto scan_number_zero; |
931 | } |
932 | |
933 | case '1': |
934 | case '2': |
935 | case '3': |
936 | case '4': |
937 | case '5': |
938 | case '6': |
939 | case '7': |
940 | case '8': |
941 | case '9': |
942 | { |
943 | add(current); |
944 | goto scan_number_any1; |
945 | } |
946 | |
947 | default: |
948 | { |
949 | error_message = "invalid number; expected digit after '-'" ; |
950 | return token_type::parse_error; |
951 | } |
952 | } |
953 | |
954 | scan_number_zero: |
955 | // state: we just parse a zero (maybe with a leading minus sign) |
956 | switch (get()) |
957 | { |
958 | case '.': |
959 | { |
960 | add(decimal_point_char); |
961 | goto scan_number_decimal1; |
962 | } |
963 | |
964 | case 'e': |
965 | case 'E': |
966 | { |
967 | add(current); |
968 | goto scan_number_exponent; |
969 | } |
970 | |
971 | default: |
972 | goto scan_number_done; |
973 | } |
974 | |
975 | scan_number_any1: |
976 | // state: we just parsed a number 0-9 (maybe with a leading minus sign) |
977 | switch (get()) |
978 | { |
979 | case '0': |
980 | case '1': |
981 | case '2': |
982 | case '3': |
983 | case '4': |
984 | case '5': |
985 | case '6': |
986 | case '7': |
987 | case '8': |
988 | case '9': |
989 | { |
990 | add(current); |
991 | goto scan_number_any1; |
992 | } |
993 | |
994 | case '.': |
995 | { |
996 | add(decimal_point_char); |
997 | goto scan_number_decimal1; |
998 | } |
999 | |
1000 | case 'e': |
1001 | case 'E': |
1002 | { |
1003 | add(current); |
1004 | goto scan_number_exponent; |
1005 | } |
1006 | |
1007 | default: |
1008 | goto scan_number_done; |
1009 | } |
1010 | |
1011 | scan_number_decimal1: |
1012 | // state: we just parsed a decimal point |
1013 | number_type = token_type::value_float; |
1014 | switch (get()) |
1015 | { |
1016 | case '0': |
1017 | case '1': |
1018 | case '2': |
1019 | case '3': |
1020 | case '4': |
1021 | case '5': |
1022 | case '6': |
1023 | case '7': |
1024 | case '8': |
1025 | case '9': |
1026 | { |
1027 | add(current); |
1028 | goto scan_number_decimal2; |
1029 | } |
1030 | |
1031 | default: |
1032 | { |
1033 | error_message = "invalid number; expected digit after '.'" ; |
1034 | return token_type::parse_error; |
1035 | } |
1036 | } |
1037 | |
1038 | scan_number_decimal2: |
1039 | // we just parsed at least one number after a decimal point |
1040 | switch (get()) |
1041 | { |
1042 | case '0': |
1043 | case '1': |
1044 | case '2': |
1045 | case '3': |
1046 | case '4': |
1047 | case '5': |
1048 | case '6': |
1049 | case '7': |
1050 | case '8': |
1051 | case '9': |
1052 | { |
1053 | add(current); |
1054 | goto scan_number_decimal2; |
1055 | } |
1056 | |
1057 | case 'e': |
1058 | case 'E': |
1059 | { |
1060 | add(current); |
1061 | goto scan_number_exponent; |
1062 | } |
1063 | |
1064 | default: |
1065 | goto scan_number_done; |
1066 | } |
1067 | |
1068 | scan_number_exponent: |
1069 | // we just parsed an exponent |
1070 | number_type = token_type::value_float; |
1071 | switch (get()) |
1072 | { |
1073 | case '+': |
1074 | case '-': |
1075 | { |
1076 | add(current); |
1077 | goto scan_number_sign; |
1078 | } |
1079 | |
1080 | case '0': |
1081 | case '1': |
1082 | case '2': |
1083 | case '3': |
1084 | case '4': |
1085 | case '5': |
1086 | case '6': |
1087 | case '7': |
1088 | case '8': |
1089 | case '9': |
1090 | { |
1091 | add(current); |
1092 | goto scan_number_any2; |
1093 | } |
1094 | |
1095 | default: |
1096 | { |
1097 | error_message = |
1098 | "invalid number; expected '+', '-', or digit after exponent" ; |
1099 | return token_type::parse_error; |
1100 | } |
1101 | } |
1102 | |
1103 | scan_number_sign: |
1104 | // we just parsed an exponent sign |
1105 | switch (get()) |
1106 | { |
1107 | case '0': |
1108 | case '1': |
1109 | case '2': |
1110 | case '3': |
1111 | case '4': |
1112 | case '5': |
1113 | case '6': |
1114 | case '7': |
1115 | case '8': |
1116 | case '9': |
1117 | { |
1118 | add(current); |
1119 | goto scan_number_any2; |
1120 | } |
1121 | |
1122 | default: |
1123 | { |
1124 | error_message = "invalid number; expected digit after exponent sign" ; |
1125 | return token_type::parse_error; |
1126 | } |
1127 | } |
1128 | |
1129 | scan_number_any2: |
1130 | // we just parsed a number after the exponent or exponent sign |
1131 | switch (get()) |
1132 | { |
1133 | case '0': |
1134 | case '1': |
1135 | case '2': |
1136 | case '3': |
1137 | case '4': |
1138 | case '5': |
1139 | case '6': |
1140 | case '7': |
1141 | case '8': |
1142 | case '9': |
1143 | { |
1144 | add(current); |
1145 | goto scan_number_any2; |
1146 | } |
1147 | |
1148 | default: |
1149 | goto scan_number_done; |
1150 | } |
1151 | |
1152 | scan_number_done: |
1153 | // unget the character after the number (we only read it to know that |
1154 | // we are done scanning a number) |
1155 | unget(); |
1156 | |
1157 | char* endptr = nullptr; |
1158 | errno = 0; |
1159 | |
1160 | // try to parse integers first and fall back to floats |
1161 | if (number_type == token_type::value_unsigned) |
1162 | { |
1163 | const auto x = std::strtoull(token_buffer.data(), &endptr, 10); |
1164 | |
1165 | // we checked the number format before |
1166 | assert(endptr == token_buffer.data() + token_buffer.size()); |
1167 | |
1168 | if (errno == 0) |
1169 | { |
1170 | value_unsigned = static_cast<number_unsigned_t>(x); |
1171 | if (value_unsigned == x) |
1172 | { |
1173 | return token_type::value_unsigned; |
1174 | } |
1175 | } |
1176 | } |
1177 | else if (number_type == token_type::value_integer) |
1178 | { |
1179 | const auto x = std::strtoll(token_buffer.data(), &endptr, 10); |
1180 | |
1181 | // we checked the number format before |
1182 | assert(endptr == token_buffer.data() + token_buffer.size()); |
1183 | |
1184 | if (errno == 0) |
1185 | { |
1186 | value_integer = static_cast<number_integer_t>(x); |
1187 | if (value_integer == x) |
1188 | { |
1189 | return token_type::value_integer; |
1190 | } |
1191 | } |
1192 | } |
1193 | |
1194 | // this code is reached if we parse a floating-point number or if an |
1195 | // integer conversion above failed |
1196 | strtof(value_float, token_buffer.data(), &endptr); |
1197 | |
1198 | // we checked the number format before |
1199 | assert(endptr == token_buffer.data() + token_buffer.size()); |
1200 | |
1201 | return token_type::value_float; |
1202 | } |
1203 | |
1204 | /*! |
1205 | @param[in] literal_text the literal text to expect |
1206 | @param[in] length the length of the passed literal text |
1207 | @param[in] return_type the token type to return on success |
1208 | */ |
1209 | JSON_HEDLEY_NON_NULL(2) |
1210 | token_type scan_literal(const char* literal_text, const std::size_t length, |
1211 | token_type return_type) |
1212 | { |
1213 | assert(current == literal_text[0]); |
1214 | for (std::size_t i = 1; i < length; ++i) |
1215 | { |
1216 | if (JSON_HEDLEY_UNLIKELY(get() != literal_text[i])) |
1217 | { |
1218 | error_message = "invalid literal" ; |
1219 | return token_type::parse_error; |
1220 | } |
1221 | } |
1222 | return return_type; |
1223 | } |
1224 | |
1225 | ///////////////////// |
1226 | // input management |
1227 | ///////////////////// |
1228 | |
1229 | /// reset token_buffer; current character is beginning of token |
1230 | void reset() noexcept |
1231 | { |
1232 | token_buffer.clear(); |
1233 | token_string.clear(); |
1234 | token_string.push_back(std::char_traits<char>::to_char_type(current)); |
1235 | } |
1236 | |
1237 | /* |
1238 | @brief get next character from the input |
1239 | |
1240 | This function provides the interface to the used input adapter. It does |
1241 | not throw in case the input reached EOF, but returns a |
1242 | `std::char_traits<char>::eof()` in that case. Stores the scanned characters |
1243 | for use in error messages. |
1244 | |
1245 | @return character read from the input |
1246 | */ |
1247 | std::char_traits<char>::int_type get() |
1248 | { |
1249 | ++position.chars_read_total; |
1250 | ++position.chars_read_current_line; |
1251 | |
1252 | if (next_unget) |
1253 | { |
1254 | // just reset the next_unget variable and work with current |
1255 | next_unget = false; |
1256 | } |
1257 | else |
1258 | { |
1259 | current = ia->get_character(); |
1260 | } |
1261 | |
1262 | if (JSON_HEDLEY_LIKELY(current != std::char_traits<char>::eof())) |
1263 | { |
1264 | token_string.push_back(std::char_traits<char>::to_char_type(current)); |
1265 | } |
1266 | |
1267 | if (current == '\n') |
1268 | { |
1269 | ++position.lines_read; |
1270 | position.chars_read_current_line = 0; |
1271 | } |
1272 | |
1273 | return current; |
1274 | } |
1275 | |
1276 | /*! |
1277 | @brief unget current character (read it again on next get) |
1278 | |
1279 | We implement unget by setting variable next_unget to true. The input is not |
1280 | changed - we just simulate ungetting by modifying chars_read_total, |
1281 | chars_read_current_line, and token_string. The next call to get() will |
1282 | behave as if the unget character is read again. |
1283 | */ |
1284 | void unget() |
1285 | { |
1286 | next_unget = true; |
1287 | |
1288 | --position.chars_read_total; |
1289 | |
1290 | // in case we "unget" a newline, we have to also decrement the lines_read |
1291 | if (position.chars_read_current_line == 0) |
1292 | { |
1293 | if (position.lines_read > 0) |
1294 | { |
1295 | --position.lines_read; |
1296 | } |
1297 | } |
1298 | else |
1299 | { |
1300 | --position.chars_read_current_line; |
1301 | } |
1302 | |
1303 | if (JSON_HEDLEY_LIKELY(current != std::char_traits<char>::eof())) |
1304 | { |
1305 | assert(not token_string.empty()); |
1306 | token_string.pop_back(); |
1307 | } |
1308 | } |
1309 | |
1310 | /// add a character to token_buffer |
1311 | void add(int c) |
1312 | { |
1313 | token_buffer.push_back(std::char_traits<char>::to_char_type(c)); |
1314 | } |
1315 | |
1316 | public: |
1317 | ///////////////////// |
1318 | // value getters |
1319 | ///////////////////// |
1320 | |
1321 | /// return integer value |
1322 | constexpr number_integer_t get_number_integer() const noexcept |
1323 | { |
1324 | return value_integer; |
1325 | } |
1326 | |
1327 | /// return unsigned integer value |
1328 | constexpr number_unsigned_t get_number_unsigned() const noexcept |
1329 | { |
1330 | return value_unsigned; |
1331 | } |
1332 | |
1333 | /// return floating-point value |
1334 | constexpr number_float_t get_number_float() const noexcept |
1335 | { |
1336 | return value_float; |
1337 | } |
1338 | |
1339 | /// return current string value (implicitly resets the token; useful only once) |
1340 | string_t& get_string() |
1341 | { |
1342 | return token_buffer; |
1343 | } |
1344 | |
1345 | ///////////////////// |
1346 | // diagnostics |
1347 | ///////////////////// |
1348 | |
1349 | /// return position of last read token |
1350 | constexpr position_t get_position() const noexcept |
1351 | { |
1352 | return position; |
1353 | } |
1354 | |
1355 | /// return the last read token (for errors only). Will never contain EOF |
1356 | /// (an arbitrary value that is not a valid char value, often -1), because |
1357 | /// 255 may legitimately occur. May contain NUL, which should be escaped. |
1358 | std::string get_token_string() const |
1359 | { |
1360 | // escape control characters |
1361 | std::string result; |
1362 | for (const auto c : token_string) |
1363 | { |
1364 | if ('\x00' <= c and c <= '\x1F') |
1365 | { |
1366 | // escape control characters |
1367 | std::array<char, 9> cs{{}}; |
1368 | (std::snprintf)(cs.data(), cs.size(), "<U+%.4X>" , static_cast<unsigned char>(c)); |
1369 | result += cs.data(); |
1370 | } |
1371 | else |
1372 | { |
1373 | // add character as is |
1374 | result.push_back(c); |
1375 | } |
1376 | } |
1377 | |
1378 | return result; |
1379 | } |
1380 | |
1381 | /// return syntax error message |
1382 | JSON_HEDLEY_RETURNS_NON_NULL |
1383 | constexpr const char* get_error_message() const noexcept |
1384 | { |
1385 | return error_message; |
1386 | } |
1387 | |
1388 | ///////////////////// |
1389 | // actual scanner |
1390 | ///////////////////// |
1391 | |
1392 | /*! |
1393 | @brief skip the UTF-8 byte order mark |
1394 | @return true iff there is no BOM or the correct BOM has been skipped |
1395 | */ |
1396 | bool skip_bom() |
1397 | { |
1398 | if (get() == 0xEF) |
1399 | { |
1400 | // check if we completely parse the BOM |
1401 | return get() == 0xBB and get() == 0xBF; |
1402 | } |
1403 | |
1404 | // the first character is not the beginning of the BOM; unget it to |
1405 | // process is later |
1406 | unget(); |
1407 | return true; |
1408 | } |
1409 | |
1410 | token_type scan() |
1411 | { |
1412 | // initially, skip the BOM |
1413 | if (position.chars_read_total == 0 and not skip_bom()) |
1414 | { |
1415 | error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given" ; |
1416 | return token_type::parse_error; |
1417 | } |
1418 | |
1419 | // read next character and ignore whitespace |
1420 | do |
1421 | { |
1422 | get(); |
1423 | } |
1424 | while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); |
1425 | |
1426 | switch (current) |
1427 | { |
1428 | // structural characters |
1429 | case '[': |
1430 | return token_type::begin_array; |
1431 | case ']': |
1432 | return token_type::end_array; |
1433 | case '{': |
1434 | return token_type::begin_object; |
1435 | case '}': |
1436 | return token_type::end_object; |
1437 | case ':': |
1438 | return token_type::name_separator; |
1439 | case ',': |
1440 | return token_type::value_separator; |
1441 | |
1442 | // literals |
1443 | case 't': |
1444 | return scan_literal("true" , 4, token_type::literal_true); |
1445 | case 'f': |
1446 | return scan_literal("false" , 5, token_type::literal_false); |
1447 | case 'n': |
1448 | return scan_literal("null" , 4, token_type::literal_null); |
1449 | |
1450 | // string |
1451 | case '\"': |
1452 | return scan_string(); |
1453 | |
1454 | // number |
1455 | case '-': |
1456 | case '0': |
1457 | case '1': |
1458 | case '2': |
1459 | case '3': |
1460 | case '4': |
1461 | case '5': |
1462 | case '6': |
1463 | case '7': |
1464 | case '8': |
1465 | case '9': |
1466 | return scan_number(); |
1467 | |
1468 | // end of input (the null byte is needed when parsing from |
1469 | // string literals) |
1470 | case '\0': |
1471 | case std::char_traits<char>::eof(): |
1472 | return token_type::end_of_input; |
1473 | |
1474 | // error |
1475 | default: |
1476 | error_message = "invalid literal" ; |
1477 | return token_type::parse_error; |
1478 | } |
1479 | } |
1480 | |
1481 | private: |
/// input adapter; get() pulls characters from it via get_character()
detail::input_adapter_t ia = nullptr;

/// the current character, i.e. the one most recently returned by get();
/// holds EOF until the first character has been read
std::char_traits<char>::int_type current = std::char_traits<char>::eof();

/// whether the next get() call should just return current; set by unget()
/// and cleared again by the following get()
bool next_unget = false;

/// read position: total characters, characters in the current line, and
/// lines read; advanced by get() and rolled back by unget()
position_t position {};

/// raw input token string (for error messages); restarted by reset(),
/// extended by get(), and shortened by unget()
std::vector<char> token_string {};

/// buffer for variable-length tokens (numbers, strings); filled through
/// add() and handed out by get_string()
string_t token_buffer {};

/// a description of occurred lexer errors; empty until an error is recorded
const char* error_message = "";

// number values; the number scanner stores the converted value in the
// member that matches the token type it returns
number_integer_t value_integer = 0;
number_unsigned_t value_unsigned = 0;
number_float_t value_float = 0;

/// the decimal point character; defaults to '.'
/// (NOTE(review): not referenced in this part of the file -- presumably
/// consulted by the number scanner; confirm)
const char decimal_point_char = '.';
1510 | }; |
1511 | } // namespace detail |
1512 | } // namespace nlohmann |
1513 | |