1 | // Tencent is pleased to support the open source community by making RapidJSON available. |
2 | // |
3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. |
4 | // |
5 | // Licensed under the MIT License (the "License"); you may not use this file except |
6 | // in compliance with the License. You may obtain a copy of the License at |
7 | // |
8 | // http://opensource.org/licenses/MIT |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software distributed |
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR |
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the |
13 | // specific language governing permissions and limitations under the License. |
14 | |
15 | #ifndef RAPIDJSON_READER_H_ |
16 | #define RAPIDJSON_READER_H_ |
17 | |
18 | /*! \file reader.h */ |
19 | |
20 | #include "allocators.h" |
21 | #include "stream.h" |
22 | #include "encodedstream.h" |
23 | #include "internal/meta.h" |
24 | #include "internal/stack.h" |
25 | #include "internal/strtod.h" |
26 | #include <limits> |
27 | |
28 | #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) |
29 | #include <intrin.h> |
30 | #pragma intrinsic(_BitScanForward) |
31 | #endif |
32 | #ifdef RAPIDJSON_SSE42 |
33 | #include <nmmintrin.h> |
34 | #elif defined(RAPIDJSON_SSE2) |
35 | #include <emmintrin.h> |
36 | #elif defined(RAPIDJSON_NEON) |
37 | #include <arm_neon.h> |
38 | #endif |
39 | |
// Silence selected compiler diagnostics while the rest of this header is processed.
// NOTE(review): the matching RAPIDJSON_DIAG_POP calls are expected further down the file,
// outside the part visible here -- confirm.
#ifdef __clang__
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(old-style-cast)
RAPIDJSON_DIAG_OFF(padded)
RAPIDJSON_DIAG_OFF(switch-enum)
#elif defined(_MSC_VER)
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
RAPIDJSON_DIAG_OFF(4702) // unreachable code
#endif

// This test is independent of the chain above, so a compiler that defines both
// __clang__ and __GNUC__ performs two pushes.
#ifdef __GNUC__
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(effc++)
#endif
55 | |
//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing; used as the "value" of an early return from a void function.
#define RAPIDJSON_NOTHING /* deliberately empty */
// Returns `value` from the enclosing function when HasParseError() reports an error.
// HasParseError() is called unqualified, so the macro must expand where that name is in scope.
// A definition supplied before this header is included takes precedence.
#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
    RAPIDJSON_MULTILINEMACRO_END
#endif
// Same check for functions returning void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
//!@endcond
67 | |
68 | /*! \def RAPIDJSON_PARSE_ERROR_NORETURN |
69 | \ingroup RAPIDJSON_ERRORS |
70 | \brief Macro to indicate a parse error. |
71 | \param parseErrorCode \ref rapidjson::ParseErrorCode of the error |
72 | \param offset position of the error in JSON input (\c size_t) |
73 | |
    This macro can be used as a customization point for the internal
75 | error handling mechanism of RapidJSON. |
76 | |
77 | A common usage model is to throw an exception instead of requiring the |
78 | caller to explicitly check the \ref rapidjson::GenericReader::Parse's |
79 | return value: |
80 | |
81 | \code |
82 | #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \ |
83 | throw ParseException(parseErrorCode, #parseErrorCode, offset) |
84 | |
85 | #include <stdexcept> // std::runtime_error |
86 | #include "rapidjson/error/error.h" // rapidjson::ParseResult |
87 | |
88 | struct ParseException : std::runtime_error, rapidjson::ParseResult { |
89 | ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset) |
90 | : std::runtime_error(msg), ParseResult(code, offset) {} |
91 | }; |
92 | |
93 | #include "rapidjson/reader.h" |
94 | \endcode |
95 | |
96 | \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse |
97 | */ |
// Default implementation: asserts that no error has been recorded yet, then stores the
// code and offset through SetParseError(). Both HasParseError() and SetParseError() are
// called unqualified, so the macro must expand where those names are in scope.
#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
    SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
105 | |
106 | /*! \def RAPIDJSON_PARSE_ERROR |
107 | \ingroup RAPIDJSON_ERRORS |
108 | \brief (Internal) macro to indicate and handle a parse error. |
109 | \param parseErrorCode \ref rapidjson::ParseErrorCode of the error |
110 | \param offset position of the error in JSON input (\c size_t) |
111 | |
112 | Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing. |
113 | |
114 | \see RAPIDJSON_PARSE_ERROR_NORETURN |
115 | \hideinitializer |
116 | */ |
// Default implementation: records the error through RAPIDJSON_PARSE_ERROR_NORETURN, then
// leaves the enclosing (void) function through RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID.
// A definition supplied before this header is included takes precedence.
#ifndef RAPIDJSON_PARSE_ERROR
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
124 | |
125 | #include "error/error.h" // ParseErrorCode, ParseResult |
126 | |
127 | RAPIDJSON_NAMESPACE_BEGIN |
128 | |
129 | /////////////////////////////////////////////////////////////////////////////// |
130 | // ParseFlag |
131 | |
/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
    \ingroup RAPIDJSON_CONFIG
    \brief User-defined kParseDefaultFlags definition.

    Users can define this as any combination of \c ParseFlag values.
*/
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Combination of parseFlags
/*! Every flag other than kParseNoFlags and kParseDefaultFlags occupies its own bit,
    so flags can be combined with bitwise OR and tested with bitwise AND.

    \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
*/
enum ParseFlag {
    kParseNoFlags = 0,              //!< No flags are set.
    kParseInsituFlag = 1,           //!< In-situ(destructive) parsing.
    kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
    kParseIterativeFlag = 4,        //!< Iterative(constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag = 8,     //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseFullPrecisionFlag = 16,   //!< Parse number in full precision (but slower).
    kParseCommentsFlag = 32,        //!< Allow one-line (//) and multi-line (/**/) comments.
    kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
    kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
    kParseNanAndInfFlag = 256,      //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
};
158 | |
159 | /////////////////////////////////////////////////////////////////////////////// |
160 | // Handler |
161 | |
162 | /*! \class rapidjson::Handler |
163 | \brief Concept for receiving events from GenericReader upon parsing. |
164 | The functions return true if no error occurs. If they return false, |
165 | the event publisher should terminate the process. |
166 | \code |
167 | concept Handler { |
168 | typename Ch; |
169 | |
170 | bool Null(); |
171 | bool Bool(bool b); |
172 | bool Int(int i); |
173 | bool Uint(unsigned i); |
174 | bool Int64(int64_t i); |
175 | bool Uint64(uint64_t i); |
176 | bool Double(double d); |
177 | /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) |
178 | bool RawNumber(const Ch* str, SizeType length, bool copy); |
179 | bool String(const Ch* str, SizeType length, bool copy); |
180 | bool StartObject(); |
181 | bool Key(const Ch* str, SizeType length, bool copy); |
182 | bool EndObject(SizeType memberCount); |
183 | bool StartArray(); |
184 | bool EndArray(SizeType elementCount); |
185 | }; |
186 | \endcode |
187 | */ |
188 | /////////////////////////////////////////////////////////////////////////////// |
189 | // BaseReaderHandler |
190 | |
191 | //! Default implementation of Handler. |
192 | /*! This can be used as base class of any reader handler. |
193 | \note implements Handler concept |
194 | */ |
195 | template<typename Encoding = UTF8<>, typename Derived = void> |
196 | struct BaseReaderHandler { |
197 | typedef typename Encoding::Ch Ch; |
198 | |
199 | typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override; |
200 | |
201 | bool Default() { return true; } |
202 | bool Null() { return static_cast<Override&>(*this).Default(); } |
203 | bool Bool(bool) { return static_cast<Override&>(*this).Default(); } |
204 | bool Int(int) { return static_cast<Override&>(*this).Default(); } |
205 | bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); } |
206 | bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); } |
207 | bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); } |
208 | bool Double(double) { return static_cast<Override&>(*this).Default(); } |
209 | /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) |
210 | bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } |
211 | bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); } |
212 | bool StartObject() { return static_cast<Override&>(*this).Default(); } |
213 | bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } |
214 | bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); } |
215 | bool StartArray() { return static_cast<Override&>(*this).Default(); } |
216 | bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); } |
217 | }; |
218 | |
219 | /////////////////////////////////////////////////////////////////////////////// |
220 | // StreamLocalCopy |
221 | |
222 | namespace internal { |
223 | |
224 | template<typename Stream, int = StreamTraits<Stream>::copyOptimization> |
225 | class StreamLocalCopy; |
226 | |
227 | //! Do copy optimization. |
228 | template<typename Stream> |
229 | class StreamLocalCopy<Stream, 1> { |
230 | public: |
231 | StreamLocalCopy(Stream& original) : s(original), original_(original) {} |
232 | ~StreamLocalCopy() { original_ = s; } |
233 | |
234 | Stream s; |
235 | |
236 | private: |
237 | StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; |
238 | |
239 | Stream& original_; |
240 | }; |
241 | |
242 | //! Keep reference. |
243 | template<typename Stream> |
244 | class StreamLocalCopy<Stream, 0> { |
245 | public: |
246 | StreamLocalCopy(Stream& original) : s(original) {} |
247 | |
248 | Stream& s; |
249 | |
250 | private: |
251 | StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; |
252 | }; |
253 | |
254 | } // namespace internal |
255 | |
256 | /////////////////////////////////////////////////////////////////////////////// |
257 | // SkipWhitespace |
258 | |
259 | //! Skip the JSON white spaces in a stream. |
260 | /*! \param is A input stream for skipping white spaces. |
261 | \note This function has SSE2/SSE4.2 specialization. |
262 | */ |
263 | template<typename InputStream> |
264 | void SkipWhitespace(InputStream& is) { |
265 | internal::StreamLocalCopy<InputStream> copy(is); |
266 | InputStream& s(copy.s); |
267 | |
268 | typename InputStream::Ch c; |
269 | while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') |
270 | s.Take(); |
271 | } |
272 | |
//! Skip JSON white spaces in the character range [p, end).
/*! \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first character that is not a space, line feed, carriage
            return or tab, or \c end when the whole range is whitespace.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char ch = *p;
        if (ch != ' ' && ch != '\n' && ch != '\r' && ch != '\t')
            break;
    }
    return p;
}
278 | |
279 | #ifdef RAPIDJSON_SSE42 |
//! Skip whitespace with the SSE 4.2 string-compare intrinsic (_mm_cmpistri), testing 16 8-bit characters at once.
/*! \param p Start of the text. No end pointer is taken, so the scan only stops at a
             non-whitespace character; aligned 16-byte blocks are loaded as a whole.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p != ' ' && *p != '\n' && *p != '\r' && *p != '\t')
        return p;
    ++p;

    // Advance byte by byte up to the next 16-byte boundary so that aligned loads can be used.
    const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != boundary; ++p)
        if (*p != ' ' && *p != '\n' && *p != '\r' && *p != '\t')
            return p;

    // The rest of string using SIMD
    static const char whitespace[16] = " \n\r\t";
    const __m128i set = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    for (;;) {
        const __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // Index of the first byte of the chunk that is not in the whitespace set; 16 when there is none.
        const int first = _mm_cmpistri(set, chunk, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
        if (first != 16)
            return p + first;
        p += 16;
    }
}
307 | |
308 | inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { |
309 | // Fast return for single non-whitespace |
310 | if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) |
311 | ++p; |
312 | else |
313 | return p; |
314 | |
315 | // The middle of string using SIMD |
316 | static const char whitespace[16] = " \n\r\t" ; |
317 | const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); |
318 | |
319 | for (; p <= end - 16; p += 16) { |
320 | const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); |
321 | const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); |
322 | if (r != 16) // some of characters is non-whitespace |
323 | return p + r; |
324 | } |
325 | |
326 | return SkipWhitespace(p, end); |
327 | } |
328 | |
329 | #elif defined(RAPIDJSON_SSE2) |
330 | |
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. No end pointer is taken, so the scan only stops at a
             non-whitespace character; aligned 16-byte blocks are loaded as a whole.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p != ' ' && *p != '\n' && *p != '\r' && *p != '\t')
        return p;
    ++p;

    // Advance byte by byte up to the next 16-byte boundary so that aligned loads can be used.
    const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != boundary; ++p)
        if (*p != ' ' && *p != '\n' && *p != '\r' && *p != '\t')
            return p;

    // One vector per whitespace character, with that character in all 16 lanes.
    const __m128i space   = _mm_set1_epi8(' ');
    const __m128i newline = _mm_set1_epi8('\n');
    const __m128i carriage = _mm_set1_epi8('\r');
    const __m128i tab     = _mm_set1_epi8('\t');

    for (;;) {
        const __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i *>(p));

        // Lanes holding any of the four whitespace characters become all-ones.
        __m128i isSpace = _mm_or_si128(_mm_cmpeq_epi8(chunk, space), _mm_cmpeq_epi8(chunk, newline));
        isSpace = _mm_or_si128(isSpace, _mm_cmpeq_epi8(chunk, carriage));
        isSpace = _mm_or_si128(isSpace, _mm_cmpeq_epi8(chunk, tab));

        // One bit per lane, set where the character is NOT whitespace.
        const unsigned short nonSpace = static_cast<unsigned short>(~_mm_movemask_epi8(isSpace));
        if (nonSpace != 0) {
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, nonSpace);
            return p + offset;
#else
            return p + __builtin_ffs(nonSpace) - 1;
#endif
        }
        p += 16;
    }
}
375 | |
376 | inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { |
377 | // Fast return for single non-whitespace |
378 | if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) |
379 | ++p; |
380 | else |
381 | return p; |
382 | |
383 | // The rest of string |
384 | #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } |
385 | static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; |
386 | #undef C16 |
387 | |
388 | const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); |
389 | const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); |
390 | const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); |
391 | const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); |
392 | |
393 | for (; p <= end - 16; p += 16) { |
394 | const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); |
395 | __m128i x = _mm_cmpeq_epi8(s, w0); |
396 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); |
397 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); |
398 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); |
399 | unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); |
400 | if (r != 0) { // some of characters may be non-whitespace |
401 | #ifdef _MSC_VER // Find the index of first non-whitespace |
402 | unsigned long offset; |
403 | _BitScanForward(&offset, r); |
404 | return p + offset; |
405 | #else |
406 | return p + __builtin_ffs(r) - 1; |
407 | #endif |
408 | } |
409 | } |
410 | |
411 | return SkipWhitespace(p, end); |
412 | } |
413 | |
414 | #elif defined(RAPIDJSON_NEON) |
415 | |
//! Skip whitespace with ARM Neon instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. No end pointer is taken, so the scan only stops at a
             non-whitespace character; 16-byte blocks are loaded as a whole.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    const uint8x16_t w0 = vmovq_n_u8(' ');
    const uint8x16_t w1 = vmovq_n_u8('\n');
    const uint8x16_t w2 = vmovq_n_u8('\r');
    const uint8x16_t w3 = vmovq_n_u8('\t');

    for (;; p += 16) {
        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
        uint8x16_t x = vceqq_u8(s, w0);
        x = vorrq_u8(x, vceqq_u8(s, w1));
        x = vorrq_u8(x, vceqq_u8(s, w2));
        x = vorrq_u8(x, vceqq_u8(s, w3));

        x = vmvnq_u8(x);                       // Negate: non-whitespace bytes become 0xFF
        x = vrev64q_u8(x);                     // Reverse the bytes inside each 64-bit half
        // Use the NEON reinterpret intrinsic rather than reinterpret_cast on a vector value,
        // which is only accepted as a compiler extension.
        const uint64x2_t halves = vreinterpretq_u64_u8(x);
        uint64_t low = vgetq_lane_u64(halves, 0);    // extract
        uint64_t high = vgetq_lane_u64(halves, 1);   // extract

        // After the byte reversal, each leading-zero count divided by 8 is the number of
        // whitespace bytes preceding the first non-whitespace byte of that half.
        if (low == 0) {
            if (high != 0) {
                int lz = __builtin_clzll(high);
                return p + 8 + (lz >> 3);
            }
        } else {
            int lz = __builtin_clzll(low);
            return p + (lz >> 3);
        }
    }
}
460 | |
//! Skip whitespace in [p, end) with ARM Neon instructions.
/*! \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first non-whitespace character, or \c end if there is none.
*/
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
    // Fast return for single non-whitespace
    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        ++p;
    else
        return p;

    const uint8x16_t w0 = vmovq_n_u8(' ');
    const uint8x16_t w1 = vmovq_n_u8('\n');
    const uint8x16_t w2 = vmovq_n_u8('\r');
    const uint8x16_t w3 = vmovq_n_u8('\t');

    // Compare the remaining length instead of forming `end - 16`: for inputs shorter than
    // 16 bytes that expression would point before the buffer, which is undefined behaviour.
    for (; end - p >= 16; p += 16) {
        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
        uint8x16_t x = vceqq_u8(s, w0);
        x = vorrq_u8(x, vceqq_u8(s, w1));
        x = vorrq_u8(x, vceqq_u8(s, w2));
        x = vorrq_u8(x, vceqq_u8(s, w3));

        x = vmvnq_u8(x);                       // Negate: non-whitespace bytes become 0xFF
        x = vrev64q_u8(x);                     // Reverse the bytes inside each 64-bit half
        // Use the NEON reinterpret intrinsic rather than reinterpret_cast on a vector value,
        // which is only accepted as a compiler extension.
        const uint64x2_t halves = vreinterpretq_u64_u8(x);
        uint64_t low = vgetq_lane_u64(halves, 0);    // extract
        uint64_t high = vgetq_lane_u64(halves, 1);   // extract

        if (low == 0) {
            if (high != 0) {
                int lz = __builtin_clzll(high);
                return p + 8 + (lz >> 3);
            }
        } else {
            int lz = __builtin_clzll(low);
            return p + (lz >> 3);
        }
    }

    // Fewer than 16 bytes left: finish with the scalar routine.
    return SkipWhitespace(p, end);
}
498 | |
499 | #endif // RAPIDJSON_NEON |
500 | |
501 | #ifdef RAPIDJSON_SIMD |
502 | //! Template function specialization for InsituStringStream |
503 | template<> inline void SkipWhitespace(InsituStringStream& is) { |
504 | is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_)); |
505 | } |
506 | |
507 | //! Template function specialization for StringStream |
508 | template<> inline void SkipWhitespace(StringStream& is) { |
509 | is.src_ = SkipWhitespace_SIMD(is.src_); |
510 | } |
511 | |
512 | template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) { |
513 | is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_); |
514 | } |
515 | #endif // RAPIDJSON_SIMD |
516 | |
517 | /////////////////////////////////////////////////////////////////////////////// |
518 | // GenericReader |
519 | |
520 | //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. |
/*! GenericReader parses JSON text from a stream, and sends events synchronously to an
    object implementing the Handler concept.
523 | |
524 | It needs to allocate a stack for storing a single decoded string during |
525 | non-destructive parsing. |
526 | |
527 | For in-situ parsing, the decoded string is directly written to the source |
528 | text string, no temporary buffer is required. |
529 | |
    A GenericReader object can be reused for parsing multiple JSON texts.
531 | |
532 | \tparam SourceEncoding Encoding of the input stream. |
533 | \tparam TargetEncoding Encoding of the parse output. |
534 | \tparam StackAllocator Allocator type for stack. |
535 | */ |
536 | template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator> |
537 | class GenericReader { |
538 | public: |
539 | typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type |
540 | |
//! Constructor.
/*! Forwards both arguments to the internal stack, value-initializes the parse result and
    puts the iterative-parsing state at its start state.
    \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
    \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
*/
GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
    stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
547 | |
548 | //! Parse JSON text. |
549 | /*! \tparam parseFlags Combination of \ref ParseFlag. |
550 | \tparam InputStream Type of input stream, implementing Stream concept. |
551 | \tparam Handler Type of handler, implementing Handler concept. |
552 | \param is Input stream to be parsed. |
553 | \param handler The handler to receive events. |
554 | \return Whether the parsing is successful. |
555 | */ |
556 | template <unsigned parseFlags, typename InputStream, typename Handler> |
557 | ParseResult Parse(InputStream& is, Handler& handler) { |
558 | if (parseFlags & kParseIterativeFlag) |
559 | return IterativeParse<parseFlags>(is, handler); |
560 | |
561 | parseResult_.Clear(); |
562 | |
563 | ClearStackOnExit scope(*this); |
564 | |
565 | SkipWhitespaceAndComments<parseFlags>(is); |
566 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
567 | |
568 | if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) { |
569 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell()); |
570 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
571 | } |
572 | else { |
573 | ParseValue<parseFlags>(is, handler); |
574 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
575 | |
576 | if (!(parseFlags & kParseStopWhenDoneFlag)) { |
577 | SkipWhitespaceAndComments<parseFlags>(is); |
578 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
579 | |
580 | if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) { |
581 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); |
582 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
583 | } |
584 | } |
585 | } |
586 | |
587 | return parseResult_; |
588 | } |
589 | |
//! Parse JSON text (with \ref kParseDefaultFlags)
/*! Convenience overload that forwards to the flag-taking Parse() with \ref kParseDefaultFlags.
    \tparam InputStream Type of input stream, implementing Stream concept
    \tparam Handler Type of handler, implementing Handler concept.
    \param is Input stream to be parsed.
    \param handler The handler to receive events.
    \return The parse result returned by the flag-taking overload.
*/
template <typename InputStream, typename Handler>
ParseResult Parse(InputStream& is, Handler& handler) {
    return Parse<kParseDefaultFlags>(is, handler);
}
601 | |
602 | //! Initialize JSON text token-by-token parsing |
603 | /*! |
604 | */ |
605 | void IterativeParseInit() { |
606 | parseResult_.Clear(); |
607 | state_ = IterativeParsingStartState; |
608 | } |
609 | |
610 | //! Parse one token from JSON text |
611 | /*! \tparam InputStream Type of input stream, implementing Stream concept |
612 | \tparam Handler Type of handler, implementing Handler concept. |
613 | \param is Input stream to be parsed. |
614 | \param handler The handler to receive events. |
615 | \return Whether the parsing is successful. |
616 | */ |
617 | template <unsigned parseFlags, typename InputStream, typename Handler> |
618 | bool IterativeParseNext(InputStream& is, Handler& handler) { |
619 | while (RAPIDJSON_LIKELY(is.Peek() != '\0')) { |
620 | SkipWhitespaceAndComments<parseFlags>(is); |
621 | |
622 | Token t = Tokenize(is.Peek()); |
623 | IterativeParsingState n = Predict(state_, t); |
624 | IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler); |
625 | |
626 | // If we've finished or hit an error... |
627 | if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) { |
628 | // Report errors. |
629 | if (d == IterativeParsingErrorState) { |
630 | HandleError(state_, is); |
631 | return false; |
632 | } |
633 | |
634 | // Transition to the finish state. |
635 | RAPIDJSON_ASSERT(d == IterativeParsingFinishState); |
636 | state_ = d; |
637 | |
638 | // If StopWhenDone is not set... |
639 | if (!(parseFlags & kParseStopWhenDoneFlag)) { |
640 | // ... and extra non-whitespace data is found... |
641 | SkipWhitespaceAndComments<parseFlags>(is); |
642 | if (is.Peek() != '\0') { |
643 | // ... this is considered an error. |
644 | HandleError(state_, is); |
645 | return false; |
646 | } |
647 | } |
648 | |
649 | // Success! We are done! |
650 | return true; |
651 | } |
652 | |
653 | // Transition to the new state. |
654 | state_ = d; |
655 | |
656 | // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now. |
657 | if (!IsIterativeParsingDelimiterState(n)) |
658 | return true; |
659 | } |
660 | |
661 | // We reached the end of file. |
662 | stack_.Clear(); |
663 | |
664 | if (state_ != IterativeParsingFinishState) { |
665 | HandleError(state_, is); |
666 | return false; |
667 | } |
668 | |
669 | return true; |
670 | } |
671 | |
//! Check if token-by-token parsing JSON text is complete
/*! \return Whether the current state is one that IsIterativeParsingCompleteState() accepts.
    \note NOTE(review): IterativeParseNext() handles both the finish state and the error
          state inside its "complete" branch, so this presumably does not distinguish
          success from failure -- confirm, and use HasParseError() for that.
*/
RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
    return IsIterativeParsingCompleteState(state_);
}
678 | |
//! Whether a parse error has occurred in the last parsing.
bool HasParseError() const { return parseResult_.IsError(); }

//! Get the \ref ParseErrorCode of last parsing.
ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }

//! Get the position of last parsing error in input, 0 otherwise.
size_t GetErrorOffset() const { return parseResult_.Offset(); }

protected:
//! Store an error code and its offset in the parse result.
/*! This is what the default RAPIDJSON_PARSE_ERROR_NORETURN macro calls. */
void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
690 | |
private:
// Prohibit copy constructor & assignment operator.
// Both are declared private and have no definition in this part of the file.
GenericReader(const GenericReader&);
GenericReader& operator=(const GenericReader&);

// Empties the internal stack; called from ClearStackOnExit's destructor.
void ClearStack() { stack_.Clear(); }
697 | |
698 | // clear stack on any exit from ParseStream, e.g. due to exception |
699 | struct ClearStackOnExit { |
700 | explicit ClearStackOnExit(GenericReader& r) : r_(r) {} |
701 | ~ClearStackOnExit() { r_.ClearStack(); } |
702 | private: |
703 | GenericReader& r_; |
704 | ClearStackOnExit(const ClearStackOnExit&); |
705 | ClearStackOnExit& operator=(const ClearStackOnExit&); |
706 | }; |
707 | |
708 | template<unsigned parseFlags, typename InputStream> |
709 | void SkipWhitespaceAndComments(InputStream& is) { |
710 | SkipWhitespace(is); |
711 | |
712 | if (parseFlags & kParseCommentsFlag) { |
713 | while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) { |
714 | if (Consume(is, '*')) { |
715 | while (true) { |
716 | if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) |
717 | RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); |
718 | else if (Consume(is, '*')) { |
719 | if (Consume(is, '/')) |
720 | break; |
721 | } |
722 | else |
723 | is.Take(); |
724 | } |
725 | } |
726 | else if (RAPIDJSON_LIKELY(Consume(is, '/'))) |
727 | while (is.Peek() != '\0' && is.Take() != '\n') {} |
728 | else |
729 | RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); |
730 | |
731 | SkipWhitespace(is); |
732 | } |
733 | } |
734 | } |
735 | |
736 | // Parse object: { string : value, ... } |
737 | template<unsigned parseFlags, typename InputStream, typename Handler> |
738 | void ParseObject(InputStream& is, Handler& handler) { |
739 | RAPIDJSON_ASSERT(is.Peek() == '{'); |
740 | is.Take(); // Skip '{' |
741 | |
742 | if (RAPIDJSON_UNLIKELY(!handler.StartObject())) |
743 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
744 | |
745 | SkipWhitespaceAndComments<parseFlags>(is); |
746 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
747 | |
748 | if (Consume(is, '}')) { |
749 | if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object |
750 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
751 | return; |
752 | } |
753 | |
754 | for (SizeType memberCount = 0;;) { |
755 | if (RAPIDJSON_UNLIKELY(is.Peek() != '"')) |
756 | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); |
757 | |
758 | ParseString<parseFlags>(is, handler, true); |
759 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
760 | |
761 | SkipWhitespaceAndComments<parseFlags>(is); |
762 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
763 | |
764 | if (RAPIDJSON_UNLIKELY(!Consume(is, ':'))) |
765 | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); |
766 | |
767 | SkipWhitespaceAndComments<parseFlags>(is); |
768 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
769 | |
770 | ParseValue<parseFlags>(is, handler); |
771 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
772 | |
773 | SkipWhitespaceAndComments<parseFlags>(is); |
774 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
775 | |
776 | ++memberCount; |
777 | |
778 | switch (is.Peek()) { |
779 | case ',': |
780 | is.Take(); |
781 | SkipWhitespaceAndComments<parseFlags>(is); |
782 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
783 | break; |
784 | case '}': |
785 | is.Take(); |
786 | if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) |
787 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
788 | return; |
789 | default: |
790 | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy |
791 | } |
792 | |
793 | if (parseFlags & kParseTrailingCommasFlag) { |
794 | if (is.Peek() == '}') { |
795 | if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) |
796 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
797 | is.Take(); |
798 | return; |
799 | } |
800 | } |
801 | } |
802 | } |
803 | |
804 | // Parse array: [ value, ... ] |
805 | template<unsigned parseFlags, typename InputStream, typename Handler> |
806 | void ParseArray(InputStream& is, Handler& handler) { |
807 | RAPIDJSON_ASSERT(is.Peek() == '['); |
808 | is.Take(); // Skip '[' |
809 | |
810 | if (RAPIDJSON_UNLIKELY(!handler.StartArray())) |
811 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
812 | |
813 | SkipWhitespaceAndComments<parseFlags>(is); |
814 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
815 | |
816 | if (Consume(is, ']')) { |
817 | if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array |
818 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
819 | return; |
820 | } |
821 | |
822 | for (SizeType elementCount = 0;;) { |
823 | ParseValue<parseFlags>(is, handler); |
824 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
825 | |
826 | ++elementCount; |
827 | SkipWhitespaceAndComments<parseFlags>(is); |
828 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
829 | |
830 | if (Consume(is, ',')) { |
831 | SkipWhitespaceAndComments<parseFlags>(is); |
832 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
833 | } |
834 | else if (Consume(is, ']')) { |
835 | if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) |
836 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
837 | return; |
838 | } |
839 | else |
840 | RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); |
841 | |
842 | if (parseFlags & kParseTrailingCommasFlag) { |
843 | if (is.Peek() == ']') { |
844 | if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) |
845 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
846 | is.Take(); |
847 | return; |
848 | } |
849 | } |
850 | } |
851 | } |
852 | |
853 | template<unsigned parseFlags, typename InputStream, typename Handler> |
854 | void ParseNull(InputStream& is, Handler& handler) { |
855 | RAPIDJSON_ASSERT(is.Peek() == 'n'); |
856 | is.Take(); |
857 | |
858 | if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) { |
859 | if (RAPIDJSON_UNLIKELY(!handler.Null())) |
860 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
861 | } |
862 | else |
863 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
864 | } |
865 | |
866 | template<unsigned parseFlags, typename InputStream, typename Handler> |
867 | void ParseTrue(InputStream& is, Handler& handler) { |
868 | RAPIDJSON_ASSERT(is.Peek() == 't'); |
869 | is.Take(); |
870 | |
871 | if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) { |
872 | if (RAPIDJSON_UNLIKELY(!handler.Bool(true))) |
873 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
874 | } |
875 | else |
876 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
877 | } |
878 | |
879 | template<unsigned parseFlags, typename InputStream, typename Handler> |
880 | void ParseFalse(InputStream& is, Handler& handler) { |
881 | RAPIDJSON_ASSERT(is.Peek() == 'f'); |
882 | is.Take(); |
883 | |
884 | if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) { |
885 | if (RAPIDJSON_UNLIKELY(!handler.Bool(false))) |
886 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
887 | } |
888 | else |
889 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
890 | } |
891 | |
892 | template<typename InputStream> |
893 | RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) { |
894 | if (RAPIDJSON_LIKELY(is.Peek() == expect)) { |
895 | is.Take(); |
896 | return true; |
897 | } |
898 | else |
899 | return false; |
900 | } |
901 | |
902 | // Helper function to parse four hexadecimal digits in \uXXXX in ParseString(). |
903 | template<typename InputStream> |
904 | unsigned ParseHex4(InputStream& is, size_t escapeOffset) { |
905 | unsigned codepoint = 0; |
906 | for (int i = 0; i < 4; i++) { |
907 | Ch c = is.Peek(); |
908 | codepoint <<= 4; |
909 | codepoint += static_cast<unsigned>(c); |
910 | if (c >= '0' && c <= '9') |
911 | codepoint -= '0'; |
912 | else if (c >= 'A' && c <= 'F') |
913 | codepoint -= 'A' - 10; |
914 | else if (c >= 'a' && c <= 'f') |
915 | codepoint -= 'a' - 10; |
916 | else { |
917 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset); |
918 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); |
919 | } |
920 | is.Take(); |
921 | } |
922 | return codepoint; |
923 | } |
924 | |
925 | template <typename CharType> |
926 | class StackStream { |
927 | public: |
928 | typedef CharType Ch; |
929 | |
930 | StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {} |
931 | RAPIDJSON_FORCEINLINE void Put(Ch c) { |
932 | *stack_.template Push<Ch>() = c; |
933 | ++length_; |
934 | } |
935 | |
936 | RAPIDJSON_FORCEINLINE void* Push(SizeType count) { |
937 | length_ += count; |
938 | return stack_.template Push<Ch>(count); |
939 | } |
940 | |
941 | size_t Length() const { return length_; } |
942 | |
943 | Ch* Pop() { |
944 | return stack_.template Pop<Ch>(length_); |
945 | } |
946 | |
947 | private: |
948 | StackStream(const StackStream&); |
949 | StackStream& operator=(const StackStream&); |
950 | |
951 | internal::Stack<StackAllocator>& stack_; |
952 | SizeType length_; |
953 | }; |
954 | |
955 | // Parse string and generate String event. Different code paths for kParseInsituFlag. |
956 | template<unsigned parseFlags, typename InputStream, typename Handler> |
957 | void ParseString(InputStream& is, Handler& handler, bool isKey = false) { |
958 | internal::StreamLocalCopy<InputStream> copy(is); |
959 | InputStream& s(copy.s); |
960 | |
961 | RAPIDJSON_ASSERT(s.Peek() == '\"'); |
962 | s.Take(); // Skip '\"' |
963 | |
964 | bool success = false; |
965 | if (parseFlags & kParseInsituFlag) { |
966 | typename InputStream::Ch *head = s.PutBegin(); |
967 | ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s); |
968 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
969 | size_t length = s.PutEnd(head) - 1; |
970 | RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); |
971 | const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); |
972 | success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false)); |
973 | } |
974 | else { |
975 | StackStream<typename TargetEncoding::Ch> stackStream(stack_); |
976 | ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream); |
977 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
978 | SizeType length = static_cast<SizeType>(stackStream.Length()) - 1; |
979 | const typename TargetEncoding::Ch* const str = stackStream.Pop(); |
980 | success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true)); |
981 | } |
982 | if (RAPIDJSON_UNLIKELY(!success)) |
983 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); |
984 | } |
985 | |
986 | // Parse string to an output is |
987 | // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation. |
988 | template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream> |
989 | RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) { |
990 | //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN |
991 | #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
992 | static const char escape[256] = { |
993 | Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', |
994 | Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, |
995 | 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, |
996 | 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
997 | Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 |
998 | }; |
999 | #undef Z16 |
1000 | //!@endcond |
1001 | |
1002 | for (;;) { |
1003 | // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation. |
1004 | if (!(parseFlags & kParseValidateEncodingFlag)) |
1005 | ScanCopyUnescapedString(is, os); |
1006 | |
1007 | Ch c = is.Peek(); |
1008 | if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape |
1009 | size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset |
1010 | is.Take(); |
1011 | Ch e = is.Peek(); |
1012 | if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) { |
1013 | is.Take(); |
1014 | os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)])); |
1015 | } |
1016 | else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode |
1017 | is.Take(); |
1018 | unsigned codepoint = ParseHex4(is, escapeOffset); |
1019 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
1020 | if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) { |
1021 | // Handle UTF-16 surrogate pair |
1022 | if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) |
1023 | RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); |
1024 | unsigned codepoint2 = ParseHex4(is, escapeOffset); |
1025 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
1026 | if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) |
1027 | RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); |
1028 | codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; |
1029 | } |
1030 | TEncoding::Encode(os, codepoint); |
1031 | } |
1032 | else |
1033 | RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); |
1034 | } |
1035 | else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote |
1036 | is.Take(); |
1037 | os.Put('\0'); // null-terminate the string |
1038 | return; |
1039 | } |
1040 | else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF |
1041 | if (c == '\0') |
1042 | RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell()); |
1043 | else |
1044 | RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell()); |
1045 | } |
1046 | else { |
1047 | size_t offset = is.Tell(); |
1048 | if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ? |
1049 | !Transcoder<SEncoding, TEncoding>::Validate(is, os) : |
1050 | !Transcoder<SEncoding, TEncoding>::Transcode(is, os)))) |
1051 | RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset); |
1052 | } |
1053 | } |
1054 | } |
1055 | |
1056 | template<typename InputStream, typename OutputStream> |
1057 | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) { |
1058 | // Do nothing for generic version |
1059 | } |
1060 | |
1061 | #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) |
1062 | // StringStream -> StackStream<char> |
1063 | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { |
1064 | const char* p = is.src_; |
1065 | |
1066 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1067 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1068 | while (p != nextAligned) |
1069 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1070 | is.src_ = p; |
1071 | return; |
1072 | } |
1073 | else |
1074 | os.Put(*p++); |
1075 | |
1076 | // The rest of string using SIMD |
1077 | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
1078 | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
1079 | static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; |
1080 | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
1081 | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
1082 | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
1083 | |
1084 | for (;; p += 16) { |
1085 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
1086 | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
1087 | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1088 | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F |
1089 | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1090 | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1091 | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1092 | SizeType length; |
1093 | #ifdef _MSC_VER // Find the index of first escaped |
1094 | unsigned long offset; |
1095 | _BitScanForward(&offset, r); |
1096 | length = offset; |
1097 | #else |
1098 | length = static_cast<SizeType>(__builtin_ffs(r) - 1); |
1099 | #endif |
1100 | if (length != 0) { |
1101 | char* q = reinterpret_cast<char*>(os.Push(length)); |
1102 | for (size_t i = 0; i < length; i++) |
1103 | q[i] = p[i]; |
1104 | |
1105 | p += length; |
1106 | } |
1107 | break; |
1108 | } |
1109 | _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s); |
1110 | } |
1111 | |
1112 | is.src_ = p; |
1113 | } |
1114 | |
1115 | // InsituStringStream -> InsituStringStream |
1116 | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { |
1117 | RAPIDJSON_ASSERT(&is == &os); |
1118 | (void)os; |
1119 | |
1120 | if (is.src_ == is.dst_) { |
1121 | SkipUnescapedString(is); |
1122 | return; |
1123 | } |
1124 | |
1125 | char* p = is.src_; |
1126 | char *q = is.dst_; |
1127 | |
1128 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1129 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1130 | while (p != nextAligned) |
1131 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1132 | is.src_ = p; |
1133 | is.dst_ = q; |
1134 | return; |
1135 | } |
1136 | else |
1137 | *q++ = *p++; |
1138 | |
1139 | // The rest of string using SIMD |
1140 | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
1141 | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
1142 | static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; |
1143 | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
1144 | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
1145 | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
1146 | |
1147 | for (;; p += 16, q += 16) { |
1148 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
1149 | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
1150 | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1151 | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F |
1152 | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1153 | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1154 | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1155 | size_t length; |
1156 | #ifdef _MSC_VER // Find the index of first escaped |
1157 | unsigned long offset; |
1158 | _BitScanForward(&offset, r); |
1159 | length = offset; |
1160 | #else |
1161 | length = static_cast<size_t>(__builtin_ffs(r) - 1); |
1162 | #endif |
1163 | for (const char* pend = p + length; p != pend; ) |
1164 | *q++ = *p++; |
1165 | break; |
1166 | } |
1167 | _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s); |
1168 | } |
1169 | |
1170 | is.src_ = p; |
1171 | is.dst_ = q; |
1172 | } |
1173 | |
1174 | // When read/write pointers are the same for insitu stream, just skip unescaped characters |
1175 | static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { |
1176 | RAPIDJSON_ASSERT(is.src_ == is.dst_); |
1177 | char* p = is.src_; |
1178 | |
1179 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1180 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1181 | for (; p != nextAligned; p++) |
1182 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1183 | is.src_ = is.dst_ = p; |
1184 | return; |
1185 | } |
1186 | |
1187 | // The rest of string using SIMD |
1188 | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
1189 | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
1190 | static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; |
1191 | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
1192 | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
1193 | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
1194 | |
1195 | for (;; p += 16) { |
1196 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
1197 | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
1198 | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1199 | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F |
1200 | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1201 | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1202 | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1203 | size_t length; |
1204 | #ifdef _MSC_VER // Find the index of first escaped |
1205 | unsigned long offset; |
1206 | _BitScanForward(&offset, r); |
1207 | length = offset; |
1208 | #else |
1209 | length = static_cast<size_t>(__builtin_ffs(r) - 1); |
1210 | #endif |
1211 | p += length; |
1212 | break; |
1213 | } |
1214 | } |
1215 | |
1216 | is.src_ = is.dst_ = p; |
1217 | } |
1218 | #elif defined(RAPIDJSON_NEON) |
1219 | // StringStream -> StackStream<char> |
1220 | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { |
1221 | const char* p = is.src_; |
1222 | |
1223 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1224 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1225 | while (p != nextAligned) |
1226 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1227 | is.src_ = p; |
1228 | return; |
1229 | } |
1230 | else |
1231 | os.Put(*p++); |
1232 | |
1233 | // The rest of string using SIMD |
1234 | const uint8x16_t s0 = vmovq_n_u8('"'); |
1235 | const uint8x16_t s1 = vmovq_n_u8('\\'); |
1236 | const uint8x16_t s2 = vmovq_n_u8('\b'); |
1237 | const uint8x16_t s3 = vmovq_n_u8(32); |
1238 | |
1239 | for (;; p += 16) { |
1240 | const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p)); |
1241 | uint8x16_t x = vceqq_u8(s, s0); |
1242 | x = vorrq_u8(x, vceqq_u8(s, s1)); |
1243 | x = vorrq_u8(x, vceqq_u8(s, s2)); |
1244 | x = vorrq_u8(x, vcltq_u8(s, s3)); |
1245 | |
1246 | x = vrev64q_u8(x); // Rev in 64 |
1247 | uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract |
1248 | uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract |
1249 | |
1250 | SizeType length = 0; |
1251 | bool escaped = false; |
1252 | if (low == 0) { |
1253 | if (high != 0) { |
1254 | unsigned lz = (unsigned)__builtin_clzll(high);; |
1255 | length = 8 + (lz >> 3); |
1256 | escaped = true; |
1257 | } |
1258 | } else { |
1259 | unsigned lz = (unsigned)__builtin_clzll(low);; |
1260 | length = lz >> 3; |
1261 | escaped = true; |
1262 | } |
1263 | if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped |
1264 | if (length != 0) { |
1265 | char* q = reinterpret_cast<char*>(os.Push(length)); |
1266 | for (size_t i = 0; i < length; i++) |
1267 | q[i] = p[i]; |
1268 | |
1269 | p += length; |
1270 | } |
1271 | break; |
1272 | } |
1273 | vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s); |
1274 | } |
1275 | |
1276 | is.src_ = p; |
1277 | } |
1278 | |
1279 | // InsituStringStream -> InsituStringStream |
1280 | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { |
1281 | RAPIDJSON_ASSERT(&is == &os); |
1282 | (void)os; |
1283 | |
1284 | if (is.src_ == is.dst_) { |
1285 | SkipUnescapedString(is); |
1286 | return; |
1287 | } |
1288 | |
1289 | char* p = is.src_; |
1290 | char *q = is.dst_; |
1291 | |
1292 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1293 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1294 | while (p != nextAligned) |
1295 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1296 | is.src_ = p; |
1297 | is.dst_ = q; |
1298 | return; |
1299 | } |
1300 | else |
1301 | *q++ = *p++; |
1302 | |
1303 | // The rest of string using SIMD |
1304 | const uint8x16_t s0 = vmovq_n_u8('"'); |
1305 | const uint8x16_t s1 = vmovq_n_u8('\\'); |
1306 | const uint8x16_t s2 = vmovq_n_u8('\b'); |
1307 | const uint8x16_t s3 = vmovq_n_u8(32); |
1308 | |
1309 | for (;; p += 16, q += 16) { |
1310 | const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p)); |
1311 | uint8x16_t x = vceqq_u8(s, s0); |
1312 | x = vorrq_u8(x, vceqq_u8(s, s1)); |
1313 | x = vorrq_u8(x, vceqq_u8(s, s2)); |
1314 | x = vorrq_u8(x, vcltq_u8(s, s3)); |
1315 | |
1316 | x = vrev64q_u8(x); // Rev in 64 |
1317 | uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract |
1318 | uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract |
1319 | |
1320 | SizeType length = 0; |
1321 | bool escaped = false; |
1322 | if (low == 0) { |
1323 | if (high != 0) { |
1324 | unsigned lz = (unsigned)__builtin_clzll(high); |
1325 | length = 8 + (lz >> 3); |
1326 | escaped = true; |
1327 | } |
1328 | } else { |
1329 | unsigned lz = (unsigned)__builtin_clzll(low); |
1330 | length = lz >> 3; |
1331 | escaped = true; |
1332 | } |
1333 | if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped |
1334 | for (const char* pend = p + length; p != pend; ) { |
1335 | *q++ = *p++; |
1336 | } |
1337 | break; |
1338 | } |
1339 | vst1q_u8(reinterpret_cast<uint8_t *>(q), s); |
1340 | } |
1341 | |
1342 | is.src_ = p; |
1343 | is.dst_ = q; |
1344 | } |
1345 | |
1346 | // When read/write pointers are the same for insitu stream, just skip unescaped characters |
1347 | static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { |
1348 | RAPIDJSON_ASSERT(is.src_ == is.dst_); |
1349 | char* p = is.src_; |
1350 | |
1351 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1352 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1353 | for (; p != nextAligned; p++) |
1354 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1355 | is.src_ = is.dst_ = p; |
1356 | return; |
1357 | } |
1358 | |
1359 | // The rest of string using SIMD |
1360 | const uint8x16_t s0 = vmovq_n_u8('"'); |
1361 | const uint8x16_t s1 = vmovq_n_u8('\\'); |
1362 | const uint8x16_t s2 = vmovq_n_u8('\b'); |
1363 | const uint8x16_t s3 = vmovq_n_u8(32); |
1364 | |
1365 | for (;; p += 16) { |
1366 | const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p)); |
1367 | uint8x16_t x = vceqq_u8(s, s0); |
1368 | x = vorrq_u8(x, vceqq_u8(s, s1)); |
1369 | x = vorrq_u8(x, vceqq_u8(s, s2)); |
1370 | x = vorrq_u8(x, vcltq_u8(s, s3)); |
1371 | |
1372 | x = vrev64q_u8(x); // Rev in 64 |
1373 | uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract |
1374 | uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract |
1375 | |
1376 | if (low == 0) { |
1377 | if (high != 0) { |
1378 | int lz = __builtin_clzll(high); |
1379 | p += 8 + (lz >> 3); |
1380 | break; |
1381 | } |
1382 | } else { |
1383 | int lz = __builtin_clzll(low); |
1384 | p += lz >> 3; |
1385 | break; |
1386 | } |
1387 | } |
1388 | |
1389 | is.src_ = is.dst_ = p; |
1390 | } |
1391 | #endif // RAPIDJSON_NEON |
1392 | |
1393 | template<typename InputStream, bool backup, bool pushOnTake> |
1394 | class NumberStream; |
1395 | |
1396 | template<typename InputStream> |
1397 | class NumberStream<InputStream, false, false> { |
1398 | public: |
1399 | typedef typename InputStream::Ch Ch; |
1400 | |
1401 | NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } |
1402 | |
1403 | RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } |
1404 | RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } |
1405 | RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } |
1406 | RAPIDJSON_FORCEINLINE void Push(char) {} |
1407 | |
1408 | size_t Tell() { return is.Tell(); } |
1409 | size_t Length() { return 0; } |
1410 | const char* Pop() { return 0; } |
1411 | |
1412 | protected: |
1413 | NumberStream& operator=(const NumberStream&); |
1414 | |
1415 | InputStream& is; |
1416 | }; |
1417 | |
1418 | template<typename InputStream> |
1419 | class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> { |
1420 | typedef NumberStream<InputStream, false, false> Base; |
1421 | public: |
1422 | NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {} |
1423 | |
1424 | RAPIDJSON_FORCEINLINE Ch TakePush() { |
1425 | stackStream.Put(static_cast<char>(Base::is.Peek())); |
1426 | return Base::is.Take(); |
1427 | } |
1428 | |
1429 | RAPIDJSON_FORCEINLINE void Push(char c) { |
1430 | stackStream.Put(c); |
1431 | } |
1432 | |
1433 | size_t Length() { return stackStream.Length(); } |
1434 | |
1435 | const char* Pop() { |
1436 | stackStream.Put('\0'); |
1437 | return stackStream.Pop(); |
1438 | } |
1439 | |
1440 | private: |
1441 | StackStream<char> stackStream; |
1442 | }; |
1443 | |
1444 | template<typename InputStream> |
1445 | class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> { |
1446 | typedef NumberStream<InputStream, true, false> Base; |
1447 | public: |
1448 | NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {} |
1449 | |
1450 | RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); } |
1451 | }; |
1452 | |
1453 | template<unsigned parseFlags, typename InputStream, typename Handler> |
1454 | void ParseNumber(InputStream& is, Handler& handler) { |
1455 | internal::StreamLocalCopy<InputStream> copy(is); |
1456 | NumberStream<InputStream, |
1457 | ((parseFlags & kParseNumbersAsStringsFlag) != 0) ? |
1458 | ((parseFlags & kParseInsituFlag) == 0) : |
1459 | ((parseFlags & kParseFullPrecisionFlag) != 0), |
1460 | (parseFlags & kParseNumbersAsStringsFlag) != 0 && |
1461 | (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s); |
1462 | |
1463 | size_t startOffset = s.Tell(); |
1464 | double d = 0.0; |
1465 | bool useNanOrInf = false; |
1466 | |
1467 | // Parse minus |
1468 | bool minus = Consume(s, '-'); |
1469 | |
1470 | // Parse int: zero / ( digit1-9 *DIGIT ) |
1471 | unsigned i = 0; |
1472 | uint64_t i64 = 0; |
1473 | bool use64bit = false; |
1474 | int significandDigit = 0; |
1475 | if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) { |
1476 | i = 0; |
1477 | s.TakePush(); |
1478 | } |
1479 | else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) { |
1480 | i = static_cast<unsigned>(s.TakePush() - '0'); |
1481 | |
1482 | if (minus) |
1483 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1484 | if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648 |
1485 | if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) { |
1486 | i64 = i; |
1487 | use64bit = true; |
1488 | break; |
1489 | } |
1490 | } |
1491 | i = i * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1492 | significandDigit++; |
1493 | } |
1494 | else |
1495 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1496 | if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295 |
1497 | if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) { |
1498 | i64 = i; |
1499 | use64bit = true; |
1500 | break; |
1501 | } |
1502 | } |
1503 | i = i * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1504 | significandDigit++; |
1505 | } |
1506 | } |
1507 | // Parse NaN or Infinity here |
1508 | else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) { |
1509 | if (Consume(s, 'N')) { |
1510 | if (Consume(s, 'a') && Consume(s, 'N')) { |
1511 | d = std::numeric_limits<double>::quiet_NaN(); |
1512 | useNanOrInf = true; |
1513 | } |
1514 | } |
1515 | else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) { |
1516 | if (Consume(s, 'n') && Consume(s, 'f')) { |
1517 | d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity()); |
1518 | useNanOrInf = true; |
1519 | |
1520 | if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n') |
1521 | && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) { |
1522 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); |
1523 | } |
1524 | } |
1525 | } |
1526 | |
1527 | if (RAPIDJSON_UNLIKELY(!useNanOrInf)) { |
1528 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); |
1529 | } |
1530 | } |
1531 | else |
1532 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); |
1533 | |
1534 | // Parse 64bit int |
1535 | bool useDouble = false; |
1536 | if (use64bit) { |
1537 | if (minus) |
1538 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1539 | if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808 |
1540 | if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) { |
1541 | d = static_cast<double>(i64); |
1542 | useDouble = true; |
1543 | break; |
1544 | } |
1545 | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1546 | significandDigit++; |
1547 | } |
1548 | else |
1549 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1550 | if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615 |
1551 | if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) { |
1552 | d = static_cast<double>(i64); |
1553 | useDouble = true; |
1554 | break; |
1555 | } |
1556 | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1557 | significandDigit++; |
1558 | } |
1559 | } |
1560 | |
1561 | // Force double for big integer |
1562 | if (useDouble) { |
1563 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1564 | d = d * 10 + (s.TakePush() - '0'); |
1565 | } |
1566 | } |
1567 | |
1568 | // Parse frac = decimal-point 1*DIGIT |
1569 | int expFrac = 0; |
1570 | size_t decimalPosition; |
1571 | if (Consume(s, '.')) { |
1572 | decimalPosition = s.Length(); |
1573 | |
1574 | if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9'))) |
1575 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell()); |
1576 | |
1577 | if (!useDouble) { |
1578 | #if RAPIDJSON_64BIT |
1579 | // Use i64 to store significand in 64-bit architecture |
1580 | if (!use64bit) |
1581 | i64 = i; |
1582 | |
1583 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1584 | if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path |
1585 | break; |
1586 | else { |
1587 | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1588 | --expFrac; |
1589 | if (i64 != 0) |
1590 | significandDigit++; |
1591 | } |
1592 | } |
1593 | |
1594 | d = static_cast<double>(i64); |
1595 | #else |
1596 | // Use double to store significand in 32-bit architecture |
1597 | d = static_cast<double>(use64bit ? i64 : i); |
1598 | #endif |
1599 | useDouble = true; |
1600 | } |
1601 | |
1602 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1603 | if (significandDigit < 17) { |
1604 | d = d * 10.0 + (s.TakePush() - '0'); |
1605 | --expFrac; |
1606 | if (RAPIDJSON_LIKELY(d > 0.0)) |
1607 | significandDigit++; |
1608 | } |
1609 | else |
1610 | s.TakePush(); |
1611 | } |
1612 | } |
1613 | else |
1614 | decimalPosition = s.Length(); // decimal position at the end of integer. |
1615 | |
1616 | // Parse exp = e [ minus / plus ] 1*DIGIT |
1617 | int exp = 0; |
1618 | if (Consume(s, 'e') || Consume(s, 'E')) { |
1619 | if (!useDouble) { |
1620 | d = static_cast<double>(use64bit ? i64 : i); |
1621 | useDouble = true; |
1622 | } |
1623 | |
1624 | bool expMinus = false; |
1625 | if (Consume(s, '+')) |
1626 | ; |
1627 | else if (Consume(s, '-')) |
1628 | expMinus = true; |
1629 | |
1630 | if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1631 | exp = static_cast<int>(s.Take() - '0'); |
1632 | if (expMinus) { |
1633 | // (exp + expFrac) must not underflow int => we're detecting when -exp gets |
1634 | // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into |
1635 | // underflow territory): |
1636 | // |
1637 | // -(exp * 10 + 9) + expFrac >= INT_MIN |
1638 | // <=> exp <= (expFrac - INT_MIN - 9) / 10 |
1639 | RAPIDJSON_ASSERT(expFrac <= 0); |
1640 | int maxExp = (expFrac + 2147483639) / 10; |
1641 | |
1642 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1643 | exp = exp * 10 + static_cast<int>(s.Take() - '0'); |
1644 | if (RAPIDJSON_UNLIKELY(exp > maxExp)) { |
1645 | while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent |
1646 | s.Take(); |
1647 | } |
1648 | } |
1649 | } |
1650 | else { // positive exp |
1651 | int maxExp = 308 - expFrac; |
1652 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1653 | exp = exp * 10 + static_cast<int>(s.Take() - '0'); |
1654 | if (RAPIDJSON_UNLIKELY(exp > maxExp)) |
1655 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); |
1656 | } |
1657 | } |
1658 | } |
1659 | else |
1660 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell()); |
1661 | |
1662 | if (expMinus) |
1663 | exp = -exp; |
1664 | } |
1665 | |
1666 | // Finish parsing, call event according to the type of number. |
1667 | bool cont = true; |
1668 | |
1669 | if (parseFlags & kParseNumbersAsStringsFlag) { |
1670 | if (parseFlags & kParseInsituFlag) { |
1671 | s.Pop(); // Pop stack no matter if it will be used or not. |
1672 | typename InputStream::Ch* head = is.PutBegin(); |
1673 | const size_t length = s.Tell() - startOffset; |
1674 | RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); |
1675 | // unable to insert the \0 character here, it will erase the comma after this number |
1676 | const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); |
1677 | cont = handler.RawNumber(str, SizeType(length), false); |
1678 | } |
1679 | else { |
1680 | SizeType numCharsToCopy = static_cast<SizeType>(s.Length()); |
1681 | StringStream srcStream(s.Pop()); |
1682 | StackStream<typename TargetEncoding::Ch> dstStream(stack_); |
1683 | while (numCharsToCopy--) { |
1684 | Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream); |
1685 | } |
1686 | dstStream.Put('\0'); |
1687 | const typename TargetEncoding::Ch* str = dstStream.Pop(); |
1688 | const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1; |
1689 | cont = handler.RawNumber(str, SizeType(length), true); |
1690 | } |
1691 | } |
1692 | else { |
1693 | size_t length = s.Length(); |
1694 | const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. |
1695 | |
1696 | if (useDouble) { |
1697 | int p = exp + expFrac; |
1698 | if (parseFlags & kParseFullPrecisionFlag) |
1699 | d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp); |
1700 | else |
1701 | d = internal::StrtodNormalPrecision(d, p); |
1702 | |
1703 | // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal |
1704 | if (d > (std::numeric_limits<double>::max)()) { |
1705 | // Overflow |
1706 | // TODO: internal::StrtodX should report overflow (or underflow) |
1707 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); |
1708 | } |
1709 | |
1710 | cont = handler.Double(minus ? -d : d); |
1711 | } |
1712 | else if (useNanOrInf) { |
1713 | cont = handler.Double(d); |
1714 | } |
1715 | else { |
1716 | if (use64bit) { |
1717 | if (minus) |
1718 | cont = handler.Int64(static_cast<int64_t>(~i64 + 1)); |
1719 | else |
1720 | cont = handler.Uint64(i64); |
1721 | } |
1722 | else { |
1723 | if (minus) |
1724 | cont = handler.Int(static_cast<int32_t>(~i + 1)); |
1725 | else |
1726 | cont = handler.Uint(i); |
1727 | } |
1728 | } |
1729 | } |
1730 | if (RAPIDJSON_UNLIKELY(!cont)) |
1731 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset); |
1732 | } |
1733 | |
1734 | // Parse any JSON value |
1735 | template<unsigned parseFlags, typename InputStream, typename Handler> |
1736 | void ParseValue(InputStream& is, Handler& handler) { |
1737 | switch (is.Peek()) { |
1738 | case 'n': ParseNull <parseFlags>(is, handler); break; |
1739 | case 't': ParseTrue <parseFlags>(is, handler); break; |
1740 | case 'f': ParseFalse <parseFlags>(is, handler); break; |
1741 | case '"': ParseString<parseFlags>(is, handler); break; |
1742 | case '{': ParseObject<parseFlags>(is, handler); break; |
1743 | case '[': ParseArray <parseFlags>(is, handler); break; |
1744 | default : |
1745 | ParseNumber<parseFlags>(is, handler); |
1746 | break; |
1747 | |
1748 | } |
1749 | } |
1750 | |
1751 | // Iterative Parsing |
1752 | |
// States of the iterative parser.
// The numeric order is significant:
//  - each value is the row index of the matching row in the transition table of Predict();
//  - the two sink states come first, so IsIterativeParsingCompleteState() is a single comparison;
//  - the three delimiter states come last, so IsIterativeParsingDelimiterState() is a single comparison.
enum IterativeParsingState {
    // Sink states (must stay at the top)
    IterativeParsingFinishState = 0,
    IterativeParsingErrorState = 1,

    IterativeParsingStartState = 2,

    // Object states
    IterativeParsingObjectInitialState = 3,
    IterativeParsingMemberKeyState = 4,
    IterativeParsingMemberValueState = 5,
    IterativeParsingObjectFinishState = 6,

    // Array states
    IterativeParsingArrayInitialState = 7,
    IterativeParsingElementState = 8,
    IterativeParsingArrayFinishState = 9,

    // Single value state
    IterativeParsingValueState = 10,

    // Delimiter states (must stay at the bottom)
    IterativeParsingElementDelimiterState = 11,
    IterativeParsingMemberDelimiterState = 12,
    IterativeParsingKeyValueDelimiterState = 13,

    // Number of states; used as the row count of the transition table.
    cIterativeParsingStateCount = 14
};
1780 | |
// Lookahead token classes.
// Each value is the column index of the matching column in the transition table of Predict().
enum Token {
    // Array brackets
    LeftBracketToken = 0,
    RightBracketToken = 1,

    // Object brackets
    LeftCurlyBracketToken = 2,
    RightCurlyBracketToken = 3,

    // Separators
    CommaToken = 4,
    ColonToken = 5,

    // Value starters
    StringToken = 6,
    FalseToken = 7,
    TrueToken = 8,
    NullToken = 9,
    NumberToken = 10,

    // Number of token classes; used as the column count of the transition table.
    kTokenCount = 11
};
1800 | |
1801 | RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const { |
1802 | |
1803 | //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN |
1804 | #define N NumberToken |
1805 | #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N |
1806 | // Maps from ASCII to Token |
1807 | static const unsigned char tokenMap[256] = { |
1808 | N16, // 00~0F |
1809 | N16, // 10~1F |
1810 | N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F |
1811 | N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F |
1812 | N16, // 40~4F |
1813 | N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F |
1814 | N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F |
1815 | N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F |
1816 | N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF |
1817 | }; |
1818 | #undef N |
1819 | #undef N16 |
1820 | //!@endcond |
1821 | |
1822 | if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) |
1823 | return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]); |
1824 | else |
1825 | return NumberToken; |
1826 | } |
1827 | |
1828 | RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const { |
1829 | // current state x one lookahead token -> new state |
1830 | static const char G[cIterativeParsingStateCount][kTokenCount] = { |
1831 | // Finish(sink state) |
1832 | { |
1833 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1834 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1835 | IterativeParsingErrorState |
1836 | }, |
1837 | // Error(sink state) |
1838 | { |
1839 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1840 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1841 | IterativeParsingErrorState |
1842 | }, |
1843 | // Start |
1844 | { |
1845 | IterativeParsingArrayInitialState, // Left bracket |
1846 | IterativeParsingErrorState, // Right bracket |
1847 | IterativeParsingObjectInitialState, // Left curly bracket |
1848 | IterativeParsingErrorState, // Right curly bracket |
1849 | IterativeParsingErrorState, // Comma |
1850 | IterativeParsingErrorState, // Colon |
1851 | IterativeParsingValueState, // String |
1852 | IterativeParsingValueState, // False |
1853 | IterativeParsingValueState, // True |
1854 | IterativeParsingValueState, // Null |
1855 | IterativeParsingValueState // Number |
1856 | }, |
1857 | // ObjectInitial |
1858 | { |
1859 | IterativeParsingErrorState, // Left bracket |
1860 | IterativeParsingErrorState, // Right bracket |
1861 | IterativeParsingErrorState, // Left curly bracket |
1862 | IterativeParsingObjectFinishState, // Right curly bracket |
1863 | IterativeParsingErrorState, // Comma |
1864 | IterativeParsingErrorState, // Colon |
1865 | IterativeParsingMemberKeyState, // String |
1866 | IterativeParsingErrorState, // False |
1867 | IterativeParsingErrorState, // True |
1868 | IterativeParsingErrorState, // Null |
1869 | IterativeParsingErrorState // Number |
1870 | }, |
1871 | // MemberKey |
1872 | { |
1873 | IterativeParsingErrorState, // Left bracket |
1874 | IterativeParsingErrorState, // Right bracket |
1875 | IterativeParsingErrorState, // Left curly bracket |
1876 | IterativeParsingErrorState, // Right curly bracket |
1877 | IterativeParsingErrorState, // Comma |
1878 | IterativeParsingKeyValueDelimiterState, // Colon |
1879 | IterativeParsingErrorState, // String |
1880 | IterativeParsingErrorState, // False |
1881 | IterativeParsingErrorState, // True |
1882 | IterativeParsingErrorState, // Null |
1883 | IterativeParsingErrorState // Number |
1884 | }, |
1885 | // MemberValue |
1886 | { |
1887 | IterativeParsingErrorState, // Left bracket |
1888 | IterativeParsingErrorState, // Right bracket |
1889 | IterativeParsingErrorState, // Left curly bracket |
1890 | IterativeParsingObjectFinishState, // Right curly bracket |
1891 | IterativeParsingMemberDelimiterState, // Comma |
1892 | IterativeParsingErrorState, // Colon |
1893 | IterativeParsingErrorState, // String |
1894 | IterativeParsingErrorState, // False |
1895 | IterativeParsingErrorState, // True |
1896 | IterativeParsingErrorState, // Null |
1897 | IterativeParsingErrorState // Number |
1898 | }, |
1899 | // ObjectFinish(sink state) |
1900 | { |
1901 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1902 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1903 | IterativeParsingErrorState |
1904 | }, |
1905 | // ArrayInitial |
1906 | { |
1907 | IterativeParsingArrayInitialState, // Left bracket(push Element state) |
1908 | IterativeParsingArrayFinishState, // Right bracket |
1909 | IterativeParsingObjectInitialState, // Left curly bracket(push Element state) |
1910 | IterativeParsingErrorState, // Right curly bracket |
1911 | IterativeParsingErrorState, // Comma |
1912 | IterativeParsingErrorState, // Colon |
1913 | IterativeParsingElementState, // String |
1914 | IterativeParsingElementState, // False |
1915 | IterativeParsingElementState, // True |
1916 | IterativeParsingElementState, // Null |
1917 | IterativeParsingElementState // Number |
1918 | }, |
1919 | // Element |
1920 | { |
1921 | IterativeParsingErrorState, // Left bracket |
1922 | IterativeParsingArrayFinishState, // Right bracket |
1923 | IterativeParsingErrorState, // Left curly bracket |
1924 | IterativeParsingErrorState, // Right curly bracket |
1925 | IterativeParsingElementDelimiterState, // Comma |
1926 | IterativeParsingErrorState, // Colon |
1927 | IterativeParsingErrorState, // String |
1928 | IterativeParsingErrorState, // False |
1929 | IterativeParsingErrorState, // True |
1930 | IterativeParsingErrorState, // Null |
1931 | IterativeParsingErrorState // Number |
1932 | }, |
1933 | // ArrayFinish(sink state) |
1934 | { |
1935 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1936 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1937 | IterativeParsingErrorState |
1938 | }, |
1939 | // Single Value (sink state) |
1940 | { |
1941 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1942 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1943 | IterativeParsingErrorState |
1944 | }, |
1945 | // ElementDelimiter |
1946 | { |
1947 | IterativeParsingArrayInitialState, // Left bracket(push Element state) |
1948 | IterativeParsingArrayFinishState, // Right bracket |
1949 | IterativeParsingObjectInitialState, // Left curly bracket(push Element state) |
1950 | IterativeParsingErrorState, // Right curly bracket |
1951 | IterativeParsingErrorState, // Comma |
1952 | IterativeParsingErrorState, // Colon |
1953 | IterativeParsingElementState, // String |
1954 | IterativeParsingElementState, // False |
1955 | IterativeParsingElementState, // True |
1956 | IterativeParsingElementState, // Null |
1957 | IterativeParsingElementState // Number |
1958 | }, |
1959 | // MemberDelimiter |
1960 | { |
1961 | IterativeParsingErrorState, // Left bracket |
1962 | IterativeParsingErrorState, // Right bracket |
1963 | IterativeParsingErrorState, // Left curly bracket |
1964 | IterativeParsingObjectFinishState, // Right curly bracket |
1965 | IterativeParsingErrorState, // Comma |
1966 | IterativeParsingErrorState, // Colon |
1967 | IterativeParsingMemberKeyState, // String |
1968 | IterativeParsingErrorState, // False |
1969 | IterativeParsingErrorState, // True |
1970 | IterativeParsingErrorState, // Null |
1971 | IterativeParsingErrorState // Number |
1972 | }, |
1973 | // KeyValueDelimiter |
1974 | { |
1975 | IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) |
1976 | IterativeParsingErrorState, // Right bracket |
1977 | IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) |
1978 | IterativeParsingErrorState, // Right curly bracket |
1979 | IterativeParsingErrorState, // Comma |
1980 | IterativeParsingErrorState, // Colon |
1981 | IterativeParsingMemberValueState, // String |
1982 | IterativeParsingMemberValueState, // False |
1983 | IterativeParsingMemberValueState, // True |
1984 | IterativeParsingMemberValueState, // Null |
1985 | IterativeParsingMemberValueState // Number |
1986 | }, |
1987 | }; // End of G |
1988 | |
1989 | return static_cast<IterativeParsingState>(G[state][token]); |
1990 | } |
1991 | |
1992 | // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). |
1993 | // May return a new state on state pop. |
1994 | template <unsigned parseFlags, typename InputStream, typename Handler> |
1995 | RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { |
1996 | (void)token; |
1997 | |
1998 | switch (dst) { |
1999 | case IterativeParsingErrorState: |
2000 | return dst; |
2001 | |
2002 | case IterativeParsingObjectInitialState: |
2003 | case IterativeParsingArrayInitialState: |
2004 | { |
2005 | // Push the state(Element or MemeberValue) if we are nested in another array or value of member. |
2006 | // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. |
2007 | IterativeParsingState n = src; |
2008 | if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) |
2009 | n = IterativeParsingElementState; |
2010 | else if (src == IterativeParsingKeyValueDelimiterState) |
2011 | n = IterativeParsingMemberValueState; |
2012 | // Push current state. |
2013 | *stack_.template Push<SizeType>(1) = n; |
2014 | // Initialize and push the member/element count. |
2015 | *stack_.template Push<SizeType>(1) = 0; |
2016 | // Call handler |
2017 | bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); |
2018 | // On handler short circuits the parsing. |
2019 | if (!hr) { |
2020 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); |
2021 | return IterativeParsingErrorState; |
2022 | } |
2023 | else { |
2024 | is.Take(); |
2025 | return dst; |
2026 | } |
2027 | } |
2028 | |
2029 | case IterativeParsingMemberKeyState: |
2030 | ParseString<parseFlags>(is, handler, true); |
2031 | if (HasParseError()) |
2032 | return IterativeParsingErrorState; |
2033 | else |
2034 | return dst; |
2035 | |
2036 | case IterativeParsingKeyValueDelimiterState: |
2037 | RAPIDJSON_ASSERT(token == ColonToken); |
2038 | is.Take(); |
2039 | return dst; |
2040 | |
2041 | case IterativeParsingMemberValueState: |
2042 | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. |
2043 | ParseValue<parseFlags>(is, handler); |
2044 | if (HasParseError()) { |
2045 | return IterativeParsingErrorState; |
2046 | } |
2047 | return dst; |
2048 | |
2049 | case IterativeParsingElementState: |
2050 | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. |
2051 | ParseValue<parseFlags>(is, handler); |
2052 | if (HasParseError()) { |
2053 | return IterativeParsingErrorState; |
2054 | } |
2055 | return dst; |
2056 | |
2057 | case IterativeParsingMemberDelimiterState: |
2058 | case IterativeParsingElementDelimiterState: |
2059 | is.Take(); |
2060 | // Update member/element count. |
2061 | *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1; |
2062 | return dst; |
2063 | |
2064 | case IterativeParsingObjectFinishState: |
2065 | { |
2066 | // Transit from delimiter is only allowed when trailing commas are enabled |
2067 | if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) { |
2068 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); |
2069 | return IterativeParsingErrorState; |
2070 | } |
2071 | // Get member count. |
2072 | SizeType c = *stack_.template Pop<SizeType>(1); |
2073 | // If the object is not empty, count the last member. |
2074 | if (src == IterativeParsingMemberValueState) |
2075 | ++c; |
2076 | // Restore the state. |
2077 | IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1)); |
2078 | // Transit to Finish state if this is the topmost scope. |
2079 | if (n == IterativeParsingStartState) |
2080 | n = IterativeParsingFinishState; |
2081 | // Call handler |
2082 | bool hr = handler.EndObject(c); |
2083 | // On handler short circuits the parsing. |
2084 | if (!hr) { |
2085 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); |
2086 | return IterativeParsingErrorState; |
2087 | } |
2088 | else { |
2089 | is.Take(); |
2090 | return n; |
2091 | } |
2092 | } |
2093 | |
2094 | case IterativeParsingArrayFinishState: |
2095 | { |
2096 | // Transit from delimiter is only allowed when trailing commas are enabled |
2097 | if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) { |
2098 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell()); |
2099 | return IterativeParsingErrorState; |
2100 | } |
2101 | // Get element count. |
2102 | SizeType c = *stack_.template Pop<SizeType>(1); |
2103 | // If the array is not empty, count the last element. |
2104 | if (src == IterativeParsingElementState) |
2105 | ++c; |
2106 | // Restore the state. |
2107 | IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1)); |
2108 | // Transit to Finish state if this is the topmost scope. |
2109 | if (n == IterativeParsingStartState) |
2110 | n = IterativeParsingFinishState; |
2111 | // Call handler |
2112 | bool hr = handler.EndArray(c); |
2113 | // On handler short circuits the parsing. |
2114 | if (!hr) { |
2115 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); |
2116 | return IterativeParsingErrorState; |
2117 | } |
2118 | else { |
2119 | is.Take(); |
2120 | return n; |
2121 | } |
2122 | } |
2123 | |
2124 | default: |
2125 | // This branch is for IterativeParsingValueState actually. |
2126 | // Use `default:` rather than |
2127 | // `case IterativeParsingValueState:` is for code coverage. |
2128 | |
2129 | // The IterativeParsingStartState is not enumerated in this switch-case. |
2130 | // It is impossible for that case. And it can be caught by following assertion. |
2131 | |
2132 | // The IterativeParsingFinishState is not enumerated in this switch-case either. |
2133 | // It is a "derivative" state which cannot triggered from Predict() directly. |
2134 | // Therefore it cannot happen here. And it can be caught by following assertion. |
2135 | RAPIDJSON_ASSERT(dst == IterativeParsingValueState); |
2136 | |
2137 | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. |
2138 | ParseValue<parseFlags>(is, handler); |
2139 | if (HasParseError()) { |
2140 | return IterativeParsingErrorState; |
2141 | } |
2142 | return IterativeParsingFinishState; |
2143 | } |
2144 | } |
2145 | |
2146 | template <typename InputStream> |
2147 | void HandleError(IterativeParsingState src, InputStream& is) { |
2148 | if (HasParseError()) { |
2149 | // Error flag has been set. |
2150 | return; |
2151 | } |
2152 | |
2153 | switch (src) { |
2154 | case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return; |
2155 | case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return; |
2156 | case IterativeParsingObjectInitialState: |
2157 | case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return; |
2158 | case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return; |
2159 | case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return; |
2160 | case IterativeParsingKeyValueDelimiterState: |
2161 | case IterativeParsingArrayInitialState: |
2162 | case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return; |
2163 | default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; |
2164 | } |
2165 | } |
2166 | |
2167 | RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const { |
2168 | return s >= IterativeParsingElementDelimiterState; |
2169 | } |
2170 | |
2171 | RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const { |
2172 | return s <= IterativeParsingErrorState; |
2173 | } |
2174 | |
2175 | template <unsigned parseFlags, typename InputStream, typename Handler> |
2176 | ParseResult IterativeParse(InputStream& is, Handler& handler) { |
2177 | parseResult_.Clear(); |
2178 | ClearStackOnExit scope(*this); |
2179 | IterativeParsingState state = IterativeParsingStartState; |
2180 | |
2181 | SkipWhitespaceAndComments<parseFlags>(is); |
2182 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
2183 | while (is.Peek() != '\0') { |
2184 | Token t = Tokenize(is.Peek()); |
2185 | IterativeParsingState n = Predict(state, t); |
2186 | IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler); |
2187 | |
2188 | if (d == IterativeParsingErrorState) { |
2189 | HandleError(state, is); |
2190 | break; |
2191 | } |
2192 | |
2193 | state = d; |
2194 | |
2195 | // Do not further consume streams if a root JSON has been parsed. |
2196 | if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState) |
2197 | break; |
2198 | |
2199 | SkipWhitespaceAndComments<parseFlags>(is); |
2200 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
2201 | } |
2202 | |
2203 | // Handle the end of file. |
2204 | if (state != IterativeParsingFinishState) |
2205 | HandleError(state, is); |
2206 | |
2207 | return parseResult_; |
2208 | } |
2209 | |
static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. Transit() also keeps the saved state and member/element count of every open object/array on it.
ParseResult parseResult_; //!< Result of the current parse; IterativeParse() clears it on entry and returns it.
IterativeParsingState state_; //!< NOTE(review): not referenced in this part of the file; presumably the state kept between calls for incremental parsing -- confirm.
2214 | }; // class GenericReader |
2215 | |
//! Reader with UTF8 encoding and default allocator.
/*! Both encoding arguments of GenericReader are set to UTF8<>; no allocator argument is given here. */
typedef GenericReader<UTF8<>, UTF8<> > Reader;
2218 | |
2219 | RAPIDJSON_NAMESPACE_END |
2220 | |
2221 | #if defined(__clang__) || defined(_MSC_VER) |
2222 | RAPIDJSON_DIAG_POP |
2223 | #endif |
2224 | |
2225 | |
2226 | #ifdef __GNUC__ |
2227 | RAPIDJSON_DIAG_POP |
2228 | #endif |
2229 | |
2230 | #endif // RAPIDJSON_READER_H_ |
2231 | |