1 | // Tencent is pleased to support the open source community by making RapidJSON available. |
2 | // |
3 | // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. |
4 | // |
5 | // Licensed under the MIT License (the "License"); you may not use this file except |
6 | // in compliance with the License. You may obtain a copy of the License at |
7 | // |
8 | // http://opensource.org/licenses/MIT |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software distributed |
11 | // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR |
12 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the |
13 | // specific language governing permissions and limitations under the License. |
14 | |
15 | #ifndef RAPIDJSON_READER_H_ |
16 | #define RAPIDJSON_READER_H_ |
17 | |
18 | /*! \file reader.h */ |
19 | |
20 | #include "allocators.h" |
21 | #include "stream.h" |
22 | #include "encodedstream.h" |
23 | #include "internal/meta.h" |
24 | #include "internal/stack.h" |
25 | #include "internal/strtod.h" |
26 | #include <limits> |
27 | |
28 | #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) |
29 | #include <intrin.h> |
30 | #pragma intrinsic(_BitScanForward) |
31 | #endif |
32 | #ifdef RAPIDJSON_SSE42 |
33 | #include <nmmintrin.h> |
34 | #elif defined(RAPIDJSON_SSE2) |
35 | #include <emmintrin.h> |
36 | #endif |
37 | |
38 | #ifdef _MSC_VER |
39 | RAPIDJSON_DIAG_PUSH |
40 | RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant |
41 | RAPIDJSON_DIAG_OFF(4702) // unreachable code |
42 | #endif |
43 | |
44 | #ifdef __clang__ |
45 | RAPIDJSON_DIAG_PUSH |
46 | RAPIDJSON_DIAG_OFF(old-style-cast) |
47 | RAPIDJSON_DIAG_OFF(padded) |
48 | RAPIDJSON_DIAG_OFF(switch-enum) |
49 | #endif |
50 | |
51 | #ifdef __GNUC__ |
52 | RAPIDJSON_DIAG_PUSH |
53 | RAPIDJSON_DIAG_OFF(effc++) |
54 | #endif |
55 | |
56 | //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN |
57 | #define RAPIDJSON_NOTHING /* deliberately empty */ |
58 | #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN |
59 | #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \ |
60 | RAPIDJSON_MULTILINEMACRO_BEGIN \ |
61 | if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \ |
62 | RAPIDJSON_MULTILINEMACRO_END |
63 | #endif |
64 | #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \ |
65 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING) |
66 | //!@endcond |
67 | |
68 | /*! \def RAPIDJSON_PARSE_ERROR_NORETURN |
69 | \ingroup RAPIDJSON_ERRORS |
70 | \brief Macro to indicate a parse error. |
71 | \param parseErrorCode \ref rapidjson::ParseErrorCode of the error |
72 | \param offset position of the error in JSON input (\c size_t) |
73 | |
    This macro can be used as a customization point for the internal
    error handling mechanism of RapidJSON.
76 | |
77 | A common usage model is to throw an exception instead of requiring the |
78 | caller to explicitly check the \ref rapidjson::GenericReader::Parse's |
79 | return value: |
80 | |
81 | \code |
82 | #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \ |
83 | throw ParseException(parseErrorCode, #parseErrorCode, offset) |
84 | |
85 | #include <stdexcept> // std::runtime_error |
86 | #include "rapidjson/error/error.h" // rapidjson::ParseResult |
87 | |
88 | struct ParseException : std::runtime_error, rapidjson::ParseResult { |
89 | ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset) |
90 | : std::runtime_error(msg), ParseResult(code, offset) {} |
91 | }; |
92 | |
93 | #include "rapidjson/reader.h" |
94 | \endcode |
95 | |
96 | \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse |
97 | */ |
98 | #ifndef RAPIDJSON_PARSE_ERROR_NORETURN |
99 | #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \ |
100 | RAPIDJSON_MULTILINEMACRO_BEGIN \ |
101 | RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \ |
102 | SetParseError(parseErrorCode, offset); \ |
103 | RAPIDJSON_MULTILINEMACRO_END |
104 | #endif |
105 | |
106 | /*! \def RAPIDJSON_PARSE_ERROR |
107 | \ingroup RAPIDJSON_ERRORS |
108 | \brief (Internal) macro to indicate and handle a parse error. |
109 | \param parseErrorCode \ref rapidjson::ParseErrorCode of the error |
110 | \param offset position of the error in JSON input (\c size_t) |
111 | |
112 | Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing. |
113 | |
114 | \see RAPIDJSON_PARSE_ERROR_NORETURN |
115 | \hideinitializer |
116 | */ |
117 | #ifndef RAPIDJSON_PARSE_ERROR |
118 | #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \ |
119 | RAPIDJSON_MULTILINEMACRO_BEGIN \ |
120 | RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \ |
121 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \ |
122 | RAPIDJSON_MULTILINEMACRO_END |
123 | #endif |
124 | |
125 | #include "error/error.h" // ParseErrorCode, ParseResult |
126 | |
127 | RAPIDJSON_NAMESPACE_BEGIN |
128 | |
129 | /////////////////////////////////////////////////////////////////////////////// |
130 | // ParseFlag |
131 | |
132 | /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS |
133 | \ingroup RAPIDJSON_CONFIG |
134 | \brief User-defined kParseDefaultFlags definition. |
135 | |
136 | User can define this as any \c ParseFlag combinations. |
137 | */ |
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Combination of parseFlags
/*! Each flag occupies its own bit so that flags can be OR-ed together and
    passed as the \c parseFlags template argument of the parsing functions.
    \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
 */
enum ParseFlag {
    kParseNoFlags = 0,              //!< No flags are set.
    kParseInsituFlag = 1,           //!< In-situ(destructive) parsing.
    kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
    kParseIterativeFlag = 4,        //!< Iterative(constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag = 8,     //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseFullPrecisionFlag = 16,   //!< Parse number in full precision (but slower).
    kParseCommentsFlag = 32,        //!< Allow one-line (//) and multi-line (/**/) comments.
    kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
    kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
    kParseNanAndInfFlag = 256,      //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
};
158 | |
159 | /////////////////////////////////////////////////////////////////////////////// |
160 | // Handler |
161 | |
162 | /*! \class rapidjson::Handler |
163 | \brief Concept for receiving events from GenericReader upon parsing. |
164 | The functions return true if no error occurs. If they return false, |
165 | the event publisher should terminate the process. |
166 | \code |
167 | concept Handler { |
168 | typename Ch; |
169 | |
170 | bool Null(); |
171 | bool Bool(bool b); |
172 | bool Int(int i); |
173 | bool Uint(unsigned i); |
174 | bool Int64(int64_t i); |
175 | bool Uint64(uint64_t i); |
176 | bool Double(double d); |
177 | /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) |
178 | bool RawNumber(const Ch* str, SizeType length, bool copy); |
179 | bool String(const Ch* str, SizeType length, bool copy); |
180 | bool StartObject(); |
181 | bool Key(const Ch* str, SizeType length, bool copy); |
182 | bool EndObject(SizeType memberCount); |
183 | bool StartArray(); |
184 | bool EndArray(SizeType elementCount); |
185 | }; |
186 | \endcode |
187 | */ |
188 | /////////////////////////////////////////////////////////////////////////////// |
189 | // BaseReaderHandler |
190 | |
191 | //! Default implementation of Handler. |
192 | /*! This can be used as base class of any reader handler. |
193 | \note implements Handler concept |
194 | */ |
195 | template<typename Encoding = UTF8<>, typename Derived = void> |
196 | struct BaseReaderHandler { |
197 | typedef typename Encoding::Ch Ch; |
198 | |
199 | typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override; |
200 | |
201 | bool Default() { return true; } |
202 | bool Null() { return static_cast<Override&>(*this).Default(); } |
203 | bool Bool(bool) { return static_cast<Override&>(*this).Default(); } |
204 | bool Int(int) { return static_cast<Override&>(*this).Default(); } |
205 | bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); } |
206 | bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); } |
207 | bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); } |
208 | bool Double(double) { return static_cast<Override&>(*this).Default(); } |
209 | /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) |
210 | bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } |
211 | bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); } |
212 | bool StartObject() { return static_cast<Override&>(*this).Default(); } |
213 | bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } |
214 | bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); } |
215 | bool StartArray() { return static_cast<Override&>(*this).Default(); } |
216 | bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); } |
217 | }; |
218 | |
219 | /////////////////////////////////////////////////////////////////////////////// |
220 | // StreamLocalCopy |
221 | |
222 | namespace internal { |
223 | |
224 | template<typename Stream, int = StreamTraits<Stream>::copyOptimization> |
225 | class StreamLocalCopy; |
226 | |
227 | //! Do copy optimization. |
228 | template<typename Stream> |
229 | class StreamLocalCopy<Stream, 1> { |
230 | public: |
231 | StreamLocalCopy(Stream& original) : s(original), original_(original) {} |
232 | ~StreamLocalCopy() { original_ = s; } |
233 | |
234 | Stream s; |
235 | |
236 | private: |
237 | StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; |
238 | |
239 | Stream& original_; |
240 | }; |
241 | |
242 | //! Keep reference. |
243 | template<typename Stream> |
244 | class StreamLocalCopy<Stream, 0> { |
245 | public: |
246 | StreamLocalCopy(Stream& original) : s(original) {} |
247 | |
248 | Stream& s; |
249 | |
250 | private: |
251 | StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */; |
252 | }; |
253 | |
254 | } // namespace internal |
255 | |
256 | /////////////////////////////////////////////////////////////////////////////// |
257 | // SkipWhitespace |
258 | |
259 | //! Skip the JSON white spaces in a stream. |
260 | /*! \param is A input stream for skipping white spaces. |
261 | \note This function has SSE2/SSE4.2 specialization. |
262 | */ |
263 | template<typename InputStream> |
264 | void SkipWhitespace(InputStream& is) { |
265 | internal::StreamLocalCopy<InputStream> copy(is); |
266 | InputStream& s(copy.s); |
267 | |
268 | typename InputStream::Ch c; |
269 | while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') |
270 | s.Take(); |
271 | } |
272 | |
//! Skip JSON white spaces in the character range [p, end).
/*! \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first character that is not a space, line feed,
            carriage return or tab, or \c end if there is none.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char ch = *p;
        if (ch != ' ' && ch != '\n' && ch != '\r' && ch != '\t')
            break;
    }
    return p;
}
278 | |
279 | #ifdef RAPIDJSON_SSE42 |
280 | //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once. |
281 | inline const char *SkipWhitespace_SIMD(const char* p) { |
282 | // Fast return for single non-whitespace |
283 | if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') |
284 | ++p; |
285 | else |
286 | return p; |
287 | |
288 | // 16-byte align to the next boundary |
289 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
290 | while (p != nextAligned) |
291 | if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') |
292 | ++p; |
293 | else |
294 | return p; |
295 | |
296 | // The rest of string using SIMD |
297 | static const char whitespace[16] = " \n\r\t" ; |
298 | const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); |
299 | |
300 | for (;; p += 16) { |
301 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
302 | const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY)); |
303 | if (r != 0) { // some of characters is non-whitespace |
304 | #ifdef _MSC_VER // Find the index of first non-whitespace |
305 | unsigned long offset; |
306 | _BitScanForward(&offset, r); |
307 | return p + offset; |
308 | #else |
309 | return p + __builtin_ffs(r) - 1; |
310 | #endif |
311 | } |
312 | } |
313 | } |
314 | |
315 | inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { |
316 | // Fast return for single non-whitespace |
317 | if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) |
318 | ++p; |
319 | else |
320 | return p; |
321 | |
322 | // The middle of string using SIMD |
323 | static const char whitespace[16] = " \n\r\t" ; |
324 | const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); |
325 | |
326 | for (; p <= end - 16; p += 16) { |
327 | const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); |
328 | const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY)); |
329 | if (r != 0) { // some of characters is non-whitespace |
330 | #ifdef _MSC_VER // Find the index of first non-whitespace |
331 | unsigned long offset; |
332 | _BitScanForward(&offset, r); |
333 | return p + offset; |
334 | #else |
335 | return p + __builtin_ffs(r) - 1; |
336 | #endif |
337 | } |
338 | } |
339 | |
340 | return SkipWhitespace(p, end); |
341 | } |
342 | |
343 | #elif defined(RAPIDJSON_SSE2) |
344 | |
345 | //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once. |
346 | inline const char *SkipWhitespace_SIMD(const char* p) { |
347 | // Fast return for single non-whitespace |
348 | if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') |
349 | ++p; |
350 | else |
351 | return p; |
352 | |
353 | // 16-byte align to the next boundary |
354 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
355 | while (p != nextAligned) |
356 | if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') |
357 | ++p; |
358 | else |
359 | return p; |
360 | |
361 | // The rest of string |
362 | #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } |
363 | static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; |
364 | #undef C16 |
365 | |
366 | const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); |
367 | const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); |
368 | const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); |
369 | const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); |
370 | |
371 | for (;; p += 16) { |
372 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
373 | __m128i x = _mm_cmpeq_epi8(s, w0); |
374 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); |
375 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); |
376 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); |
377 | unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); |
378 | if (r != 0) { // some of characters may be non-whitespace |
379 | #ifdef _MSC_VER // Find the index of first non-whitespace |
380 | unsigned long offset; |
381 | _BitScanForward(&offset, r); |
382 | return p + offset; |
383 | #else |
384 | return p + __builtin_ffs(r) - 1; |
385 | #endif |
386 | } |
387 | } |
388 | } |
389 | |
390 | inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { |
391 | // Fast return for single non-whitespace |
392 | if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) |
393 | ++p; |
394 | else |
395 | return p; |
396 | |
397 | // The rest of string |
398 | #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } |
399 | static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; |
400 | #undef C16 |
401 | |
402 | const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); |
403 | const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); |
404 | const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); |
405 | const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); |
406 | |
407 | for (; p <= end - 16; p += 16) { |
408 | const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); |
409 | __m128i x = _mm_cmpeq_epi8(s, w0); |
410 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); |
411 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); |
412 | x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); |
413 | unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); |
414 | if (r != 0) { // some of characters may be non-whitespace |
415 | #ifdef _MSC_VER // Find the index of first non-whitespace |
416 | unsigned long offset; |
417 | _BitScanForward(&offset, r); |
418 | return p + offset; |
419 | #else |
420 | return p + __builtin_ffs(r) - 1; |
421 | #endif |
422 | } |
423 | } |
424 | |
425 | return SkipWhitespace(p, end); |
426 | } |
427 | |
428 | #endif // RAPIDJSON_SSE2 |
429 | |
430 | #ifdef RAPIDJSON_SIMD |
431 | //! Template function specialization for InsituStringStream |
432 | template<> inline void SkipWhitespace(InsituStringStream& is) { |
433 | is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_)); |
434 | } |
435 | |
436 | //! Template function specialization for StringStream |
437 | template<> inline void SkipWhitespace(StringStream& is) { |
438 | is.src_ = SkipWhitespace_SIMD(is.src_); |
439 | } |
440 | |
441 | template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) { |
442 | is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_); |
443 | } |
444 | #endif // RAPIDJSON_SIMD |
445 | |
446 | /////////////////////////////////////////////////////////////////////////////// |
447 | // GenericReader |
448 | |
449 | //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. |
450 | /*! GenericReader parses JSON text from a stream, and send events synchronously to an |
451 | object implementing Handler concept. |
452 | |
453 | It needs to allocate a stack for storing a single decoded string during |
454 | non-destructive parsing. |
455 | |
456 | For in-situ parsing, the decoded string is directly written to the source |
457 | text string, no temporary buffer is required. |
458 | |
459 | A GenericReader object can be reused for parsing multiple JSON text. |
460 | |
461 | \tparam SourceEncoding Encoding of the input stream. |
462 | \tparam TargetEncoding Encoding of the parse output. |
463 | \tparam StackAllocator Allocator type for stack. |
464 | */ |
465 | template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator> |
466 | class GenericReader { |
467 | public: |
468 | typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type |
469 | |
470 | //! Constructor. |
471 | /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) |
472 | \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) |
473 | */ |
474 | GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {} |
475 | |
476 | //! Parse JSON text. |
477 | /*! \tparam parseFlags Combination of \ref ParseFlag. |
478 | \tparam InputStream Type of input stream, implementing Stream concept. |
479 | \tparam Handler Type of handler, implementing Handler concept. |
480 | \param is Input stream to be parsed. |
481 | \param handler The handler to receive events. |
482 | \return Whether the parsing is successful. |
483 | */ |
484 | template <unsigned parseFlags, typename InputStream, typename Handler> |
485 | ParseResult Parse(InputStream& is, Handler& handler) { |
486 | if (parseFlags & kParseIterativeFlag) |
487 | return IterativeParse<parseFlags>(is, handler); |
488 | |
489 | parseResult_.Clear(); |
490 | |
491 | ClearStackOnExit scope(*this); |
492 | |
493 | SkipWhitespaceAndComments<parseFlags>(is); |
494 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
495 | |
496 | if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) { |
497 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell()); |
498 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
499 | } |
500 | else { |
501 | ParseValue<parseFlags>(is, handler); |
502 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
503 | |
504 | if (!(parseFlags & kParseStopWhenDoneFlag)) { |
505 | SkipWhitespaceAndComments<parseFlags>(is); |
506 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
507 | |
508 | if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) { |
509 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); |
510 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
511 | } |
512 | } |
513 | } |
514 | |
515 | return parseResult_; |
516 | } |
517 | |
518 | //! Parse JSON text (with \ref kParseDefaultFlags) |
519 | /*! \tparam InputStream Type of input stream, implementing Stream concept |
520 | \tparam Handler Type of handler, implementing Handler concept. |
521 | \param is Input stream to be parsed. |
522 | \param handler The handler to receive events. |
523 | \return Whether the parsing is successful. |
524 | */ |
525 | template <typename InputStream, typename Handler> |
526 | ParseResult Parse(InputStream& is, Handler& handler) { |
527 | return Parse<kParseDefaultFlags>(is, handler); |
528 | } |
529 | |
530 | //! Whether a parse error has occured in the last parsing. |
531 | bool HasParseError() const { return parseResult_.IsError(); } |
532 | |
533 | //! Get the \ref ParseErrorCode of last parsing. |
534 | ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); } |
535 | |
536 | //! Get the position of last parsing error in input, 0 otherwise. |
537 | size_t GetErrorOffset() const { return parseResult_.Offset(); } |
538 | |
539 | protected: |
540 | void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); } |
541 | |
542 | private: |
543 | // Prohibit copy constructor & assignment operator. |
544 | GenericReader(const GenericReader&); |
545 | GenericReader& operator=(const GenericReader&); |
546 | |
547 | void ClearStack() { stack_.Clear(); } |
548 | |
549 | // clear stack on any exit from ParseStream, e.g. due to exception |
550 | struct ClearStackOnExit { |
551 | explicit ClearStackOnExit(GenericReader& r) : r_(r) {} |
552 | ~ClearStackOnExit() { r_.ClearStack(); } |
553 | private: |
554 | GenericReader& r_; |
555 | ClearStackOnExit(const ClearStackOnExit&); |
556 | ClearStackOnExit& operator=(const ClearStackOnExit&); |
557 | }; |
558 | |
559 | template<unsigned parseFlags, typename InputStream> |
560 | void SkipWhitespaceAndComments(InputStream& is) { |
561 | SkipWhitespace(is); |
562 | |
563 | if (parseFlags & kParseCommentsFlag) { |
564 | while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) { |
565 | if (Consume(is, '*')) { |
566 | while (true) { |
567 | if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) |
568 | RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); |
569 | else if (Consume(is, '*')) { |
570 | if (Consume(is, '/')) |
571 | break; |
572 | } |
573 | else |
574 | is.Take(); |
575 | } |
576 | } |
577 | else if (RAPIDJSON_LIKELY(Consume(is, '/'))) |
578 | while (is.Peek() != '\0' && is.Take() != '\n'); |
579 | else |
580 | RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); |
581 | |
582 | SkipWhitespace(is); |
583 | } |
584 | } |
585 | } |
586 | |
587 | // Parse object: { string : value, ... } |
588 | template<unsigned parseFlags, typename InputStream, typename Handler> |
589 | void ParseObject(InputStream& is, Handler& handler) { |
590 | RAPIDJSON_ASSERT(is.Peek() == '{'); |
591 | is.Take(); // Skip '{' |
592 | |
593 | if (RAPIDJSON_UNLIKELY(!handler.StartObject())) |
594 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
595 | |
596 | SkipWhitespaceAndComments<parseFlags>(is); |
597 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
598 | |
599 | if (Consume(is, '}')) { |
600 | if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object |
601 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
602 | return; |
603 | } |
604 | |
605 | for (SizeType memberCount = 0;;) { |
606 | if (RAPIDJSON_UNLIKELY(is.Peek() != '"')) |
607 | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); |
608 | |
609 | ParseString<parseFlags>(is, handler, true); |
610 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
611 | |
612 | SkipWhitespaceAndComments<parseFlags>(is); |
613 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
614 | |
615 | if (RAPIDJSON_UNLIKELY(!Consume(is, ':'))) |
616 | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); |
617 | |
618 | SkipWhitespaceAndComments<parseFlags>(is); |
619 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
620 | |
621 | ParseValue<parseFlags>(is, handler); |
622 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
623 | |
624 | SkipWhitespaceAndComments<parseFlags>(is); |
625 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
626 | |
627 | ++memberCount; |
628 | |
629 | switch (is.Peek()) { |
630 | case ',': |
631 | is.Take(); |
632 | SkipWhitespaceAndComments<parseFlags>(is); |
633 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
634 | break; |
635 | case '}': |
636 | is.Take(); |
637 | if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) |
638 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
639 | return; |
640 | default: |
641 | RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy |
642 | } |
643 | |
644 | if (parseFlags & kParseTrailingCommasFlag) { |
645 | if (is.Peek() == '}') { |
646 | if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) |
647 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
648 | is.Take(); |
649 | return; |
650 | } |
651 | } |
652 | } |
653 | } |
654 | |
655 | // Parse array: [ value, ... ] |
656 | template<unsigned parseFlags, typename InputStream, typename Handler> |
657 | void ParseArray(InputStream& is, Handler& handler) { |
658 | RAPIDJSON_ASSERT(is.Peek() == '['); |
659 | is.Take(); // Skip '[' |
660 | |
661 | if (RAPIDJSON_UNLIKELY(!handler.StartArray())) |
662 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
663 | |
664 | SkipWhitespaceAndComments<parseFlags>(is); |
665 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
666 | |
667 | if (Consume(is, ']')) { |
668 | if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array |
669 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
670 | return; |
671 | } |
672 | |
673 | for (SizeType elementCount = 0;;) { |
674 | ParseValue<parseFlags>(is, handler); |
675 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
676 | |
677 | ++elementCount; |
678 | SkipWhitespaceAndComments<parseFlags>(is); |
679 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
680 | |
681 | if (Consume(is, ',')) { |
682 | SkipWhitespaceAndComments<parseFlags>(is); |
683 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
684 | } |
685 | else if (Consume(is, ']')) { |
686 | if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) |
687 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
688 | return; |
689 | } |
690 | else |
691 | RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); |
692 | |
693 | if (parseFlags & kParseTrailingCommasFlag) { |
694 | if (is.Peek() == ']') { |
695 | if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) |
696 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
697 | is.Take(); |
698 | return; |
699 | } |
700 | } |
701 | } |
702 | } |
703 | |
704 | template<unsigned parseFlags, typename InputStream, typename Handler> |
705 | void ParseNull(InputStream& is, Handler& handler) { |
706 | RAPIDJSON_ASSERT(is.Peek() == 'n'); |
707 | is.Take(); |
708 | |
709 | if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) { |
710 | if (RAPIDJSON_UNLIKELY(!handler.Null())) |
711 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
712 | } |
713 | else |
714 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
715 | } |
716 | |
717 | template<unsigned parseFlags, typename InputStream, typename Handler> |
718 | void ParseTrue(InputStream& is, Handler& handler) { |
719 | RAPIDJSON_ASSERT(is.Peek() == 't'); |
720 | is.Take(); |
721 | |
722 | if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) { |
723 | if (RAPIDJSON_UNLIKELY(!handler.Bool(true))) |
724 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
725 | } |
726 | else |
727 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
728 | } |
729 | |
730 | template<unsigned parseFlags, typename InputStream, typename Handler> |
731 | void ParseFalse(InputStream& is, Handler& handler) { |
732 | RAPIDJSON_ASSERT(is.Peek() == 'f'); |
733 | is.Take(); |
734 | |
735 | if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) { |
736 | if (RAPIDJSON_UNLIKELY(!handler.Bool(false))) |
737 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); |
738 | } |
739 | else |
740 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); |
741 | } |
742 | |
743 | template<typename InputStream> |
744 | RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) { |
745 | if (RAPIDJSON_LIKELY(is.Peek() == expect)) { |
746 | is.Take(); |
747 | return true; |
748 | } |
749 | else |
750 | return false; |
751 | } |
752 | |
753 | // Helper function to parse four hexidecimal digits in \uXXXX in ParseString(). |
754 | template<typename InputStream> |
755 | unsigned ParseHex4(InputStream& is, size_t escapeOffset) { |
756 | unsigned codepoint = 0; |
757 | for (int i = 0; i < 4; i++) { |
758 | Ch c = is.Peek(); |
759 | codepoint <<= 4; |
760 | codepoint += static_cast<unsigned>(c); |
761 | if (c >= '0' && c <= '9') |
762 | codepoint -= '0'; |
763 | else if (c >= 'A' && c <= 'F') |
764 | codepoint -= 'A' - 10; |
765 | else if (c >= 'a' && c <= 'f') |
766 | codepoint -= 'a' - 10; |
767 | else { |
768 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset); |
769 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); |
770 | } |
771 | is.Take(); |
772 | } |
773 | return codepoint; |
774 | } |
775 | |
776 | template <typename CharType> |
777 | class StackStream { |
778 | public: |
779 | typedef CharType Ch; |
780 | |
781 | StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {} |
782 | RAPIDJSON_FORCEINLINE void Put(Ch c) { |
783 | *stack_.template Push<Ch>() = c; |
784 | ++length_; |
785 | } |
786 | |
787 | RAPIDJSON_FORCEINLINE void* Push(SizeType count) { |
788 | length_ += count; |
789 | return stack_.template Push<Ch>(count); |
790 | } |
791 | |
792 | size_t Length() const { return length_; } |
793 | |
794 | Ch* Pop() { |
795 | return stack_.template Pop<Ch>(length_); |
796 | } |
797 | |
798 | private: |
799 | StackStream(const StackStream&); |
800 | StackStream& operator=(const StackStream&); |
801 | |
802 | internal::Stack<StackAllocator>& stack_; |
803 | SizeType length_; |
804 | }; |
805 | |
806 | // Parse string and generate String event. Different code paths for kParseInsituFlag. |
807 | template<unsigned parseFlags, typename InputStream, typename Handler> |
808 | void ParseString(InputStream& is, Handler& handler, bool isKey = false) { |
809 | internal::StreamLocalCopy<InputStream> copy(is); |
810 | InputStream& s(copy.s); |
811 | |
812 | RAPIDJSON_ASSERT(s.Peek() == '\"'); |
813 | s.Take(); // Skip '\"' |
814 | |
815 | bool success = false; |
816 | if (parseFlags & kParseInsituFlag) { |
817 | typename InputStream::Ch *head = s.PutBegin(); |
818 | ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s); |
819 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
820 | size_t length = s.PutEnd(head) - 1; |
821 | RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); |
822 | const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); |
823 | success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false)); |
824 | } |
825 | else { |
826 | StackStream<typename TargetEncoding::Ch> stackStream(stack_); |
827 | ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream); |
828 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
829 | SizeType length = static_cast<SizeType>(stackStream.Length()) - 1; |
830 | const typename TargetEncoding::Ch* const str = stackStream.Pop(); |
831 | success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true)); |
832 | } |
833 | if (RAPIDJSON_UNLIKELY(!success)) |
834 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); |
835 | } |
836 | |
837 | // Parse string to an output is |
838 | // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation. |
839 | template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream> |
840 | RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) { |
841 | //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN |
842 | #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
843 | static const char escape[256] = { |
844 | Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', |
845 | Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, |
846 | 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, |
847 | 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
848 | Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 |
849 | }; |
850 | #undef Z16 |
851 | //!@endcond |
852 | |
853 | for (;;) { |
854 | // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation. |
855 | if (!(parseFlags & kParseValidateEncodingFlag)) |
856 | ScanCopyUnescapedString(is, os); |
857 | |
858 | Ch c = is.Peek(); |
859 | if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape |
860 | size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset |
861 | is.Take(); |
862 | Ch e = is.Peek(); |
863 | if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) { |
864 | is.Take(); |
865 | os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)])); |
866 | } |
867 | else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode |
868 | is.Take(); |
869 | unsigned codepoint = ParseHex4(is, escapeOffset); |
870 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
871 | if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) { |
872 | // Handle UTF-16 surrogate pair |
873 | if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) |
874 | RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); |
875 | unsigned codepoint2 = ParseHex4(is, escapeOffset); |
876 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; |
877 | if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) |
878 | RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); |
879 | codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; |
880 | } |
881 | TEncoding::Encode(os, codepoint); |
882 | } |
883 | else |
884 | RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); |
885 | } |
886 | else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote |
887 | is.Take(); |
888 | os.Put('\0'); // null-terminate the string |
889 | return; |
890 | } |
891 | else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF |
892 | if (c == '\0') |
893 | RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell()); |
894 | else |
895 | RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell()); |
896 | } |
897 | else { |
898 | size_t offset = is.Tell(); |
899 | if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ? |
900 | !Transcoder<SEncoding, TEncoding>::Validate(is, os) : |
901 | !Transcoder<SEncoding, TEncoding>::Transcode(is, os)))) |
902 | RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset); |
903 | } |
904 | } |
905 | } |
906 | |
// Generic fallback of the "scan and copy unescaped characters" optimization used by
// ParseStringToStream(). It intentionally does nothing, leaving every character to the
// caller's per-character loop; more specific overloads may exist for particular stream types.
template<typename InputStream, typename OutputStream>
static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
    // Do nothing for generic version
}
911 | |
912 | #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) |
913 | // StringStream -> StackStream<char> |
914 | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { |
915 | const char* p = is.src_; |
916 | |
917 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
918 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
919 | while (p != nextAligned) |
920 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
921 | is.src_ = p; |
922 | return; |
923 | } |
924 | else |
925 | os.Put(*p++); |
926 | |
927 | // The rest of string using SIMD |
928 | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
929 | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
930 | static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; |
931 | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
932 | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
933 | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
934 | |
935 | for (;; p += 16) { |
936 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
937 | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
938 | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
939 | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 |
940 | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
941 | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
942 | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
943 | SizeType length; |
944 | #ifdef _MSC_VER // Find the index of first escaped |
945 | unsigned long offset; |
946 | _BitScanForward(&offset, r); |
947 | length = offset; |
948 | #else |
949 | length = static_cast<SizeType>(__builtin_ffs(r) - 1); |
950 | #endif |
951 | char* q = reinterpret_cast<char*>(os.Push(length)); |
952 | for (size_t i = 0; i < length; i++) |
953 | q[i] = p[i]; |
954 | |
955 | p += length; |
956 | break; |
957 | } |
958 | _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s); |
959 | } |
960 | |
961 | is.src_ = p; |
962 | } |
963 | |
964 | // InsituStringStream -> InsituStringStream |
965 | static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { |
966 | RAPIDJSON_ASSERT(&is == &os); |
967 | (void)os; |
968 | |
969 | if (is.src_ == is.dst_) { |
970 | SkipUnescapedString(is); |
971 | return; |
972 | } |
973 | |
974 | char* p = is.src_; |
975 | char *q = is.dst_; |
976 | |
977 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
978 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
979 | while (p != nextAligned) |
980 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
981 | is.src_ = p; |
982 | is.dst_ = q; |
983 | return; |
984 | } |
985 | else |
986 | *q++ = *p++; |
987 | |
988 | // The rest of string using SIMD |
989 | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
990 | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
991 | static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; |
992 | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
993 | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
994 | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
995 | |
996 | for (;; p += 16, q += 16) { |
997 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
998 | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
999 | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1000 | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 |
1001 | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1002 | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1003 | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1004 | size_t length; |
1005 | #ifdef _MSC_VER // Find the index of first escaped |
1006 | unsigned long offset; |
1007 | _BitScanForward(&offset, r); |
1008 | length = offset; |
1009 | #else |
1010 | length = static_cast<size_t>(__builtin_ffs(r) - 1); |
1011 | #endif |
1012 | for (const char* pend = p + length; p != pend; ) |
1013 | *q++ = *p++; |
1014 | break; |
1015 | } |
1016 | _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s); |
1017 | } |
1018 | |
1019 | is.src_ = p; |
1020 | is.dst_ = q; |
1021 | } |
1022 | |
1023 | // When read/write pointers are the same for insitu stream, just skip unescaped characters |
1024 | static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { |
1025 | RAPIDJSON_ASSERT(is.src_ == is.dst_); |
1026 | char* p = is.src_; |
1027 | |
1028 | // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) |
1029 | const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); |
1030 | for (; p != nextAligned; p++) |
1031 | if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { |
1032 | is.src_ = is.dst_ = p; |
1033 | return; |
1034 | } |
1035 | |
1036 | // The rest of string using SIMD |
1037 | static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; |
1038 | static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; |
1039 | static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 }; |
1040 | const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); |
1041 | const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); |
1042 | const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); |
1043 | |
1044 | for (;; p += 16) { |
1045 | const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); |
1046 | const __m128i t1 = _mm_cmpeq_epi8(s, dq); |
1047 | const __m128i t2 = _mm_cmpeq_epi8(s, bs); |
1048 | const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19 |
1049 | const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); |
1050 | unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); |
1051 | if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped |
1052 | size_t length; |
1053 | #ifdef _MSC_VER // Find the index of first escaped |
1054 | unsigned long offset; |
1055 | _BitScanForward(&offset, r); |
1056 | length = offset; |
1057 | #else |
1058 | length = static_cast<size_t>(__builtin_ffs(r) - 1); |
1059 | #endif |
1060 | p += length; |
1061 | break; |
1062 | } |
1063 | } |
1064 | |
1065 | is.src_ = is.dst_ = p; |
1066 | } |
1067 | #endif |
1068 | |
1069 | template<typename InputStream, bool backup, bool pushOnTake> |
1070 | class NumberStream; |
1071 | |
1072 | template<typename InputStream> |
1073 | class NumberStream<InputStream, false, false> { |
1074 | public: |
1075 | typedef typename InputStream::Ch Ch; |
1076 | |
1077 | NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } |
1078 | ~NumberStream() {} |
1079 | |
1080 | RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } |
1081 | RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } |
1082 | RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } |
1083 | RAPIDJSON_FORCEINLINE void Push(char) {} |
1084 | |
1085 | size_t Tell() { return is.Tell(); } |
1086 | size_t Length() { return 0; } |
1087 | const char* Pop() { return 0; } |
1088 | |
1089 | protected: |
1090 | NumberStream& operator=(const NumberStream&); |
1091 | |
1092 | InputStream& is; |
1093 | }; |
1094 | |
1095 | template<typename InputStream> |
1096 | class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> { |
1097 | typedef NumberStream<InputStream, false, false> Base; |
1098 | public: |
1099 | NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {} |
1100 | ~NumberStream() {} |
1101 | |
1102 | RAPIDJSON_FORCEINLINE Ch TakePush() { |
1103 | stackStream.Put(static_cast<char>(Base::is.Peek())); |
1104 | return Base::is.Take(); |
1105 | } |
1106 | |
1107 | RAPIDJSON_FORCEINLINE void Push(char c) { |
1108 | stackStream.Put(c); |
1109 | } |
1110 | |
1111 | size_t Length() { return stackStream.Length(); } |
1112 | |
1113 | const char* Pop() { |
1114 | stackStream.Put('\0'); |
1115 | return stackStream.Pop(); |
1116 | } |
1117 | |
1118 | private: |
1119 | StackStream<char> stackStream; |
1120 | }; |
1121 | |
// Recording variant in which Take() records too: it simply forwards to the base class's
// TakePush(), so every consumed character ends up in the recorded text.
template<typename InputStream>
class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
    typedef NumberStream<InputStream, true, false> Base;
public:
    NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
    ~NumberStream() {}

    // Consume one character and record it.
    RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
};
1131 | |
1132 | template<unsigned parseFlags, typename InputStream, typename Handler> |
1133 | void ParseNumber(InputStream& is, Handler& handler) { |
1134 | internal::StreamLocalCopy<InputStream> copy(is); |
1135 | NumberStream<InputStream, |
1136 | ((parseFlags & kParseNumbersAsStringsFlag) != 0) ? |
1137 | ((parseFlags & kParseInsituFlag) == 0) : |
1138 | ((parseFlags & kParseFullPrecisionFlag) != 0), |
1139 | (parseFlags & kParseNumbersAsStringsFlag) != 0 && |
1140 | (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s); |
1141 | |
1142 | size_t startOffset = s.Tell(); |
1143 | double d = 0.0; |
1144 | bool useNanOrInf = false; |
1145 | |
1146 | // Parse minus |
1147 | bool minus = Consume(s, '-'); |
1148 | |
1149 | // Parse int: zero / ( digit1-9 *DIGIT ) |
1150 | unsigned i = 0; |
1151 | uint64_t i64 = 0; |
1152 | bool use64bit = false; |
1153 | int significandDigit = 0; |
1154 | if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) { |
1155 | i = 0; |
1156 | s.TakePush(); |
1157 | } |
1158 | else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) { |
1159 | i = static_cast<unsigned>(s.TakePush() - '0'); |
1160 | |
1161 | if (minus) |
1162 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1163 | if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648 |
1164 | if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) { |
1165 | i64 = i; |
1166 | use64bit = true; |
1167 | break; |
1168 | } |
1169 | } |
1170 | i = i * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1171 | significandDigit++; |
1172 | } |
1173 | else |
1174 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1175 | if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295 |
1176 | if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) { |
1177 | i64 = i; |
1178 | use64bit = true; |
1179 | break; |
1180 | } |
1181 | } |
1182 | i = i * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1183 | significandDigit++; |
1184 | } |
1185 | } |
1186 | // Parse NaN or Infinity here |
1187 | else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) { |
1188 | useNanOrInf = true; |
1189 | if (RAPIDJSON_LIKELY(Consume(s, 'N') && Consume(s, 'a') && Consume(s, 'N'))) { |
1190 | d = std::numeric_limits<double>::quiet_NaN(); |
1191 | } |
1192 | else if (RAPIDJSON_LIKELY(Consume(s, 'I') && Consume(s, 'n') && Consume(s, 'f'))) { |
1193 | d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity()); |
1194 | if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n') |
1195 | && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) |
1196 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); |
1197 | } |
1198 | else |
1199 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); |
1200 | } |
1201 | else |
1202 | RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); |
1203 | |
1204 | // Parse 64bit int |
1205 | bool useDouble = false; |
1206 | if (use64bit) { |
1207 | if (minus) |
1208 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1209 | if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808 |
1210 | if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) { |
1211 | d = static_cast<double>(i64); |
1212 | useDouble = true; |
1213 | break; |
1214 | } |
1215 | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1216 | significandDigit++; |
1217 | } |
1218 | else |
1219 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1220 | if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615 |
1221 | if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) { |
1222 | d = static_cast<double>(i64); |
1223 | useDouble = true; |
1224 | break; |
1225 | } |
1226 | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1227 | significandDigit++; |
1228 | } |
1229 | } |
1230 | |
1231 | // Force double for big integer |
1232 | if (useDouble) { |
1233 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1234 | if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0 |
1235 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); |
1236 | d = d * 10 + (s.TakePush() - '0'); |
1237 | } |
1238 | } |
1239 | |
1240 | // Parse frac = decimal-point 1*DIGIT |
1241 | int expFrac = 0; |
1242 | size_t decimalPosition; |
1243 | if (Consume(s, '.')) { |
1244 | decimalPosition = s.Length(); |
1245 | |
1246 | if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9'))) |
1247 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell()); |
1248 | |
1249 | if (!useDouble) { |
1250 | #if RAPIDJSON_64BIT |
1251 | // Use i64 to store significand in 64-bit architecture |
1252 | if (!use64bit) |
1253 | i64 = i; |
1254 | |
1255 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1256 | if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path |
1257 | break; |
1258 | else { |
1259 | i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0'); |
1260 | --expFrac; |
1261 | if (i64 != 0) |
1262 | significandDigit++; |
1263 | } |
1264 | } |
1265 | |
1266 | d = static_cast<double>(i64); |
1267 | #else |
1268 | // Use double to store significand in 32-bit architecture |
1269 | d = static_cast<double>(use64bit ? i64 : i); |
1270 | #endif |
1271 | useDouble = true; |
1272 | } |
1273 | |
1274 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1275 | if (significandDigit < 17) { |
1276 | d = d * 10.0 + (s.TakePush() - '0'); |
1277 | --expFrac; |
1278 | if (RAPIDJSON_LIKELY(d > 0.0)) |
1279 | significandDigit++; |
1280 | } |
1281 | else |
1282 | s.TakePush(); |
1283 | } |
1284 | } |
1285 | else |
1286 | decimalPosition = s.Length(); // decimal position at the end of integer. |
1287 | |
1288 | // Parse exp = e [ minus / plus ] 1*DIGIT |
1289 | int exp = 0; |
1290 | if (Consume(s, 'e') || Consume(s, 'E')) { |
1291 | if (!useDouble) { |
1292 | d = static_cast<double>(use64bit ? i64 : i); |
1293 | useDouble = true; |
1294 | } |
1295 | |
1296 | bool expMinus = false; |
1297 | if (Consume(s, '+')) |
1298 | ; |
1299 | else if (Consume(s, '-')) |
1300 | expMinus = true; |
1301 | |
1302 | if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1303 | exp = static_cast<int>(s.Take() - '0'); |
1304 | if (expMinus) { |
1305 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1306 | exp = exp * 10 + static_cast<int>(s.Take() - '0'); |
1307 | if (exp >= 214748364) { // Issue #313: prevent overflow exponent |
1308 | while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent |
1309 | s.Take(); |
1310 | } |
1311 | } |
1312 | } |
1313 | else { // positive exp |
1314 | int maxExp = 308 - expFrac; |
1315 | while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { |
1316 | exp = exp * 10 + static_cast<int>(s.Take() - '0'); |
1317 | if (RAPIDJSON_UNLIKELY(exp > maxExp)) |
1318 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); |
1319 | } |
1320 | } |
1321 | } |
1322 | else |
1323 | RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell()); |
1324 | |
1325 | if (expMinus) |
1326 | exp = -exp; |
1327 | } |
1328 | |
1329 | // Finish parsing, call event according to the type of number. |
1330 | bool cont = true; |
1331 | |
1332 | if (parseFlags & kParseNumbersAsStringsFlag) { |
1333 | if (parseFlags & kParseInsituFlag) { |
1334 | s.Pop(); // Pop stack no matter if it will be used or not. |
1335 | typename InputStream::Ch* head = is.PutBegin(); |
1336 | const size_t length = s.Tell() - startOffset; |
1337 | RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); |
1338 | // unable to insert the \0 character here, it will erase the comma after this number |
1339 | const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); |
1340 | cont = handler.RawNumber(str, SizeType(length), false); |
1341 | } |
1342 | else { |
1343 | SizeType numCharsToCopy = static_cast<SizeType>(s.Length()); |
1344 | StringStream srcStream(s.Pop()); |
1345 | StackStream<typename TargetEncoding::Ch> dstStream(stack_); |
1346 | while (numCharsToCopy--) { |
1347 | Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream); |
1348 | } |
1349 | dstStream.Put('\0'); |
1350 | const typename TargetEncoding::Ch* str = dstStream.Pop(); |
1351 | const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1; |
1352 | cont = handler.RawNumber(str, SizeType(length), true); |
1353 | } |
1354 | } |
1355 | else { |
1356 | size_t length = s.Length(); |
1357 | const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. |
1358 | |
1359 | if (useDouble) { |
1360 | int p = exp + expFrac; |
1361 | if (parseFlags & kParseFullPrecisionFlag) |
1362 | d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp); |
1363 | else |
1364 | d = internal::StrtodNormalPrecision(d, p); |
1365 | |
1366 | cont = handler.Double(minus ? -d : d); |
1367 | } |
1368 | else if (useNanOrInf) { |
1369 | cont = handler.Double(d); |
1370 | } |
1371 | else { |
1372 | if (use64bit) { |
1373 | if (minus) |
1374 | cont = handler.Int64(static_cast<int64_t>(~i64 + 1)); |
1375 | else |
1376 | cont = handler.Uint64(i64); |
1377 | } |
1378 | else { |
1379 | if (minus) |
1380 | cont = handler.Int(static_cast<int32_t>(~i + 1)); |
1381 | else |
1382 | cont = handler.Uint(i); |
1383 | } |
1384 | } |
1385 | } |
1386 | if (RAPIDJSON_UNLIKELY(!cont)) |
1387 | RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset); |
1388 | } |
1389 | |
1390 | // Parse any JSON value |
1391 | template<unsigned parseFlags, typename InputStream, typename Handler> |
1392 | void ParseValue(InputStream& is, Handler& handler) { |
1393 | switch (is.Peek()) { |
1394 | case 'n': ParseNull <parseFlags>(is, handler); break; |
1395 | case 't': ParseTrue <parseFlags>(is, handler); break; |
1396 | case 'f': ParseFalse <parseFlags>(is, handler); break; |
1397 | case '"': ParseString<parseFlags>(is, handler); break; |
1398 | case '{': ParseObject<parseFlags>(is, handler); break; |
1399 | case '[': ParseArray <parseFlags>(is, handler); break; |
1400 | default : |
1401 | ParseNumber<parseFlags>(is, handler); |
1402 | break; |
1403 | |
1404 | } |
1405 | } |
1406 | |
1407 | // Iterative Parsing |
1408 | |
// States of the iterative (table-driven) parser.
// The enumerators are numbered consecutively from 0 and serve as the row index of the
// transition table in Predict(), so their order must stay in sync with the rows of that table.
enum IterativeParsingState {
    IterativeParsingStartState = 0,
    IterativeParsingFinishState,
    IterativeParsingErrorState,

    // Object states
    IterativeParsingObjectInitialState,
    IterativeParsingMemberKeyState,
    IterativeParsingKeyValueDelimiterState,
    IterativeParsingMemberValueState,
    IterativeParsingMemberDelimiterState,
    IterativeParsingObjectFinishState,

    // Array states
    IterativeParsingArrayInitialState,
    IterativeParsingElementState,
    IterativeParsingElementDelimiterState,
    IterativeParsingArrayFinishState,

    // Single value state (must remain the last enumerator: the state count is derived from it)
    IterativeParsingValueState
};
1432 | |
// Number of parser states, derived from the last enumerator of IterativeParsingState.
enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 };
1434 | |
// Token classes produced by Tokenize() from a single lookahead character.
// The enumerators are numbered consecutively from 0 and serve as the column index of the
// transition table in Predict(); kTokenCount is the number of token classes.
enum Token {
    LeftBracketToken = 0,       // '['
    RightBracketToken,          // ']'

    LeftCurlyBracketToken,      // '{'
    RightCurlyBracketToken,     // '}'

    CommaToken,                 // ','
    ColonToken,                 // ':'

    StringToken,                // '"'
    FalseToken,                 // 'f'
    TrueToken,                  // 't'
    NullToken,                  // 'n'
    NumberToken,                // every other character

    kTokenCount
};
1454 | |
1455 | RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) { |
1456 | |
1457 | //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN |
1458 | #define N NumberToken |
1459 | #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N |
1460 | // Maps from ASCII to Token |
1461 | static const unsigned char tokenMap[256] = { |
1462 | N16, // 00~0F |
1463 | N16, // 10~1F |
1464 | N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F |
1465 | N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F |
1466 | N16, // 40~4F |
1467 | N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F |
1468 | N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F |
1469 | N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F |
1470 | N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF |
1471 | }; |
1472 | #undef N |
1473 | #undef N16 |
1474 | //!@endcond |
1475 | |
1476 | if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) |
1477 | return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]); |
1478 | else |
1479 | return NumberToken; |
1480 | } |
1481 | |
1482 | RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) { |
1483 | // current state x one lookahead token -> new state |
1484 | static const char G[cIterativeParsingStateCount][kTokenCount] = { |
1485 | // Start |
1486 | { |
1487 | IterativeParsingArrayInitialState, // Left bracket |
1488 | IterativeParsingErrorState, // Right bracket |
1489 | IterativeParsingObjectInitialState, // Left curly bracket |
1490 | IterativeParsingErrorState, // Right curly bracket |
1491 | IterativeParsingErrorState, // Comma |
1492 | IterativeParsingErrorState, // Colon |
1493 | IterativeParsingValueState, // String |
1494 | IterativeParsingValueState, // False |
1495 | IterativeParsingValueState, // True |
1496 | IterativeParsingValueState, // Null |
1497 | IterativeParsingValueState // Number |
1498 | }, |
1499 | // Finish(sink state) |
1500 | { |
1501 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1502 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1503 | IterativeParsingErrorState |
1504 | }, |
1505 | // Error(sink state) |
1506 | { |
1507 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1508 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1509 | IterativeParsingErrorState |
1510 | }, |
1511 | // ObjectInitial |
1512 | { |
1513 | IterativeParsingErrorState, // Left bracket |
1514 | IterativeParsingErrorState, // Right bracket |
1515 | IterativeParsingErrorState, // Left curly bracket |
1516 | IterativeParsingObjectFinishState, // Right curly bracket |
1517 | IterativeParsingErrorState, // Comma |
1518 | IterativeParsingErrorState, // Colon |
1519 | IterativeParsingMemberKeyState, // String |
1520 | IterativeParsingErrorState, // False |
1521 | IterativeParsingErrorState, // True |
1522 | IterativeParsingErrorState, // Null |
1523 | IterativeParsingErrorState // Number |
1524 | }, |
1525 | // MemberKey |
1526 | { |
1527 | IterativeParsingErrorState, // Left bracket |
1528 | IterativeParsingErrorState, // Right bracket |
1529 | IterativeParsingErrorState, // Left curly bracket |
1530 | IterativeParsingErrorState, // Right curly bracket |
1531 | IterativeParsingErrorState, // Comma |
1532 | IterativeParsingKeyValueDelimiterState, // Colon |
1533 | IterativeParsingErrorState, // String |
1534 | IterativeParsingErrorState, // False |
1535 | IterativeParsingErrorState, // True |
1536 | IterativeParsingErrorState, // Null |
1537 | IterativeParsingErrorState // Number |
1538 | }, |
1539 | // KeyValueDelimiter |
1540 | { |
1541 | IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) |
1542 | IterativeParsingErrorState, // Right bracket |
1543 | IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) |
1544 | IterativeParsingErrorState, // Right curly bracket |
1545 | IterativeParsingErrorState, // Comma |
1546 | IterativeParsingErrorState, // Colon |
1547 | IterativeParsingMemberValueState, // String |
1548 | IterativeParsingMemberValueState, // False |
1549 | IterativeParsingMemberValueState, // True |
1550 | IterativeParsingMemberValueState, // Null |
1551 | IterativeParsingMemberValueState // Number |
1552 | }, |
1553 | // MemberValue |
1554 | { |
1555 | IterativeParsingErrorState, // Left bracket |
1556 | IterativeParsingErrorState, // Right bracket |
1557 | IterativeParsingErrorState, // Left curly bracket |
1558 | IterativeParsingObjectFinishState, // Right curly bracket |
1559 | IterativeParsingMemberDelimiterState, // Comma |
1560 | IterativeParsingErrorState, // Colon |
1561 | IterativeParsingErrorState, // String |
1562 | IterativeParsingErrorState, // False |
1563 | IterativeParsingErrorState, // True |
1564 | IterativeParsingErrorState, // Null |
1565 | IterativeParsingErrorState // Number |
1566 | }, |
1567 | // MemberDelimiter |
1568 | { |
1569 | IterativeParsingErrorState, // Left bracket |
1570 | IterativeParsingErrorState, // Right bracket |
1571 | IterativeParsingErrorState, // Left curly bracket |
1572 | IterativeParsingObjectFinishState, // Right curly bracket |
1573 | IterativeParsingErrorState, // Comma |
1574 | IterativeParsingErrorState, // Colon |
1575 | IterativeParsingMemberKeyState, // String |
1576 | IterativeParsingErrorState, // False |
1577 | IterativeParsingErrorState, // True |
1578 | IterativeParsingErrorState, // Null |
1579 | IterativeParsingErrorState // Number |
1580 | }, |
1581 | // ObjectFinish(sink state) |
1582 | { |
1583 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1584 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1585 | IterativeParsingErrorState |
1586 | }, |
1587 | // ArrayInitial |
1588 | { |
1589 | IterativeParsingArrayInitialState, // Left bracket(push Element state) |
1590 | IterativeParsingArrayFinishState, // Right bracket |
1591 | IterativeParsingObjectInitialState, // Left curly bracket(push Element state) |
1592 | IterativeParsingErrorState, // Right curly bracket |
1593 | IterativeParsingErrorState, // Comma |
1594 | IterativeParsingErrorState, // Colon |
1595 | IterativeParsingElementState, // String |
1596 | IterativeParsingElementState, // False |
1597 | IterativeParsingElementState, // True |
1598 | IterativeParsingElementState, // Null |
1599 | IterativeParsingElementState // Number |
1600 | }, |
1601 | // Element |
1602 | { |
1603 | IterativeParsingErrorState, // Left bracket |
1604 | IterativeParsingArrayFinishState, // Right bracket |
1605 | IterativeParsingErrorState, // Left curly bracket |
1606 | IterativeParsingErrorState, // Right curly bracket |
1607 | IterativeParsingElementDelimiterState, // Comma |
1608 | IterativeParsingErrorState, // Colon |
1609 | IterativeParsingErrorState, // String |
1610 | IterativeParsingErrorState, // False |
1611 | IterativeParsingErrorState, // True |
1612 | IterativeParsingErrorState, // Null |
1613 | IterativeParsingErrorState // Number |
1614 | }, |
1615 | // ElementDelimiter |
1616 | { |
1617 | IterativeParsingArrayInitialState, // Left bracket(push Element state) |
1618 | IterativeParsingArrayFinishState, // Right bracket |
1619 | IterativeParsingObjectInitialState, // Left curly bracket(push Element state) |
1620 | IterativeParsingErrorState, // Right curly bracket |
1621 | IterativeParsingErrorState, // Comma |
1622 | IterativeParsingErrorState, // Colon |
1623 | IterativeParsingElementState, // String |
1624 | IterativeParsingElementState, // False |
1625 | IterativeParsingElementState, // True |
1626 | IterativeParsingElementState, // Null |
1627 | IterativeParsingElementState // Number |
1628 | }, |
1629 | // ArrayFinish(sink state) |
1630 | { |
1631 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1632 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1633 | IterativeParsingErrorState |
1634 | }, |
1635 | // Single Value (sink state) |
1636 | { |
1637 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1638 | IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, |
1639 | IterativeParsingErrorState |
1640 | } |
1641 | }; // End of G |
1642 | |
1643 | return static_cast<IterativeParsingState>(G[state][token]); |
1644 | } |
1645 | |
1646 | // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). |
1647 | // May return a new state on state pop. |
1648 | template <unsigned parseFlags, typename InputStream, typename Handler> |
1649 | RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { |
1650 | (void)token; |
1651 | |
1652 | switch (dst) { |
1653 | case IterativeParsingErrorState: |
1654 | return dst; |
1655 | |
1656 | case IterativeParsingObjectInitialState: |
1657 | case IterativeParsingArrayInitialState: |
1658 | { |
1659 | // Push the state(Element or MemeberValue) if we are nested in another array or value of member. |
1660 | // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. |
1661 | IterativeParsingState n = src; |
1662 | if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) |
1663 | n = IterativeParsingElementState; |
1664 | else if (src == IterativeParsingKeyValueDelimiterState) |
1665 | n = IterativeParsingMemberValueState; |
1666 | // Push current state. |
1667 | *stack_.template Push<SizeType>(1) = n; |
1668 | // Initialize and push the member/element count. |
1669 | *stack_.template Push<SizeType>(1) = 0; |
1670 | // Call handler |
1671 | bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); |
1672 | // On handler short circuits the parsing. |
1673 | if (!hr) { |
1674 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); |
1675 | return IterativeParsingErrorState; |
1676 | } |
1677 | else { |
1678 | is.Take(); |
1679 | return dst; |
1680 | } |
1681 | } |
1682 | |
1683 | case IterativeParsingMemberKeyState: |
1684 | ParseString<parseFlags>(is, handler, true); |
1685 | if (HasParseError()) |
1686 | return IterativeParsingErrorState; |
1687 | else |
1688 | return dst; |
1689 | |
1690 | case IterativeParsingKeyValueDelimiterState: |
1691 | RAPIDJSON_ASSERT(token == ColonToken); |
1692 | is.Take(); |
1693 | return dst; |
1694 | |
1695 | case IterativeParsingMemberValueState: |
1696 | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. |
1697 | ParseValue<parseFlags>(is, handler); |
1698 | if (HasParseError()) { |
1699 | return IterativeParsingErrorState; |
1700 | } |
1701 | return dst; |
1702 | |
1703 | case IterativeParsingElementState: |
1704 | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. |
1705 | ParseValue<parseFlags>(is, handler); |
1706 | if (HasParseError()) { |
1707 | return IterativeParsingErrorState; |
1708 | } |
1709 | return dst; |
1710 | |
1711 | case IterativeParsingMemberDelimiterState: |
1712 | case IterativeParsingElementDelimiterState: |
1713 | is.Take(); |
1714 | // Update member/element count. |
1715 | *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1; |
1716 | return dst; |
1717 | |
1718 | case IterativeParsingObjectFinishState: |
1719 | { |
1720 | // Transit from delimiter is only allowed when trailing commas are enabled |
1721 | if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) { |
1722 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); |
1723 | return IterativeParsingErrorState; |
1724 | } |
1725 | // Get member count. |
1726 | SizeType c = *stack_.template Pop<SizeType>(1); |
1727 | // If the object is not empty, count the last member. |
1728 | if (src == IterativeParsingMemberValueState) |
1729 | ++c; |
1730 | // Restore the state. |
1731 | IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1)); |
1732 | // Transit to Finish state if this is the topmost scope. |
1733 | if (n == IterativeParsingStartState) |
1734 | n = IterativeParsingFinishState; |
1735 | // Call handler |
1736 | bool hr = handler.EndObject(c); |
1737 | // On handler short circuits the parsing. |
1738 | if (!hr) { |
1739 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); |
1740 | return IterativeParsingErrorState; |
1741 | } |
1742 | else { |
1743 | is.Take(); |
1744 | return n; |
1745 | } |
1746 | } |
1747 | |
1748 | case IterativeParsingArrayFinishState: |
1749 | { |
1750 | // Transit from delimiter is only allowed when trailing commas are enabled |
1751 | if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) { |
1752 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell()); |
1753 | return IterativeParsingErrorState; |
1754 | } |
1755 | // Get element count. |
1756 | SizeType c = *stack_.template Pop<SizeType>(1); |
1757 | // If the array is not empty, count the last element. |
1758 | if (src == IterativeParsingElementState) |
1759 | ++c; |
1760 | // Restore the state. |
1761 | IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1)); |
1762 | // Transit to Finish state if this is the topmost scope. |
1763 | if (n == IterativeParsingStartState) |
1764 | n = IterativeParsingFinishState; |
1765 | // Call handler |
1766 | bool hr = handler.EndArray(c); |
1767 | // On handler short circuits the parsing. |
1768 | if (!hr) { |
1769 | RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); |
1770 | return IterativeParsingErrorState; |
1771 | } |
1772 | else { |
1773 | is.Take(); |
1774 | return n; |
1775 | } |
1776 | } |
1777 | |
1778 | default: |
1779 | // This branch is for IterativeParsingValueState actually. |
1780 | // Use `default:` rather than |
1781 | // `case IterativeParsingValueState:` is for code coverage. |
1782 | |
1783 | // The IterativeParsingStartState is not enumerated in this switch-case. |
1784 | // It is impossible for that case. And it can be caught by following assertion. |
1785 | |
1786 | // The IterativeParsingFinishState is not enumerated in this switch-case either. |
1787 | // It is a "derivative" state which cannot triggered from Predict() directly. |
1788 | // Therefore it cannot happen here. And it can be caught by following assertion. |
1789 | RAPIDJSON_ASSERT(dst == IterativeParsingValueState); |
1790 | |
1791 | // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. |
1792 | ParseValue<parseFlags>(is, handler); |
1793 | if (HasParseError()) { |
1794 | return IterativeParsingErrorState; |
1795 | } |
1796 | return IterativeParsingFinishState; |
1797 | } |
1798 | } |
1799 | |
1800 | template <typename InputStream> |
1801 | void HandleError(IterativeParsingState src, InputStream& is) { |
1802 | if (HasParseError()) { |
1803 | // Error flag has been set. |
1804 | return; |
1805 | } |
1806 | |
1807 | switch (src) { |
1808 | case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return; |
1809 | case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return; |
1810 | case IterativeParsingObjectInitialState: |
1811 | case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return; |
1812 | case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return; |
1813 | case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return; |
1814 | case IterativeParsingKeyValueDelimiterState: |
1815 | case IterativeParsingArrayInitialState: |
1816 | case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return; |
1817 | default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; |
1818 | } |
1819 | } |
1820 | |
1821 | template <unsigned parseFlags, typename InputStream, typename Handler> |
1822 | ParseResult IterativeParse(InputStream& is, Handler& handler) { |
1823 | parseResult_.Clear(); |
1824 | ClearStackOnExit scope(*this); |
1825 | IterativeParsingState state = IterativeParsingStartState; |
1826 | |
1827 | SkipWhitespaceAndComments<parseFlags>(is); |
1828 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
1829 | while (is.Peek() != '\0') { |
1830 | Token t = Tokenize(is.Peek()); |
1831 | IterativeParsingState n = Predict(state, t); |
1832 | IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler); |
1833 | |
1834 | if (d == IterativeParsingErrorState) { |
1835 | HandleError(state, is); |
1836 | break; |
1837 | } |
1838 | |
1839 | state = d; |
1840 | |
1841 | // Do not further consume streams if a root JSON has been parsed. |
1842 | if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState) |
1843 | break; |
1844 | |
1845 | SkipWhitespaceAndComments<parseFlags>(is); |
1846 | RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); |
1847 | } |
1848 | |
1849 | // Handle the end of file. |
1850 | if (state != IterativeParsingFinishState) |
1851 | HandleError(state, is); |
1852 | |
1853 | return parseResult_; |
1854 | } |
1855 | |
1856 | static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. |
1857 | internal::Stack<StackAllocator> stack_; //!< Working stack: stores decoded strings temporarily during non-destructive parsing, and holds the saved states and member/element counters pushed by the iterative parser (see Transit()). |
1858 | ParseResult parseResult_; |
1859 | }; // class GenericReader |
1860 | |
1861 | //! Convenience typedef: GenericReader with UTF8<> as both of its first two template arguments (UTF-8 encoding) and defaults for the remaining ones (default allocator). |
1862 | typedef GenericReader<UTF8<>, UTF8<> > Reader; |
1863 | |
1864 | RAPIDJSON_NAMESPACE_END |
1865 | |
1866 | #ifdef __clang__ |
1867 | RAPIDJSON_DIAG_POP |
1868 | #endif |
1869 | |
1870 | |
1871 | #ifdef __GNUC__ |
1872 | RAPIDJSON_DIAG_POP |
1873 | #endif |
1874 | |
1875 | #ifdef _MSC_VER |
1876 | RAPIDJSON_DIAG_POP |
1877 | #endif |
1878 | |
1879 | #endif // RAPIDJSON_READER_H_ |
1880 | |