1#pragma once
2
3#include <cassert> // assert
4#include <cmath> // isfinite
5#include <cstdint> // uint8_t
6#include <functional> // function
7#include <string> // string
8#include <utility> // move
9#include <vector> // vector
10
11#include <nlohmann/detail/exceptions.hpp>
12#include <nlohmann/detail/input/input_adapters.hpp>
13#include <nlohmann/detail/input/json_sax.hpp>
14#include <nlohmann/detail/input/lexer.hpp>
15#include <nlohmann/detail/macro_scope.hpp>
16#include <nlohmann/detail/meta/is_sax.hpp>
17#include <nlohmann/detail/value_t.hpp>
18
19namespace nlohmann
20{
21namespace detail
22{
23////////////
24// parser //
25////////////
26
27/*!
28@brief syntax analysis
29
30This class implements a recursive decent parser.
31*/
32template<typename BasicJsonType>
33class parser
34{
35 using number_integer_t = typename BasicJsonType::number_integer_t;
36 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
37 using number_float_t = typename BasicJsonType::number_float_t;
38 using string_t = typename BasicJsonType::string_t;
39 using lexer_t = lexer<BasicJsonType>;
40 using token_type = typename lexer_t::token_type;
41
42 public:
43 enum class parse_event_t : uint8_t
44 {
45 /// the parser read `{` and started to process a JSON object
46 object_start,
47 /// the parser read `}` and finished processing a JSON object
48 object_end,
49 /// the parser read `[` and started to process a JSON array
50 array_start,
51 /// the parser read `]` and finished processing a JSON array
52 array_end,
53 /// the parser read a key of a value in an object
54 key,
55 /// the parser finished reading a JSON value
56 value
57 };
58
59 using parser_callback_t =
60 std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
61
62 /// a parser reading from an input adapter
63 explicit parser(detail::input_adapter_t&& adapter,
64 const parser_callback_t cb = nullptr,
65 const bool allow_exceptions_ = true)
66 : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
67 {
68 // read first token
69 get_token();
70 }
71
72 /*!
73 @brief public parser interface
74
75 @param[in] strict whether to expect the last token to be EOF
76 @param[in,out] result parsed JSON value
77
78 @throw parse_error.101 in case of an unexpected token
79 @throw parse_error.102 if to_unicode fails or surrogate error
80 @throw parse_error.103 if to_unicode fails
81 */
82 void parse(const bool strict, BasicJsonType& result)
83 {
84 if (callback)
85 {
86 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
87 sax_parse_internal(&sdp);
88 result.assert_invariant();
89
90 // in strict mode, input must be completely read
91 if (strict and (get_token() != token_type::end_of_input))
92 {
93 sdp.parse_error(m_lexer.get_position(),
94 m_lexer.get_token_string(),
95 parse_error::create(101, m_lexer.get_position(),
96 exception_message(token_type::end_of_input, "value")));
97 }
98
99 // in case of an error, return discarded value
100 if (sdp.is_errored())
101 {
102 result = value_t::discarded;
103 return;
104 }
105
106 // set top-level value to null if it was discarded by the callback
107 // function
108 if (result.is_discarded())
109 {
110 result = nullptr;
111 }
112 }
113 else
114 {
115 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
116 sax_parse_internal(&sdp);
117 result.assert_invariant();
118
119 // in strict mode, input must be completely read
120 if (strict and (get_token() != token_type::end_of_input))
121 {
122 sdp.parse_error(m_lexer.get_position(),
123 m_lexer.get_token_string(),
124 parse_error::create(101, m_lexer.get_position(),
125 exception_message(token_type::end_of_input, "value")));
126 }
127
128 // in case of an error, return discarded value
129 if (sdp.is_errored())
130 {
131 result = value_t::discarded;
132 return;
133 }
134 }
135 }
136
137 /*!
138 @brief public accept interface
139
140 @param[in] strict whether to expect the last token to be EOF
141 @return whether the input is a proper JSON text
142 */
143 bool accept(const bool strict = true)
144 {
145 json_sax_acceptor<BasicJsonType> sax_acceptor;
146 return sax_parse(&sax_acceptor, strict);
147 }
148
149 template <typename SAX>
150 JSON_HEDLEY_NON_NULL(2)
151 bool sax_parse(SAX* sax, const bool strict = true)
152 {
153 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
154 const bool result = sax_parse_internal(sax);
155
156 // strict mode: next byte must be EOF
157 if (result and strict and (get_token() != token_type::end_of_input))
158 {
159 return sax->parse_error(m_lexer.get_position(),
160 m_lexer.get_token_string(),
161 parse_error::create(101, m_lexer.get_position(),
162 exception_message(token_type::end_of_input, "value")));
163 }
164
165 return result;
166 }
167
168 private:
169 template <typename SAX>
170 JSON_HEDLEY_NON_NULL(2)
171 bool sax_parse_internal(SAX* sax)
172 {
173 // stack to remember the hierarchy of structured values we are parsing
174 // true = array; false = object
175 std::vector<bool> states;
176 // value to avoid a goto (see comment where set to true)
177 bool skip_to_state_evaluation = false;
178
179 while (true)
180 {
181 if (not skip_to_state_evaluation)
182 {
183 // invariant: get_token() was called before each iteration
184 switch (last_token)
185 {
186 case token_type::begin_object:
187 {
188 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
189 {
190 return false;
191 }
192
193 // closing } -> we are done
194 if (get_token() == token_type::end_object)
195 {
196 if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
197 {
198 return false;
199 }
200 break;
201 }
202
203 // parse key
204 if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
205 {
206 return sax->parse_error(m_lexer.get_position(),
207 m_lexer.get_token_string(),
208 parse_error::create(101, m_lexer.get_position(),
209 exception_message(token_type::value_string, "object key")));
210 }
211 if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
212 {
213 return false;
214 }
215
216 // parse separator (:)
217 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
218 {
219 return sax->parse_error(m_lexer.get_position(),
220 m_lexer.get_token_string(),
221 parse_error::create(101, m_lexer.get_position(),
222 exception_message(token_type::name_separator, "object separator")));
223 }
224
225 // remember we are now inside an object
226 states.push_back(false);
227
228 // parse values
229 get_token();
230 continue;
231 }
232
233 case token_type::begin_array:
234 {
235 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
236 {
237 return false;
238 }
239
240 // closing ] -> we are done
241 if (get_token() == token_type::end_array)
242 {
243 if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
244 {
245 return false;
246 }
247 break;
248 }
249
250 // remember we are now inside an array
251 states.push_back(true);
252
253 // parse values (no need to call get_token)
254 continue;
255 }
256
257 case token_type::value_float:
258 {
259 const auto res = m_lexer.get_number_float();
260
261 if (JSON_HEDLEY_UNLIKELY(not std::isfinite(res)))
262 {
263 return sax->parse_error(m_lexer.get_position(),
264 m_lexer.get_token_string(),
265 out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
266 }
267
268 if (JSON_HEDLEY_UNLIKELY(not sax->number_float(res, m_lexer.get_string())))
269 {
270 return false;
271 }
272
273 break;
274 }
275
276 case token_type::literal_false:
277 {
278 if (JSON_HEDLEY_UNLIKELY(not sax->boolean(false)))
279 {
280 return false;
281 }
282 break;
283 }
284
285 case token_type::literal_null:
286 {
287 if (JSON_HEDLEY_UNLIKELY(not sax->null()))
288 {
289 return false;
290 }
291 break;
292 }
293
294 case token_type::literal_true:
295 {
296 if (JSON_HEDLEY_UNLIKELY(not sax->boolean(true)))
297 {
298 return false;
299 }
300 break;
301 }
302
303 case token_type::value_integer:
304 {
305 if (JSON_HEDLEY_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer())))
306 {
307 return false;
308 }
309 break;
310 }
311
312 case token_type::value_string:
313 {
314 if (JSON_HEDLEY_UNLIKELY(not sax->string(m_lexer.get_string())))
315 {
316 return false;
317 }
318 break;
319 }
320
321 case token_type::value_unsigned:
322 {
323 if (JSON_HEDLEY_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned())))
324 {
325 return false;
326 }
327 break;
328 }
329
330 case token_type::parse_error:
331 {
332 // using "uninitialized" to avoid "expected" message
333 return sax->parse_error(m_lexer.get_position(),
334 m_lexer.get_token_string(),
335 parse_error::create(101, m_lexer.get_position(),
336 exception_message(token_type::uninitialized, "value")));
337 }
338
339 default: // the last token was unexpected
340 {
341 return sax->parse_error(m_lexer.get_position(),
342 m_lexer.get_token_string(),
343 parse_error::create(101, m_lexer.get_position(),
344 exception_message(token_type::literal_or_value, "value")));
345 }
346 }
347 }
348 else
349 {
350 skip_to_state_evaluation = false;
351 }
352
353 // we reached this line after we successfully parsed a value
354 if (states.empty())
355 {
356 // empty stack: we reached the end of the hierarchy: done
357 return true;
358 }
359
360 if (states.back()) // array
361 {
362 // comma -> next value
363 if (get_token() == token_type::value_separator)
364 {
365 // parse a new value
366 get_token();
367 continue;
368 }
369
370 // closing ]
371 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
372 {
373 if (JSON_HEDLEY_UNLIKELY(not sax->end_array()))
374 {
375 return false;
376 }
377
378 // We are done with this array. Before we can parse a
379 // new value, we need to evaluate the new state first.
380 // By setting skip_to_state_evaluation to false, we
381 // are effectively jumping to the beginning of this if.
382 assert(not states.empty());
383 states.pop_back();
384 skip_to_state_evaluation = true;
385 continue;
386 }
387
388 return sax->parse_error(m_lexer.get_position(),
389 m_lexer.get_token_string(),
390 parse_error::create(101, m_lexer.get_position(),
391 exception_message(token_type::end_array, "array")));
392 }
393 else // object
394 {
395 // comma -> next value
396 if (get_token() == token_type::value_separator)
397 {
398 // parse key
399 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
400 {
401 return sax->parse_error(m_lexer.get_position(),
402 m_lexer.get_token_string(),
403 parse_error::create(101, m_lexer.get_position(),
404 exception_message(token_type::value_string, "object key")));
405 }
406
407 if (JSON_HEDLEY_UNLIKELY(not sax->key(m_lexer.get_string())))
408 {
409 return false;
410 }
411
412 // parse separator (:)
413 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
414 {
415 return sax->parse_error(m_lexer.get_position(),
416 m_lexer.get_token_string(),
417 parse_error::create(101, m_lexer.get_position(),
418 exception_message(token_type::name_separator, "object separator")));
419 }
420
421 // parse values
422 get_token();
423 continue;
424 }
425
426 // closing }
427 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
428 {
429 if (JSON_HEDLEY_UNLIKELY(not sax->end_object()))
430 {
431 return false;
432 }
433
434 // We are done with this object. Before we can parse a
435 // new value, we need to evaluate the new state first.
436 // By setting skip_to_state_evaluation to false, we
437 // are effectively jumping to the beginning of this if.
438 assert(not states.empty());
439 states.pop_back();
440 skip_to_state_evaluation = true;
441 continue;
442 }
443
444 return sax->parse_error(m_lexer.get_position(),
445 m_lexer.get_token_string(),
446 parse_error::create(101, m_lexer.get_position(),
447 exception_message(token_type::end_object, "object")));
448 }
449 }
450 }
451
452 /// get next token from lexer
453 token_type get_token()
454 {
455 return last_token = m_lexer.scan();
456 }
457
458 std::string exception_message(const token_type expected, const std::string& context)
459 {
460 std::string error_msg = "syntax error ";
461
462 if (not context.empty())
463 {
464 error_msg += "while parsing " + context + " ";
465 }
466
467 error_msg += "- ";
468
469 if (last_token == token_type::parse_error)
470 {
471 error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
472 m_lexer.get_token_string() + "'";
473 }
474 else
475 {
476 error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
477 }
478
479 if (expected != token_type::uninitialized)
480 {
481 error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
482 }
483
484 return error_msg;
485 }
486
487 private:
488 /// callback function
489 const parser_callback_t callback = nullptr;
490 /// the type of the last read token
491 token_type last_token = token_type::uninitialized;
492 /// the lexer
493 lexer_t m_lexer;
494 /// whether to throw exceptions in case of errors
495 const bool allow_exceptions = true;
496};
497} // namespace detail
498} // namespace nlohmann
499