1namespace simdjson {
2namespace SIMDJSON_IMPLEMENTATION {
3namespace ondemand {
4
5simdjson_inline value_iterator::value_iterator(
6 json_iterator *json_iter,
7 depth_t depth,
8 token_position start_position
9) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position}
10{
11}
12
13simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::start_object() noexcept {
14 SIMDJSON_TRY( start_container('{', "Not an object", "object") );
15 return started_object();
16}
17
18simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::start_root_object() noexcept {
19 SIMDJSON_TRY( start_container('{', "Not an object", "object") );
20 return started_root_object();
21}
22
23simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::started_object() noexcept {
24 assert_at_container_start();
25#if SIMDJSON_DEVELOPMENT_CHECKS
26 _json_iter->set_start_position(_depth, start_position());
27#endif
28 if (*_json_iter->peek() == '}') {
29 logger::log_value(iter: *_json_iter, type: "empty object");
30 _json_iter->return_current_and_advance();
31 end_container();
32 return false;
33 }
34 return true;
35}
36
37simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::started_root_object() noexcept {
38 // When in streaming mode, we cannot expect peek_last() to be the last structural element of the
39 // current document. It only works in the normal mode where we have indexed a single document.
40 // Note that adding a check for 'streaming' is not expensive since we only have at most
41 // one root element.
42 if ( ! _json_iter->streaming() ) {
43 if (*_json_iter->peek_last() != '}') {
44 _json_iter->abandon();
45 return report_error(error: INCOMPLETE_ARRAY_OR_OBJECT, message: "missing } at end");
46 }
47 // If the last character is } *and* the first gibberish character is also '}'
48 // then on-demand could accidentally go over. So we need additional checks.
49 // https://github.com/simdjson/simdjson/issues/1834
50 // Checking that the document is balanced requires a full scan which is potentially
51 // expensive, but it only happens in edge cases where the first padding character is
52 // a closing bracket.
53 if ((*_json_iter->peek(position: _json_iter->end_position()) == '}') && (!_json_iter->balanced())) {
54 _json_iter->abandon();
55 // The exact error would require more work. It will typically be an unclosed object.
56 return report_error(error: INCOMPLETE_ARRAY_OR_OBJECT, message: "the document is unbalanced");
57 }
58 }
59 return started_object();
60}
61
62simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept {
63#if SIMDJSON_CHECK_EOF
64 if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); }
65 // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); }
66#endif // SIMDJSON_CHECK_EOF
67 _json_iter->ascend_to(parent_depth: depth()-1);
68 return SUCCESS;
69}
70
71simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::has_next_field() noexcept {
72 assert_at_next();
73
74 // It's illegal to call this unless there are more tokens: anything that ends in } or ] is
75 // obligated to verify there are more tokens if they are not the top level.
76 switch (*_json_iter->return_current_and_advance()) {
77 case '}':
78 logger::log_end_value(iter: *_json_iter, type: "object");
79 SIMDJSON_TRY( end_container() );
80 return false;
81 case ',':
82 return true;
83 default:
84 return report_error(error: TAPE_ERROR, message: "Missing comma between object fields");
85 }
86}
87
88simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::find_field_raw(const std::string_view key) noexcept {
89 error_code error;
90 bool has_value;
91 //
92 // Initially, the object can be in one of a few different places:
93 //
94 // 1. The start of the object, at the first field:
95 //
96 // ```
97 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
98 // ^ (depth 2, index 1)
99 // ```
100 if (at_first_field()) {
101 has_value = true;
102
103 //
104 // 2. When a previous search did not yield a value or the object is empty:
105 //
106 // ```
107 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
108 // ^ (depth 0)
109 // { }
110 // ^ (depth 0, index 2)
111 // ```
112 //
113 } else if (!is_open()) {
114#if SIMDJSON_DEVELOPMENT_CHECKS
115 // If we're past the end of the object, we're being iterated out of order.
116 // Note: this isn't perfect detection. It's possible the user is inside some other object; if so,
117 // this object iterator will blithely scan that object for fields.
118 if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; }
119#endif
120 return false;
121
122 // 3. When a previous search found a field or an iterator yielded a value:
123 //
124 // ```
125 // // When a field was not fully consumed (or not even touched at all)
126 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
127 // ^ (depth 2)
128 // // When a field was fully consumed
129 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
130 // ^ (depth 1)
131 // // When the last field was fully consumed
132 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
133 // ^ (depth 1)
134 // ```
135 //
136 } else {
137 if ((error = skip_child() )) { abandon(); return error; }
138 if ((error = has_next_field().get(value&: has_value) )) { abandon(); return error; }
139#if SIMDJSON_DEVELOPMENT_CHECKS
140 if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; }
141#endif
142 }
143 while (has_value) {
144 // Get the key and colon, stopping at the value.
145 raw_json_string actual_key;
146 // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
147 // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
148 // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
149 // The depth is left unchanged by field_key().
150 if ((error = field_key().get(value&: actual_key) )) { abandon(); return error; };
151 // field_value() will advance and check that we find a ':' separating the
152 // key and the value. It will also increment the depth by one.
153 if ((error = field_value() )) { abandon(); return error; }
154 // If it matches, stop and return
155 // We could do it this way if we wanted to allow arbitrary
156 // key content (including escaped quotes).
157 //if (actual_key.unsafe_is_equal(max_key_length, key)) {
158 // Instead we do the following which may trigger buffer overruns if the
159 // user provides an adversarial key (containing a well placed unescaped quote
160 // character and being longer than the number of bytes remaining in the JSON
161 // input).
162 if (actual_key.unsafe_is_equal(target: key)) {
163 logger::log_event(iter: *this, type: "match", detail: key, delta: -2);
164 // If we return here, then we return while pointing at the ':' that we just checked.
165 return true;
166 }
167
168 // No match: skip the value and see if , or } is next
169 logger::log_event(iter: *this, type: "no match", detail: key, delta: -2);
170 // The call to skip_child is meant to skip over the value corresponding to the key.
171 // After skip_child(), we are right before the next comma (',') or the final brace ('}').
172 SIMDJSON_TRY( skip_child() ); // Skip the value entirely
173 // The has_next_field() advances the pointer and check that either ',' or '}' is found.
174 // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
175 // then we are in error and we abort.
176 if ((error = has_next_field().get(value&: has_value) )) { abandon(); return error; }
177 }
178
179 // If the loop ended, we're out of fields to look at.
180 return false;
181}
182
183SIMDJSON_PUSH_DISABLE_WARNINGS
184SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
185simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::find_field_unordered_raw(const std::string_view key) noexcept {
186 /**
187 * When find_field_unordered_raw is called, we can either be pointing at the
188 * first key, pointing outside (at the closing brace) or if a key was matched
189 * we can be either pointing right afterthe ':' right before the value (that we need skip),
190 * or we may have consumed the value and we might be at a comma or at the
191 * final brace (ready for a call to has_next_field()).
192 */
193 error_code error;
194 bool has_value;
195
196 // First, we scan from that point to the end.
197 // If we don't find a match, we may loop back around, and scan from the beginning to that point.
198 token_position search_start = _json_iter->position();
199
200 // We want to know whether we need to go back to the beginning.
201 bool at_first = at_first_field();
202 ///////////////
203 // Initially, the object can be in one of a few different places:
204 //
205 // 1. At the first key:
206 //
207 // ```
208 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
209 // ^ (depth 2, index 1)
210 // ```
211 //
212 if (at_first) {
213 has_value = true;
214
215 // 2. When a previous search did not yield a value or the object is empty:
216 //
217 // ```
218 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
219 // ^ (depth 0)
220 // { }
221 // ^ (depth 0, index 2)
222 // ```
223 //
224 } else if (!is_open()) {
225
226#if SIMDJSON_DEVELOPMENT_CHECKS
227 // If we're past the end of the object, we're being iterated out of order.
228 // Note: this isn't perfect detection. It's possible the user is inside some other object; if so,
229 // this object iterator will blithely scan that object for fields.
230 if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; }
231#endif
232 SIMDJSON_TRY(reset_object().get(has_value));
233 at_first = true;
234 // 3. When a previous search found a field or an iterator yielded a value:
235 //
236 // ```
237 // // When a field was not fully consumed (or not even touched at all)
238 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
239 // ^ (depth 2)
240 // // When a field was fully consumed
241 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
242 // ^ (depth 1)
243 // // When the last field was fully consumed
244 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
245 // ^ (depth 1)
246 // ```
247 //
248 } else {
249 // If someone queried a key but they not did access the value, then we are left pointing
250 // at the ':' and we need to move forward through the value... If the value was
251 // processed then skip_child() does not move the iterator (but may adjust the depth).
252 if ((error = skip_child() )) { abandon(); return error; }
253 search_start = _json_iter->position();
254 if ((error = has_next_field().get(value&: has_value) )) { abandon(); return error; }
255#if SIMDJSON_DEVELOPMENT_CHECKS
256 if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; }
257#endif
258 }
259
260 // After initial processing, we will be in one of two states:
261 //
262 // ```
263 // // At the beginning of a field
264 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
265 // ^ (depth 1)
266 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
267 // ^ (depth 1)
268 // // At the end of the object
269 // { "a": [ 1, 2 ], "b": [ 3, 4 ] }
270 // ^ (depth 0)
271 // ```
272 //
273 // Next, we find a match starting from the current position.
274 while (has_value) {
275 SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field
276
277 // Get the key and colon, stopping at the value.
278 raw_json_string actual_key;
279 // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
280 // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
281 // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
282 // The depth is left unchanged by field_key().
283 if ((error = field_key().get(value&: actual_key) )) { abandon(); return error; };
284 // field_value() will advance and check that we find a ':' separating the
285 // key and the value. It will also increment the depth by one.
286 if ((error = field_value() )) { abandon(); return error; }
287
288 // If it matches, stop and return
289 // We could do it this way if we wanted to allow arbitrary
290 // key content (including escaped quotes).
291 // if (actual_key.unsafe_is_equal(max_key_length, key)) {
292 // Instead we do the following which may trigger buffer overruns if the
293 // user provides an adversarial key (containing a well placed unescaped quote
294 // character and being longer than the number of bytes remaining in the JSON
295 // input).
296 if (actual_key.unsafe_is_equal(target: key)) {
297 logger::log_event(iter: *this, type: "match", detail: key, delta: -2);
298 // If we return here, then we return while pointing at the ':' that we just checked.
299 return true;
300 }
301
302 // No match: skip the value and see if , or } is next
303 logger::log_event(iter: *this, type: "no match", detail: key, delta: -2);
304 // The call to skip_child is meant to skip over the value corresponding to the key.
305 // After skip_child(), we are right before the next comma (',') or the final brace ('}').
306 SIMDJSON_TRY( skip_child() );
307 // The has_next_field() advances the pointer and check that either ',' or '}' is found.
308 // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
309 // then we are in error and we abort.
310 if ((error = has_next_field().get(value&: has_value) )) { abandon(); return error; }
311 }
312 // Performance note: it maybe wasteful to rewind to the beginning when there might be
313 // no other query following. Indeed, it would require reskipping the whole object.
314 // Instead, you can just stay where you are. If there is a new query, there is always time
315 // to rewind.
316 if(at_first) { return false; }
317
318 // If we reach the end without finding a match, search the rest of the fields starting at the
319 // beginning of the object.
320 // (We have already run through the object before, so we've already validated its structure. We
321 // don't check errors in this bit.)
322 SIMDJSON_TRY(reset_object().get(has_value));
323 while (true) {
324 SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object
325 SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field
326
327 // Get the key and colon, stopping at the value.
328 raw_json_string actual_key;
329 // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes
330 // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2.
331 // field_key() advances the pointer and checks that '"' is found (corresponding to a key).
332 // The depth is left unchanged by field_key().
333 error = field_key().get(value&: actual_key); SIMDJSON_ASSUME(!error);
334 // field_value() will advance and check that we find a ':' separating the
335 // key and the value. It will also increment the depth by one.
336 error = field_value(); SIMDJSON_ASSUME(!error);
337
338 // If it matches, stop and return
339 // We could do it this way if we wanted to allow arbitrary
340 // key content (including escaped quotes).
341 // if (actual_key.unsafe_is_equal(max_key_length, key)) {
342 // Instead we do the following which may trigger buffer overruns if the
343 // user provides an adversarial key (containing a well placed unescaped quote
344 // character and being longer than the number of bytes remaining in the JSON
345 // input).
346 if (actual_key.unsafe_is_equal(target: key)) {
347 logger::log_event(iter: *this, type: "match", detail: key, delta: -2);
348 // If we return here, then we return while pointing at the ':' that we just checked.
349 return true;
350 }
351
352 // No match: skip the value and see if , or } is next
353 logger::log_event(iter: *this, type: "no match", detail: key, delta: -2);
354 // The call to skip_child is meant to skip over the value corresponding to the key.
355 // After skip_child(), we are right before the next comma (',') or the final brace ('}').
356 SIMDJSON_TRY( skip_child() );
357 // If we reached the end of the key-value pair we started from, then we know
358 // that the key is not there so we return false. We are either right before
359 // the next comma or the final brace.
360 if(_json_iter->position() == search_start) { return false; }
361 // The has_next_field() advances the pointer and check that either ',' or '}' is found.
362 // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found,
363 // then we are in error and we abort.
364 error = has_next_field().get(value&: has_value); SIMDJSON_ASSUME(!error);
365 // If we make the mistake of exiting here, then we could be left pointing at a key
366 // in the middle of an object. That's not an allowable state.
367 }
368 // If the loop ended, we're out of fields to look at. The program should
369 // never reach this point.
370 return false;
371}
372SIMDJSON_POP_DISABLE_WARNINGS
373
374simdjson_warn_unused simdjson_inline simdjson_result<raw_json_string> value_iterator::field_key() noexcept {
375 assert_at_next();
376
377 const uint8_t *key = _json_iter->return_current_and_advance();
378 if (*(key++) != '"') { return report_error(error: TAPE_ERROR, message: "Object key is not a string"); }
379 return raw_json_string(key);
380}
381
382simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept {
383 assert_at_next();
384
385 if (*_json_iter->return_current_and_advance() != ':') { return report_error(error: TAPE_ERROR, message: "Missing colon in object field"); }
386 _json_iter->descend_to(child_depth: depth()+1);
387 return SUCCESS;
388}
389
390simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::start_array() noexcept {
391 SIMDJSON_TRY( start_container('[', "Not an array", "array") );
392 return started_array();
393}
394
395simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::start_root_array() noexcept {
396 SIMDJSON_TRY( start_container('[', "Not an array", "array") );
397 return started_root_array();
398}
399
400inline std::string value_iterator::to_string() const noexcept {
401 auto answer = std::string("value_iterator [ depth : ") + std::to_string(val: _depth) + std::string(", ");
402 if(_json_iter != nullptr) { answer += _json_iter->to_string(); }
403 answer += std::string(" ]");
404 return answer;
405}
406
407simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::started_array() noexcept {
408 assert_at_container_start();
409 if (*_json_iter->peek() == ']') {
410 logger::log_value(iter: *_json_iter, type: "empty array");
411 _json_iter->return_current_and_advance();
412 SIMDJSON_TRY( end_container() );
413 return false;
414 }
415 _json_iter->descend_to(child_depth: depth()+1);
416#if SIMDJSON_DEVELOPMENT_CHECKS
417 _json_iter->set_start_position(_depth, start_position());
418#endif
419 return true;
420}
421
422simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::started_root_array() noexcept {
423 // When in streaming mode, we cannot expect peek_last() to be the last structural element of the
424 // current document. It only works in the normal mode where we have indexed a single document.
425 // Note that adding a check for 'streaming' is not expensive since we only have at most
426 // one root element.
427 if ( ! _json_iter->streaming() ) {
428 if (*_json_iter->peek_last() != ']') {
429 _json_iter->abandon();
430 return report_error(error: INCOMPLETE_ARRAY_OR_OBJECT, message: "missing ] at end");
431 }
432 // If the last character is ] *and* the first gibberish character is also ']'
433 // then on-demand could accidentally go over. So we need additional checks.
434 // https://github.com/simdjson/simdjson/issues/1834
435 // Checking that the document is balanced requires a full scan which is potentially
436 // expensive, but it only happens in edge cases where the first padding character is
437 // a closing bracket.
438 if ((*_json_iter->peek(position: _json_iter->end_position()) == ']') && (!_json_iter->balanced())) {
439 _json_iter->abandon();
440 // The exact error would require more work. It will typically be an unclosed array.
441 return report_error(error: INCOMPLETE_ARRAY_OR_OBJECT, message: "the document is unbalanced");
442 }
443 }
444 return started_array();
445}
446
447simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::has_next_element() noexcept {
448 assert_at_next();
449
450 logger::log_event(iter: *this, type: "has_next_element");
451 switch (*_json_iter->return_current_and_advance()) {
452 case ']':
453 logger::log_end_value(iter: *_json_iter, type: "array");
454 SIMDJSON_TRY( end_container() );
455 return false;
456 case ',':
457 _json_iter->descend_to(child_depth: depth()+1);
458 return true;
459 default:
460 return report_error(error: TAPE_ERROR, message: "Missing comma between array elements");
461 }
462}
463
464simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::parse_bool(const uint8_t *json) const noexcept {
465 auto not_true = atomparsing::str4ncmp(src: json, atom: "true");
466 auto not_false = atomparsing::str4ncmp(src: json, atom: "fals") | (json[4] ^ 'e');
467 bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(c: json[not_true ? 5 : 4]);
468 if (error) { return incorrect_type_error(message: "Not a boolean"); }
469 return simdjson_result<bool>(!not_true);
470}
471simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::parse_null(const uint8_t *json) const noexcept {
472 bool is_null_string = !atomparsing::str4ncmp(src: json, atom: "null") && jsoncharutils::is_structural_or_whitespace(c: json[4]);
473 // if we start with 'n', we must be a null
474 if(!is_null_string && json[0]=='n') { return incorrect_type_error(message: "Not a null but starts with n"); }
475 return is_null_string;
476}
477
478simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> value_iterator::get_string(bool allow_replacement) noexcept {
479 return get_raw_json_string().unescape(iter&: json_iter(), allow_replacement);
480}
481simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> value_iterator::get_wobbly_string() noexcept {
482 return get_raw_json_string().unescape_wobbly(iter&: json_iter());
483}
484simdjson_warn_unused simdjson_inline simdjson_result<raw_json_string> value_iterator::get_raw_json_string() noexcept {
485 auto json = peek_scalar(type: "string");
486 if (*json != '"') { return incorrect_type_error(message: "Not a string"); }
487 advance_scalar(type: "string");
488 return raw_json_string(json+1);
489}
490simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> value_iterator::get_uint64() noexcept {
491 auto result = numberparsing::parse_unsigned(src: peek_non_root_scalar(type: "uint64"));
492 if(result.error() == SUCCESS) { advance_non_root_scalar(type: "uint64"); }
493 return result;
494}
495simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> value_iterator::get_uint64_in_string() noexcept {
496 auto result = numberparsing::parse_unsigned_in_string(src: peek_non_root_scalar(type: "uint64"));
497 if(result.error() == SUCCESS) { advance_non_root_scalar(type: "uint64"); }
498 return result;
499}
500simdjson_warn_unused simdjson_inline simdjson_result<int64_t> value_iterator::get_int64() noexcept {
501 auto result = numberparsing::parse_integer(src: peek_non_root_scalar(type: "int64"));
502 if(result.error() == SUCCESS) { advance_non_root_scalar(type: "int64"); }
503 return result;
504}
505simdjson_warn_unused simdjson_inline simdjson_result<int64_t> value_iterator::get_int64_in_string() noexcept {
506 auto result = numberparsing::parse_integer_in_string(src: peek_non_root_scalar(type: "int64"));
507 if(result.error() == SUCCESS) { advance_non_root_scalar(type: "int64"); }
508 return result;
509}
510simdjson_warn_unused simdjson_inline simdjson_result<double> value_iterator::get_double() noexcept {
511 auto result = numberparsing::parse_double(src: peek_non_root_scalar(type: "double"));
512 if(result.error() == SUCCESS) { advance_non_root_scalar(type: "double"); }
513 return result;
514}
515simdjson_warn_unused simdjson_inline simdjson_result<double> value_iterator::get_double_in_string() noexcept {
516 auto result = numberparsing::parse_double_in_string(src: peek_non_root_scalar(type: "double"));
517 if(result.error() == SUCCESS) { advance_non_root_scalar(type: "double"); }
518 return result;
519}
520simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::get_bool() noexcept {
521 auto result = parse_bool(json: peek_non_root_scalar(type: "bool"));
522 if(result.error() == SUCCESS) { advance_non_root_scalar(type: "bool"); }
523 return result;
524}
525simdjson_inline simdjson_result<bool> value_iterator::is_null() noexcept {
526 bool is_null_value;
527 SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value));
528 if(is_null_value) { advance_non_root_scalar(type: "null"); }
529 return is_null_value;
530}
531simdjson_inline bool value_iterator::is_negative() noexcept {
532 return numberparsing::is_negative(src: peek_non_root_scalar(type: "numbersign"));
533}
534simdjson_inline bool value_iterator::is_root_negative() noexcept {
535 return numberparsing::is_negative(src: peek_root_scalar(type: "numbersign"));
536}
537simdjson_inline simdjson_result<bool> value_iterator::is_integer() noexcept {
538 return numberparsing::is_integer(src: peek_non_root_scalar(type: "integer"));
539}
540simdjson_inline simdjson_result<number_type> value_iterator::get_number_type() noexcept {
541 return numberparsing::get_number_type(src: peek_non_root_scalar(type: "integer"));
542}
543simdjson_inline simdjson_result<number> value_iterator::get_number() noexcept {
544 number num;
545 error_code error = numberparsing::parse_number(src: peek_non_root_scalar(type: "number"), writer&: num);
546 if(error) { return error; }
547 return num;
548}
549
550simdjson_inline simdjson_result<bool> value_iterator::is_root_integer(bool check_trailing) noexcept {
551 auto max_len = peek_start_length();
552 auto json = peek_root_scalar(type: "is_root_integer");
553 uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer
554 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
555 return false; // if there are more than 20 characters, it cannot be represented as an integer.
556 }
557 auto answer = numberparsing::is_integer(src: tmpbuf);
558 // If the parsing was a success, we must still check that it is
559 // a single scalar. Note that we parse first because of cases like '[]' where
560 // getting TRAILING_CONTENT is wrong.
561 if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; }
562 return answer;
563}
564
565simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::number_type> value_iterator::get_root_number_type(bool check_trailing) noexcept {
566 auto max_len = peek_start_length();
567 auto json = peek_root_scalar(type: "number");
568 // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
569 // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
570 // number: -0.<fraction>e-308.
571 uint8_t tmpbuf[1074+8+1];
572 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
573 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 1082 characters");
574 return NUMBER_ERROR;
575 }
576 auto answer = numberparsing::get_number_type(src: tmpbuf);
577 if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
578 return answer;
579}
580simdjson_inline simdjson_result<number> value_iterator::get_root_number(bool check_trailing) noexcept {
581 auto max_len = peek_start_length();
582 auto json = peek_root_scalar(type: "number");
583 // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
584 // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
585 // number: -0.<fraction>e-308.
586 uint8_t tmpbuf[1074+8+1];
587 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
588 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 1082 characters");
589 return NUMBER_ERROR;
590 }
591 number num;
592 error_code error = numberparsing::parse_number(src: tmpbuf, writer&: num);
593 if(error) { return error; }
594 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
595 advance_root_scalar(type: "number");
596 return num;
597}
598simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept {
599 return get_root_raw_json_string(check_trailing).unescape(iter&: json_iter(), allow_replacement);
600}
601simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> value_iterator::get_root_wobbly_string(bool check_trailing) noexcept {
602 return get_root_raw_json_string(check_trailing).unescape_wobbly(iter&: json_iter());
603}
604simdjson_warn_unused simdjson_inline simdjson_result<raw_json_string> value_iterator::get_root_raw_json_string(bool check_trailing) noexcept {
605 auto json = peek_scalar(type: "string");
606 if (*json != '"') { return incorrect_type_error(message: "Not a string"); }
607 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
608 advance_scalar(type: "string");
609 return raw_json_string(json+1);
610}
611simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> value_iterator::get_root_uint64(bool check_trailing) noexcept {
612 auto max_len = peek_start_length();
613 auto json = peek_root_scalar(type: "uint64");
614 uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer
615 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
616 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 20 characters");
617 return NUMBER_ERROR;
618 }
619 auto result = numberparsing::parse_unsigned(src: tmpbuf);
620 if(result.error() == SUCCESS) {
621 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
622 advance_root_scalar(type: "uint64");
623 }
624 return result;
625}
626simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept {
627 auto max_len = peek_start_length();
628 auto json = peek_root_scalar(type: "uint64");
629 uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer
630 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
631 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 20 characters");
632 return NUMBER_ERROR;
633 }
634 auto result = numberparsing::parse_unsigned_in_string(src: tmpbuf);
635 if(result.error() == SUCCESS) {
636 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
637 advance_root_scalar(type: "uint64");
638 }
639 return result;
640}
641simdjson_warn_unused simdjson_inline simdjson_result<int64_t> value_iterator::get_root_int64(bool check_trailing) noexcept {
642 auto max_len = peek_start_length();
643 auto json = peek_root_scalar(type: "int64");
644 uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
645 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
646 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 20 characters");
647 return NUMBER_ERROR;
648 }
649
650 auto result = numberparsing::parse_integer(src: tmpbuf);
651 if(result.error() == SUCCESS) {
652 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
653 advance_root_scalar(type: "int64");
654 }
655 return result;
656}
657simdjson_warn_unused simdjson_inline simdjson_result<int64_t> value_iterator::get_root_int64_in_string(bool check_trailing) noexcept {
658 auto max_len = peek_start_length();
659 auto json = peek_root_scalar(type: "int64");
660 uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer
661 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
662 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 20 characters");
663 return NUMBER_ERROR;
664 }
665
666 auto result = numberparsing::parse_integer_in_string(src: tmpbuf);
667 if(result.error() == SUCCESS) {
668 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
669 advance_root_scalar(type: "int64");
670 }
671 return result;
672}
673simdjson_warn_unused simdjson_inline simdjson_result<double> value_iterator::get_root_double(bool check_trailing) noexcept {
674 auto max_len = peek_start_length();
675 auto json = peek_root_scalar(type: "double");
676 // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
677 // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
678 // number: -0.<fraction>e-308.
679 uint8_t tmpbuf[1074+8+1];
680 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
681 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 1082 characters");
682 return NUMBER_ERROR;
683 }
684 auto result = numberparsing::parse_double(src: tmpbuf);
685 if(result.error() == SUCCESS) {
686 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
687 advance_root_scalar(type: "double");
688 }
689 return result;
690}
691
692simdjson_warn_unused simdjson_inline simdjson_result<double> value_iterator::get_root_double_in_string(bool check_trailing) noexcept {
693 auto max_len = peek_start_length();
694 auto json = peek_root_scalar(type: "double");
695 // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/,
696 // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest
697 // number: -0.<fraction>e-308.
698 uint8_t tmpbuf[1074+8+1];
699 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) {
700 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: "Root number more than 1082 characters");
701 return NUMBER_ERROR;
702 }
703 auto result = numberparsing::parse_double_in_string(src: tmpbuf);
704 if(result.error() == SUCCESS) {
705 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
706 advance_root_scalar(type: "double");
707 }
708 return result;
709}
710simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::get_root_bool(bool check_trailing) noexcept {
711 auto max_len = peek_start_length();
712 auto json = peek_root_scalar(type: "bool");
713 uint8_t tmpbuf[5+1];
714 if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error(message: "Not a boolean"); }
715 auto result = parse_bool(json: tmpbuf);
716 if(result.error() == SUCCESS) {
717 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
718 advance_root_scalar(type: "bool");
719 }
720 return result;
721}
722simdjson_inline simdjson_result<bool> value_iterator::is_root_null(bool check_trailing) noexcept {
723 auto max_len = peek_start_length();
724 auto json = peek_root_scalar(type: "null");
725 bool result = (max_len >= 4 && !atomparsing::str4ncmp(src: json, atom: "null") &&
726 (max_len == 4 || jsoncharutils::is_structural_or_whitespace(c: json[4])));
727 if(result) { // we have something that looks like a null.
728 if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; }
729 advance_root_scalar(type: "null");
730 }
731 return result;
732}
733
734simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept {
735 SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
736 SIMDJSON_ASSUME( _json_iter->_depth >= _depth );
737
738 return _json_iter->skip_child(parent_depth: depth());
739}
740
741simdjson_inline value_iterator value_iterator::child() const noexcept {
742 assert_at_child();
743 return { _json_iter, depth()+1, _json_iter->token.position() };
744}
745
746// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
747// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is
748// marked non-inline.
749SIMDJSON_PUSH_DISABLE_WARNINGS
750SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
751simdjson_inline bool value_iterator::is_open() const noexcept {
752 return _json_iter->depth() >= depth();
753}
754SIMDJSON_POP_DISABLE_WARNINGS
755
756simdjson_inline bool value_iterator::at_end() const noexcept {
757 return _json_iter->at_end();
758}
759
760simdjson_inline bool value_iterator::at_start() const noexcept {
761 return _json_iter->token.position() == start_position();
762}
763
764simdjson_inline bool value_iterator::at_first_field() const noexcept {
765 SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
766 return _json_iter->token.position() == start_position() + 1;
767}
768
769simdjson_inline void value_iterator::abandon() noexcept {
770 _json_iter->abandon();
771}
772
773simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept {
774 return _depth;
775}
776simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept {
777 return _json_iter->error;
778}
779simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept {
780 return _json_iter->string_buf_loc();
781}
782simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept {
783 return *_json_iter;
784}
785simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept {
786 return *_json_iter;
787}
788
789simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept {
790 return _json_iter->peek(position: start_position());
791}
792simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept {
793 return _json_iter->peek_length(position: start_position());
794}
795
796simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept {
797 logger::log_value(iter: *_json_iter, index: start_position(), depth: depth(), type);
798 // If we're not at the position anymore, we don't want to advance the cursor.
799 if (!is_at_start()) { return peek_start(); }
800
801 // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value.
802 assert_at_start();
803 return _json_iter->peek();
804}
805
806simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept {
807 logger::log_value(iter: *_json_iter, index: start_position(), depth: depth(), type);
808 // If we're not at the position anymore, we don't want to advance the cursor.
809 if (!is_at_start()) { return; }
810
811 // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value.
812 assert_at_start();
813 _json_iter->return_current_and_advance();
814 _json_iter->ascend_to(parent_depth: depth()-1);
815}
816
817simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept {
818 logger::log_start_value(iter: *_json_iter, index: start_position(), depth: depth(), type);
819 // If we're not at the position anymore, we don't want to advance the cursor.
820 const uint8_t *json;
821 if (!is_at_start()) {
822#if SIMDJSON_DEVELOPMENT_CHECKS
823 if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; }
824#endif
825 json = peek_start();
826 if (*json != start_char) { return incorrect_type_error(message: incorrect_type_message); }
827 } else {
828 assert_at_start();
829 /**
830 * We should be prudent. Let us peek. If it is not the right type, we
831 * return an error. Only once we have determined that we have the right
832 * type are we allowed to advance!
833 */
834 json = _json_iter->peek();
835 if (*json != start_char) { return incorrect_type_error(message: incorrect_type_message); }
836 _json_iter->return_current_and_advance();
837 }
838
839
840 return SUCCESS;
841}
842
843
844simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept {
845 logger::log_value(iter: *_json_iter, index: start_position(), depth: depth(), type);
846 if (!is_at_start()) { return peek_start(); }
847
848 assert_at_root();
849 return _json_iter->peek();
850}
851simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept {
852 logger::log_value(iter: *_json_iter, index: start_position(), depth: depth(), type);
853 if (!is_at_start()) { return peek_start(); }
854
855 assert_at_non_root_start();
856 return _json_iter->peek();
857}
858
859simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept {
860 logger::log_value(iter: *_json_iter, index: start_position(), depth: depth(), type);
861 if (!is_at_start()) { return; }
862
863 assert_at_root();
864 _json_iter->return_current_and_advance();
865 _json_iter->ascend_to(parent_depth: depth()-1);
866}
867simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept {
868 logger::log_value(iter: *_json_iter, index: start_position(), depth: depth(), type);
869 if (!is_at_start()) { return; }
870
871 assert_at_non_root_start();
872 _json_iter->return_current_and_advance();
873 _json_iter->ascend_to(parent_depth: depth()-1);
874}
875
876simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept {
877 logger::log_error(iter: *_json_iter, index: start_position(), depth: depth(), error: message);
878 return INCORRECT_TYPE;
879}
880
881simdjson_inline bool value_iterator::is_at_start() const noexcept {
882 return position() == start_position();
883}
884
885simdjson_inline bool value_iterator::is_at_key() const noexcept {
886 // Keys are at the same depth as the object.
887 // Note here that we could be safer and check that we are within an object,
888 // but we do not.
889 return _depth == _json_iter->_depth && *_json_iter->peek() == '"';
890}
891
892simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept {
893 // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]).
894 auto delta = position() - start_position();
895 return delta == 1 || delta == 2;
896}
897
898inline void value_iterator::assert_at_start() const noexcept {
899 SIMDJSON_ASSUME( _json_iter->token._position == _start_position );
900 SIMDJSON_ASSUME( _json_iter->_depth == _depth );
901 SIMDJSON_ASSUME( _depth > 0 );
902}
903
904inline void value_iterator::assert_at_container_start() const noexcept {
905 SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 );
906 SIMDJSON_ASSUME( _json_iter->_depth == _depth );
907 SIMDJSON_ASSUME( _depth > 0 );
908}
909
910inline void value_iterator::assert_at_next() const noexcept {
911 SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
912 SIMDJSON_ASSUME( _json_iter->_depth == _depth );
913 SIMDJSON_ASSUME( _depth > 0 );
914}
915
916simdjson_inline void value_iterator::move_at_start() noexcept {
917 _json_iter->_depth = _depth;
918 _json_iter->token.set_position(_start_position);
919}
920
921simdjson_inline void value_iterator::move_at_container_start() noexcept {
922 _json_iter->_depth = _depth;
923 _json_iter->token.set_position(_start_position + 1);
924}
925
926simdjson_inline simdjson_result<bool> value_iterator::reset_array() noexcept {
927 move_at_container_start();
928 return started_array();
929}
930
931simdjson_inline simdjson_result<bool> value_iterator::reset_object() noexcept {
932 move_at_container_start();
933 return started_object();
934}
935
936inline void value_iterator::assert_at_child() const noexcept {
937 SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
938 SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 );
939 SIMDJSON_ASSUME( _depth > 0 );
940}
941
942inline void value_iterator::assert_at_root() const noexcept {
943 assert_at_start();
944 SIMDJSON_ASSUME( _depth == 1 );
945}
946
947inline void value_iterator::assert_at_non_root_start() const noexcept {
948 assert_at_start();
949 SIMDJSON_ASSUME( _depth > 1 );
950}
951
952inline void value_iterator::assert_is_valid() const noexcept {
953 SIMDJSON_ASSUME( _json_iter != nullptr );
954}
955
956simdjson_inline bool value_iterator::is_valid() const noexcept {
957 return _json_iter != nullptr;
958}
959
960simdjson_inline simdjson_result<json_type> value_iterator::type() const noexcept {
961 switch (*peek_start()) {
962 case '{':
963 return json_type::object;
964 case '[':
965 return json_type::array;
966 case '"':
967 return json_type::string;
968 case 'n':
969 return json_type::null;
970 case 't': case 'f':
971 return json_type::boolean;
972 case '-':
973 case '0': case '1': case '2': case '3': case '4':
974 case '5': case '6': case '7': case '8': case '9':
975 return json_type::number;
976 default:
977 return TAPE_ERROR;
978 }
979}
980
981simdjson_inline token_position value_iterator::start_position() const noexcept {
982 return _start_position;
983}
984
985simdjson_inline token_position value_iterator::position() const noexcept {
986 return _json_iter->position();
987}
988
989simdjson_inline token_position value_iterator::end_position() const noexcept {
990 return _json_iter->end_position();
991}
992
993simdjson_inline token_position value_iterator::last_position() const noexcept {
994 return _json_iter->last_position();
995}
996
997simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept {
998 return _json_iter->report_error(error: error, message);
999}
1000
1001} // namespace ondemand
1002} // namespace SIMDJSON_IMPLEMENTATION
1003} // namespace simdjson
1004
1005namespace simdjson {
1006
1007simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value_iterator>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept
1008 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value_iterator>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::value_iterator>(t&: value)) {}
1009simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::value_iterator>::simdjson_result(error_code error) noexcept
1010 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::value_iterator>(error) {}
1011
1012} // namespace simdjson
1013