1#ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H
2#define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H
3
4#include "simdjson/dom/parsedjson_iterator.h"
5#include "simdjson/portability.h"
6#include <cstring>
7
8#ifndef SIMDJSON_DISABLE_DEPRECATED_API
9
10namespace simdjson {
11
12// VS2017 reports deprecated warnings when you define a deprecated class's methods.
13SIMDJSON_PUSH_DISABLE_WARNINGS
14SIMDJSON_DISABLE_DEPRECATED_WARNING
15
16// Because of template weirdness, the actual class definition is inline in the document class
17simdjson_warn_unused bool dom::parser::Iterator::is_ok() const {
18 return location < tape_length;
19}
20
21// useful for debugging purposes
22size_t dom::parser::Iterator::get_tape_location() const {
23 return location;
24}
25
26// useful for debugging purposes
27size_t dom::parser::Iterator::get_tape_length() const {
28 return tape_length;
29}
30
31// returns the current depth (start at 1 with 0 reserved for the fictitious root
32// node)
33size_t dom::parser::Iterator::get_depth() const {
34 return depth;
35}
36
37// A scope is a series of nodes at the same depth, typically it is either an
38// object ({) or an array ([). The root node has type 'r'.
39uint8_t dom::parser::Iterator::get_scope_type() const {
40 return depth_index[depth].scope_type;
41}
42
43bool dom::parser::Iterator::move_forward() {
44 if (location + 1 >= tape_length) {
45 return false; // we are at the end!
46 }
47
48 if ((current_type == '[') || (current_type == '{')) {
49 // We are entering a new scope
50 depth++;
51 assert(depth < max_depth);
52 depth_index[depth].start_of_scope = location;
53 depth_index[depth].scope_type = current_type;
54 } else if ((current_type == ']') || (current_type == '}')) {
55 // Leaving a scope.
56 depth--;
57 } else if (is_number()) {
58 // these types use 2 locations on the tape, not just one.
59 location += 1;
60 }
61
62 location += 1;
63 current_val = doc.tape[location];
64 current_type = uint8_t(current_val >> 56);
65 return true;
66}
67
68void dom::parser::Iterator::move_to_value() {
69 // assume that we are on a key, so move by 1.
70 location += 1;
71 current_val = doc.tape[location];
72 current_type = uint8_t(current_val >> 56);
73}
74
75bool dom::parser::Iterator::move_to_key(const char *key) {
76 if (down()) {
77 do {
78 const bool right_key = (strcmp(s1: get_string(), s2: key) == 0);
79 move_to_value();
80 if (right_key) {
81 return true;
82 }
83 } while (next());
84 up();
85 }
86 return false;
87}
88
89bool dom::parser::Iterator::move_to_key_insensitive(
90 const char *key) {
91 if (down()) {
92 do {
93 const bool right_key = (simdjson_strcasecmp(s1: get_string(), s2: key) == 0);
94 move_to_value();
95 if (right_key) {
96 return true;
97 }
98 } while (next());
99 up();
100 }
101 return false;
102}
103
104bool dom::parser::Iterator::move_to_key(const char *key,
105 uint32_t length) {
106 if (down()) {
107 do {
108 bool right_key = ((get_string_length() == length) &&
109 (memcmp(s1: get_string(), s2: key, n: length) == 0));
110 move_to_value();
111 if (right_key) {
112 return true;
113 }
114 } while (next());
115 up();
116 }
117 return false;
118}
119
120bool dom::parser::Iterator::move_to_index(uint32_t index) {
121 if (down()) {
122 uint32_t i = 0;
123 for (; i < index; i++) {
124 if (!next()) {
125 break;
126 }
127 }
128 if (i == index) {
129 return true;
130 }
131 up();
132 }
133 return false;
134}
135
136bool dom::parser::Iterator::prev() {
137 size_t target_location = location;
138 to_start_scope();
139 size_t npos = location;
140 if (target_location == npos) {
141 return false; // we were already at the start
142 }
143 size_t oldnpos;
144 // we have that npos < target_location here
145 do {
146 oldnpos = npos;
147 if ((current_type == '[') || (current_type == '{')) {
148 // we need to jump
149 npos = uint32_t(current_val);
150 } else {
151 npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
152 }
153 } while (npos < target_location);
154 location = oldnpos;
155 current_val = doc.tape[location];
156 current_type = uint8_t(current_val >> 56);
157 return true;
158}
159
160bool dom::parser::Iterator::up() {
161 if (depth == 1) {
162 return false; // don't allow moving back to root
163 }
164 to_start_scope();
165 // next we just move to the previous value
166 depth--;
167 location -= 1;
168 current_val = doc.tape[location];
169 current_type = uint8_t(current_val >> 56);
170 return true;
171}
172
173bool dom::parser::Iterator::down() {
174 if (location + 1 >= tape_length) {
175 return false;
176 }
177 if ((current_type == '[') || (current_type == '{')) {
178 size_t npos = uint32_t(current_val);
179 if (npos == location + 2) {
180 return false; // we have an empty scope
181 }
182 depth++;
183 assert(depth < max_depth);
184 location = location + 1;
185 depth_index[depth].start_of_scope = location;
186 depth_index[depth].scope_type = current_type;
187 current_val = doc.tape[location];
188 current_type = uint8_t(current_val >> 56);
189 return true;
190 }
191 return false;
192}
193
194void dom::parser::Iterator::to_start_scope() {
195 location = depth_index[depth].start_of_scope;
196 current_val = doc.tape[location];
197 current_type = uint8_t(current_val >> 56);
198}
199
200bool dom::parser::Iterator::next() {
201 size_t npos;
202 if ((current_type == '[') || (current_type == '{')) {
203 // we need to jump
204 npos = uint32_t(current_val);
205 } else {
206 npos = location + (is_number() ? 2 : 1);
207 }
208 uint64_t next_val = doc.tape[npos];
209 uint8_t next_type = uint8_t(next_val >> 56);
210 if ((next_type == ']') || (next_type == '}')) {
211 return false; // we reached the end of the scope
212 }
213 location = npos;
214 current_val = next_val;
215 current_type = next_type;
216 return true;
217}
218dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false)
219 : doc(pj.doc)
220{
221#if SIMDJSON_EXCEPTIONS
222 if (!pj.valid) { throw simdjson_error(pj.error); }
223#else
224 if (!pj.valid) { return; } // abort() usage is forbidden in the library
225#endif
226
227 max_depth = pj.max_depth();
228 depth_index = new scopeindex_t[max_depth + 1];
229 depth_index[0].start_of_scope = location;
230 current_val = doc.tape[location++];
231 current_type = uint8_t(current_val >> 56);
232 depth_index[0].scope_type = current_type;
233 tape_length = size_t(current_val & internal::JSON_VALUE_MASK);
234 if (location < tape_length) {
235 // If we make it here, then depth_capacity must >=2, but the compiler
236 // may not know this.
237 current_val = doc.tape[location];
238 current_type = uint8_t(current_val >> 56);
239 depth++;
240 assert(depth < max_depth);
241 depth_index[depth].start_of_scope = location;
242 depth_index[depth].scope_type = current_type;
243 }
244}
245dom::parser::Iterator::Iterator(
246 const dom::parser::Iterator &o) noexcept
247 : doc(o.doc),
248 max_depth(o.depth),
249 depth(o.depth),
250 location(o.location),
251 tape_length(o.tape_length),
252 current_type(o.current_type),
253 current_val(o.current_val)
254{
255 depth_index = new scopeindex_t[max_depth+1];
256 std::memcpy(dest: depth_index, src: o.depth_index, n: (depth + 1) * sizeof(depth_index[0]));
257}
258
259dom::parser::Iterator::~Iterator() noexcept {
260 if (depth_index) { delete[] depth_index; }
261}
262
263bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const {
264 if (!is_ok()) {
265 return false;
266 }
267 switch (current_type) {
268 case '"': // we have a string
269 os << '"';
270 if (escape_strings) {
271 os << internal::escape_json_string(std::string_view(get_string(), get_string_length()));
272 } else {
273 // was: os << get_string();, but given that we can include null chars, we
274 // have to do something crazier:
275 std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator<char>(os));
276 }
277 os << '"';
278 break;
279 case 'l': // we have a long int
280 os << get_integer();
281 break;
282 case 'u':
283 os << get_unsigned_integer();
284 break;
285 case 'd':
286 os << get_double();
287 break;
288 case 'n': // we have a null
289 os << "null";
290 break;
291 case 't': // we have a true
292 os << "true";
293 break;
294 case 'f': // we have a false
295 os << "false";
296 break;
297 case '{': // we have an object
298 case '}': // we end an object
299 case '[': // we start an array
300 case ']': // we end an array
301 os << char(current_type);
302 break;
303 default:
304 return false;
305 }
306 return true;
307}
308
309bool dom::parser::Iterator::move_to(const char *pointer,
310 uint32_t length) {
311 char *new_pointer = nullptr;
312 if (pointer[0] == '#') {
313 // Converting fragment representation to string representation
314 new_pointer = new char[length];
315 uint32_t new_length = 0;
316 for (uint32_t i = 1; i < length; i++) {
317 if (pointer[i] == '%' && pointer[i + 1] == 'x') {
318#if __cpp_exceptions
319 try {
320#endif
321 int fragment =
322 std::stoi(str: std::string(&pointer[i + 2], 2), idx: nullptr, base: 16);
323 if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) {
324 // escaping the character
325 new_pointer[new_length] = '\\';
326 new_length++;
327 }
328 new_pointer[new_length] = char(fragment);
329 i += 3;
330#if __cpp_exceptions
331 } catch (std::invalid_argument &) {
332 delete[] new_pointer;
333 return false; // the fragment is invalid
334 }
335#endif
336 } else {
337 new_pointer[new_length] = pointer[i];
338 }
339 new_length++;
340 }
341 length = new_length;
342 pointer = new_pointer;
343 }
344
345 // saving the current state
346 size_t depth_s = depth;
347 size_t location_s = location;
348 uint8_t current_type_s = current_type;
349 uint64_t current_val_s = current_val;
350
351 rewind(); // The json pointer is used from the root of the document.
352
353 bool found = relative_move_to(pointer, length);
354 delete[] new_pointer;
355
356 if (!found) {
357 // since the pointer has found nothing, we get back to the original
358 // position.
359 depth = depth_s;
360 location = location_s;
361 current_type = current_type_s;
362 current_val = current_val_s;
363 }
364
365 return found;
366}
367
368bool dom::parser::Iterator::relative_move_to(const char *pointer,
369 uint32_t length) {
370 if (length == 0) {
371 // returns the whole document
372 return true;
373 }
374
375 if (pointer[0] != '/') {
376 // '/' must be the first character
377 return false;
378 }
379
380 // finding the key in an object or the index in an array
381 std::string key_or_index;
382 uint32_t offset = 1;
383
384 // checking for the "-" case
385 if (is_array() && pointer[1] == '-') {
386 if (length != 2) {
387 // the pointer must be exactly "/-"
388 // there can't be anything more after '-' as an index
389 return false;
390 }
391 key_or_index = '-';
392 offset = length; // will skip the loop coming right after
393 }
394
395 // We either transform the first reference token to a valid json key
396 // or we make sure it is a valid index in an array.
397 for (; offset < length; offset++) {
398 if (pointer[offset] == '/') {
399 // beginning of the next key or index
400 break;
401 }
402 if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) {
403 // the index of an array must be an integer
404 // we also make sure std::stoi won't discard whitespaces later
405 return false;
406 }
407 if (pointer[offset] == '~') {
408 // "~1" represents "/"
409 if (pointer[offset + 1] == '1') {
410 key_or_index += '/';
411 offset++;
412 continue;
413 }
414 // "~0" represents "~"
415 if (pointer[offset + 1] == '0') {
416 key_or_index += '~';
417 offset++;
418 continue;
419 }
420 }
421 if (pointer[offset] == '\\') {
422 if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' ||
423 (pointer[offset + 1] <= 0x1F)) {
424 key_or_index += pointer[offset + 1];
425 offset++;
426 continue;
427 }
428 return false; // invalid escaped character
429 }
430 if (pointer[offset] == '\"') {
431 // unescaped quote character. this is an invalid case.
432 // lets do nothing and assume most pointers will be valid.
433 // it won't find any corresponding json key anyway.
434 // return false;
435 }
436 key_or_index += pointer[offset];
437 }
438
439 bool found = false;
440 if (is_object()) {
441 if (move_to_key(key: key_or_index.c_str(), length: uint32_t(key_or_index.length()))) {
442 found = relative_move_to(pointer: pointer + offset, length: length - offset);
443 }
444 } else if (is_array()) {
445 if (key_or_index == "-") { // handling "-" case first
446 if (down()) {
447 while (next())
448 ; // moving to the end of the array
449 // moving to the nonexistent value right after...
450 size_t npos;
451 if ((current_type == '[') || (current_type == '{')) {
452 // we need to jump
453 npos = uint32_t(current_val);
454 } else {
455 npos =
456 location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
457 }
458 location = npos;
459 current_val = doc.tape[npos];
460 current_type = uint8_t(current_val >> 56);
461 return true; // how could it fail ?
462 }
463 } else { // regular numeric index
464 // The index can't have a leading '0'
465 if (key_or_index[0] == '0' && key_or_index.length() > 1) {
466 return false;
467 }
468 // it cannot be empty
469 if (key_or_index.length() == 0) {
470 return false;
471 }
472 // we already checked the index contains only valid digits
473 uint32_t index = std::stoi(str: key_or_index);
474 if (move_to_index(index)) {
475 found = relative_move_to(pointer: pointer + offset, length: length - offset);
476 }
477 }
478 }
479
480 return found;
481}
482
483SIMDJSON_POP_DISABLE_WARNINGS
484} // namespace simdjson
485
486#endif // SIMDJSON_DISABLE_DEPRECATED_API
487
488
489#endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H
490