1 | #ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H |
2 | #define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H |
3 | |
4 | #include "simdjson/dom/parsedjson_iterator.h" |
5 | #include "simdjson/portability.h" |
6 | #include <cstring> |
7 | |
8 | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
9 | |
10 | namespace simdjson { |
11 | |
12 | // VS2017 reports deprecated warnings when you define a deprecated class's methods. |
13 | SIMDJSON_PUSH_DISABLE_WARNINGS |
14 | SIMDJSON_DISABLE_DEPRECATED_WARNING |
15 | |
16 | // Because of template weirdness, the actual class definition is inline in the document class |
17 | simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { |
18 | return location < tape_length; |
19 | } |
20 | |
21 | // useful for debugging purposes |
22 | size_t dom::parser::Iterator::get_tape_location() const { |
23 | return location; |
24 | } |
25 | |
26 | // useful for debugging purposes |
27 | size_t dom::parser::Iterator::get_tape_length() const { |
28 | return tape_length; |
29 | } |
30 | |
31 | // returns the current depth (start at 1 with 0 reserved for the fictitious root |
32 | // node) |
33 | size_t dom::parser::Iterator::get_depth() const { |
34 | return depth; |
35 | } |
36 | |
37 | // A scope is a series of nodes at the same depth, typically it is either an |
38 | // object ({) or an array ([). The root node has type 'r'. |
39 | uint8_t dom::parser::Iterator::get_scope_type() const { |
40 | return depth_index[depth].scope_type; |
41 | } |
42 | |
43 | bool dom::parser::Iterator::move_forward() { |
44 | if (location + 1 >= tape_length) { |
45 | return false; // we are at the end! |
46 | } |
47 | |
48 | if ((current_type == '[') || (current_type == '{')) { |
49 | // We are entering a new scope |
50 | depth++; |
51 | assert(depth < max_depth); |
52 | depth_index[depth].start_of_scope = location; |
53 | depth_index[depth].scope_type = current_type; |
54 | } else if ((current_type == ']') || (current_type == '}')) { |
55 | // Leaving a scope. |
56 | depth--; |
57 | } else if (is_number()) { |
58 | // these types use 2 locations on the tape, not just one. |
59 | location += 1; |
60 | } |
61 | |
62 | location += 1; |
63 | current_val = doc.tape[location]; |
64 | current_type = uint8_t(current_val >> 56); |
65 | return true; |
66 | } |
67 | |
68 | void dom::parser::Iterator::move_to_value() { |
69 | // assume that we are on a key, so move by 1. |
70 | location += 1; |
71 | current_val = doc.tape[location]; |
72 | current_type = uint8_t(current_val >> 56); |
73 | } |
74 | |
75 | bool dom::parser::Iterator::move_to_key(const char *key) { |
76 | if (down()) { |
77 | do { |
78 | const bool right_key = (strcmp(s1: get_string(), s2: key) == 0); |
79 | move_to_value(); |
80 | if (right_key) { |
81 | return true; |
82 | } |
83 | } while (next()); |
84 | up(); |
85 | } |
86 | return false; |
87 | } |
88 | |
89 | bool dom::parser::Iterator::move_to_key_insensitive( |
90 | const char *key) { |
91 | if (down()) { |
92 | do { |
93 | const bool right_key = (simdjson_strcasecmp(s1: get_string(), s2: key) == 0); |
94 | move_to_value(); |
95 | if (right_key) { |
96 | return true; |
97 | } |
98 | } while (next()); |
99 | up(); |
100 | } |
101 | return false; |
102 | } |
103 | |
104 | bool dom::parser::Iterator::move_to_key(const char *key, |
105 | uint32_t length) { |
106 | if (down()) { |
107 | do { |
108 | bool right_key = ((get_string_length() == length) && |
109 | (memcmp(s1: get_string(), s2: key, n: length) == 0)); |
110 | move_to_value(); |
111 | if (right_key) { |
112 | return true; |
113 | } |
114 | } while (next()); |
115 | up(); |
116 | } |
117 | return false; |
118 | } |
119 | |
120 | bool dom::parser::Iterator::move_to_index(uint32_t index) { |
121 | if (down()) { |
122 | uint32_t i = 0; |
123 | for (; i < index; i++) { |
124 | if (!next()) { |
125 | break; |
126 | } |
127 | } |
128 | if (i == index) { |
129 | return true; |
130 | } |
131 | up(); |
132 | } |
133 | return false; |
134 | } |
135 | |
136 | bool dom::parser::Iterator::prev() { |
137 | size_t target_location = location; |
138 | to_start_scope(); |
139 | size_t npos = location; |
140 | if (target_location == npos) { |
141 | return false; // we were already at the start |
142 | } |
143 | size_t oldnpos; |
144 | // we have that npos < target_location here |
145 | do { |
146 | oldnpos = npos; |
147 | if ((current_type == '[') || (current_type == '{')) { |
148 | // we need to jump |
149 | npos = uint32_t(current_val); |
150 | } else { |
151 | npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); |
152 | } |
153 | } while (npos < target_location); |
154 | location = oldnpos; |
155 | current_val = doc.tape[location]; |
156 | current_type = uint8_t(current_val >> 56); |
157 | return true; |
158 | } |
159 | |
160 | bool dom::parser::Iterator::up() { |
161 | if (depth == 1) { |
162 | return false; // don't allow moving back to root |
163 | } |
164 | to_start_scope(); |
165 | // next we just move to the previous value |
166 | depth--; |
167 | location -= 1; |
168 | current_val = doc.tape[location]; |
169 | current_type = uint8_t(current_val >> 56); |
170 | return true; |
171 | } |
172 | |
173 | bool dom::parser::Iterator::down() { |
174 | if (location + 1 >= tape_length) { |
175 | return false; |
176 | } |
177 | if ((current_type == '[') || (current_type == '{')) { |
178 | size_t npos = uint32_t(current_val); |
179 | if (npos == location + 2) { |
180 | return false; // we have an empty scope |
181 | } |
182 | depth++; |
183 | assert(depth < max_depth); |
184 | location = location + 1; |
185 | depth_index[depth].start_of_scope = location; |
186 | depth_index[depth].scope_type = current_type; |
187 | current_val = doc.tape[location]; |
188 | current_type = uint8_t(current_val >> 56); |
189 | return true; |
190 | } |
191 | return false; |
192 | } |
193 | |
194 | void dom::parser::Iterator::to_start_scope() { |
195 | location = depth_index[depth].start_of_scope; |
196 | current_val = doc.tape[location]; |
197 | current_type = uint8_t(current_val >> 56); |
198 | } |
199 | |
200 | bool dom::parser::Iterator::next() { |
201 | size_t npos; |
202 | if ((current_type == '[') || (current_type == '{')) { |
203 | // we need to jump |
204 | npos = uint32_t(current_val); |
205 | } else { |
206 | npos = location + (is_number() ? 2 : 1); |
207 | } |
208 | uint64_t next_val = doc.tape[npos]; |
209 | uint8_t next_type = uint8_t(next_val >> 56); |
210 | if ((next_type == ']') || (next_type == '}')) { |
211 | return false; // we reached the end of the scope |
212 | } |
213 | location = npos; |
214 | current_val = next_val; |
215 | current_type = next_type; |
216 | return true; |
217 | } |
218 | dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) |
219 | : doc(pj.doc) |
220 | { |
221 | #if SIMDJSON_EXCEPTIONS |
222 | if (!pj.valid) { throw simdjson_error(pj.error); } |
223 | #else |
224 | if (!pj.valid) { return; } // abort() usage is forbidden in the library |
225 | #endif |
226 | |
227 | max_depth = pj.max_depth(); |
228 | depth_index = new scopeindex_t[max_depth + 1]; |
229 | depth_index[0].start_of_scope = location; |
230 | current_val = doc.tape[location++]; |
231 | current_type = uint8_t(current_val >> 56); |
232 | depth_index[0].scope_type = current_type; |
233 | tape_length = size_t(current_val & internal::JSON_VALUE_MASK); |
234 | if (location < tape_length) { |
235 | // If we make it here, then depth_capacity must >=2, but the compiler |
236 | // may not know this. |
237 | current_val = doc.tape[location]; |
238 | current_type = uint8_t(current_val >> 56); |
239 | depth++; |
240 | assert(depth < max_depth); |
241 | depth_index[depth].start_of_scope = location; |
242 | depth_index[depth].scope_type = current_type; |
243 | } |
244 | } |
245 | dom::parser::Iterator::Iterator( |
246 | const dom::parser::Iterator &o) noexcept |
247 | : doc(o.doc), |
248 | max_depth(o.depth), |
249 | depth(o.depth), |
250 | location(o.location), |
251 | tape_length(o.tape_length), |
252 | current_type(o.current_type), |
253 | current_val(o.current_val) |
254 | { |
255 | depth_index = new scopeindex_t[max_depth+1]; |
256 | std::memcpy(dest: depth_index, src: o.depth_index, n: (depth + 1) * sizeof(depth_index[0])); |
257 | } |
258 | |
259 | dom::parser::Iterator::~Iterator() noexcept { |
260 | if (depth_index) { delete[] depth_index; } |
261 | } |
262 | |
263 | bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { |
264 | if (!is_ok()) { |
265 | return false; |
266 | } |
267 | switch (current_type) { |
268 | case '"': // we have a string |
269 | os << '"'; |
270 | if (escape_strings) { |
271 | os << internal::escape_json_string(std::string_view(get_string(), get_string_length())); |
272 | } else { |
273 | // was: os << get_string();, but given that we can include null chars, we |
274 | // have to do something crazier: |
275 | std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator<char>(os)); |
276 | } |
277 | os << '"'; |
278 | break; |
279 | case 'l': // we have a long int |
280 | os << get_integer(); |
281 | break; |
282 | case 'u': |
283 | os << get_unsigned_integer(); |
284 | break; |
285 | case 'd': |
286 | os << get_double(); |
287 | break; |
288 | case 'n': // we have a null |
289 | os << "null" ; |
290 | break; |
291 | case 't': // we have a true |
292 | os << "true" ; |
293 | break; |
294 | case 'f': // we have a false |
295 | os << "false" ; |
296 | break; |
297 | case '{': // we have an object |
298 | case '}': // we end an object |
299 | case '[': // we start an array |
300 | case ']': // we end an array |
301 | os << char(current_type); |
302 | break; |
303 | default: |
304 | return false; |
305 | } |
306 | return true; |
307 | } |
308 | |
309 | bool dom::parser::Iterator::move_to(const char *pointer, |
310 | uint32_t length) { |
311 | char *new_pointer = nullptr; |
312 | if (pointer[0] == '#') { |
313 | // Converting fragment representation to string representation |
314 | new_pointer = new char[length]; |
315 | uint32_t new_length = 0; |
316 | for (uint32_t i = 1; i < length; i++) { |
317 | if (pointer[i] == '%' && pointer[i + 1] == 'x') { |
318 | #if __cpp_exceptions |
319 | try { |
320 | #endif |
321 | int fragment = |
322 | std::stoi(str: std::string(&pointer[i + 2], 2), idx: nullptr, base: 16); |
323 | if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { |
324 | // escaping the character |
325 | new_pointer[new_length] = '\\'; |
326 | new_length++; |
327 | } |
328 | new_pointer[new_length] = char(fragment); |
329 | i += 3; |
330 | #if __cpp_exceptions |
331 | } catch (std::invalid_argument &) { |
332 | delete[] new_pointer; |
333 | return false; // the fragment is invalid |
334 | } |
335 | #endif |
336 | } else { |
337 | new_pointer[new_length] = pointer[i]; |
338 | } |
339 | new_length++; |
340 | } |
341 | length = new_length; |
342 | pointer = new_pointer; |
343 | } |
344 | |
345 | // saving the current state |
346 | size_t depth_s = depth; |
347 | size_t location_s = location; |
348 | uint8_t current_type_s = current_type; |
349 | uint64_t current_val_s = current_val; |
350 | |
351 | rewind(); // The json pointer is used from the root of the document. |
352 | |
353 | bool found = relative_move_to(pointer, length); |
354 | delete[] new_pointer; |
355 | |
356 | if (!found) { |
357 | // since the pointer has found nothing, we get back to the original |
358 | // position. |
359 | depth = depth_s; |
360 | location = location_s; |
361 | current_type = current_type_s; |
362 | current_val = current_val_s; |
363 | } |
364 | |
365 | return found; |
366 | } |
367 | |
368 | bool dom::parser::Iterator::relative_move_to(const char *pointer, |
369 | uint32_t length) { |
370 | if (length == 0) { |
371 | // returns the whole document |
372 | return true; |
373 | } |
374 | |
375 | if (pointer[0] != '/') { |
376 | // '/' must be the first character |
377 | return false; |
378 | } |
379 | |
380 | // finding the key in an object or the index in an array |
381 | std::string key_or_index; |
382 | uint32_t offset = 1; |
383 | |
384 | // checking for the "-" case |
385 | if (is_array() && pointer[1] == '-') { |
386 | if (length != 2) { |
387 | // the pointer must be exactly "/-" |
388 | // there can't be anything more after '-' as an index |
389 | return false; |
390 | } |
391 | key_or_index = '-'; |
392 | offset = length; // will skip the loop coming right after |
393 | } |
394 | |
395 | // We either transform the first reference token to a valid json key |
396 | // or we make sure it is a valid index in an array. |
397 | for (; offset < length; offset++) { |
398 | if (pointer[offset] == '/') { |
399 | // beginning of the next key or index |
400 | break; |
401 | } |
402 | if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { |
403 | // the index of an array must be an integer |
404 | // we also make sure std::stoi won't discard whitespaces later |
405 | return false; |
406 | } |
407 | if (pointer[offset] == '~') { |
408 | // "~1" represents "/" |
409 | if (pointer[offset + 1] == '1') { |
410 | key_or_index += '/'; |
411 | offset++; |
412 | continue; |
413 | } |
414 | // "~0" represents "~" |
415 | if (pointer[offset + 1] == '0') { |
416 | key_or_index += '~'; |
417 | offset++; |
418 | continue; |
419 | } |
420 | } |
421 | if (pointer[offset] == '\\') { |
422 | if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || |
423 | (pointer[offset + 1] <= 0x1F)) { |
424 | key_or_index += pointer[offset + 1]; |
425 | offset++; |
426 | continue; |
427 | } |
428 | return false; // invalid escaped character |
429 | } |
430 | if (pointer[offset] == '\"') { |
431 | // unescaped quote character. this is an invalid case. |
432 | // lets do nothing and assume most pointers will be valid. |
433 | // it won't find any corresponding json key anyway. |
434 | // return false; |
435 | } |
436 | key_or_index += pointer[offset]; |
437 | } |
438 | |
439 | bool found = false; |
440 | if (is_object()) { |
441 | if (move_to_key(key: key_or_index.c_str(), length: uint32_t(key_or_index.length()))) { |
442 | found = relative_move_to(pointer: pointer + offset, length: length - offset); |
443 | } |
444 | } else if (is_array()) { |
445 | if (key_or_index == "-" ) { // handling "-" case first |
446 | if (down()) { |
447 | while (next()) |
448 | ; // moving to the end of the array |
449 | // moving to the nonexistent value right after... |
450 | size_t npos; |
451 | if ((current_type == '[') || (current_type == '{')) { |
452 | // we need to jump |
453 | npos = uint32_t(current_val); |
454 | } else { |
455 | npos = |
456 | location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); |
457 | } |
458 | location = npos; |
459 | current_val = doc.tape[npos]; |
460 | current_type = uint8_t(current_val >> 56); |
461 | return true; // how could it fail ? |
462 | } |
463 | } else { // regular numeric index |
464 | // The index can't have a leading '0' |
465 | if (key_or_index[0] == '0' && key_or_index.length() > 1) { |
466 | return false; |
467 | } |
468 | // it cannot be empty |
469 | if (key_or_index.length() == 0) { |
470 | return false; |
471 | } |
472 | // we already checked the index contains only valid digits |
473 | uint32_t index = std::stoi(str: key_or_index); |
474 | if (move_to_index(index)) { |
475 | found = relative_move_to(pointer: pointer + offset, length: length - offset); |
476 | } |
477 | } |
478 | } |
479 | |
480 | return found; |
481 | } |
482 | |
483 | SIMDJSON_POP_DISABLE_WARNINGS |
484 | } // namespace simdjson |
485 | |
486 | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
487 | |
488 | |
489 | #endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H |
490 | |