| 1 | #ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H |
| 2 | #define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H |
| 3 | |
| 4 | #include "simdjson/dom/parsedjson_iterator.h" |
| 5 | #include "simdjson/portability.h" |
| 6 | #include <cstring> |
| 7 | |
| 8 | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
| 9 | |
| 10 | namespace simdjson { |
| 11 | |
| 12 | // VS2017 reports deprecated warnings when you define a deprecated class's methods. |
| 13 | SIMDJSON_PUSH_DISABLE_WARNINGS |
| 14 | SIMDJSON_DISABLE_DEPRECATED_WARNING |
| 15 | |
| 16 | // Because of template weirdness, the actual class definition is inline in the document class |
| 17 | simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { |
| 18 | return location < tape_length; |
| 19 | } |
| 20 | |
| 21 | // useful for debugging purposes |
| 22 | size_t dom::parser::Iterator::get_tape_location() const { |
| 23 | return location; |
| 24 | } |
| 25 | |
| 26 | // useful for debugging purposes |
| 27 | size_t dom::parser::Iterator::get_tape_length() const { |
| 28 | return tape_length; |
| 29 | } |
| 30 | |
| 31 | // returns the current depth (start at 1 with 0 reserved for the fictitious root |
| 32 | // node) |
| 33 | size_t dom::parser::Iterator::get_depth() const { |
| 34 | return depth; |
| 35 | } |
| 36 | |
| 37 | // A scope is a series of nodes at the same depth, typically it is either an |
| 38 | // object ({) or an array ([). The root node has type 'r'. |
| 39 | uint8_t dom::parser::Iterator::get_scope_type() const { |
| 40 | return depth_index[depth].scope_type; |
| 41 | } |
| 42 | |
| 43 | bool dom::parser::Iterator::move_forward() { |
| 44 | if (location + 1 >= tape_length) { |
| 45 | return false; // we are at the end! |
| 46 | } |
| 47 | |
| 48 | if ((current_type == '[') || (current_type == '{')) { |
| 49 | // We are entering a new scope |
| 50 | depth++; |
| 51 | assert(depth < max_depth); |
| 52 | depth_index[depth].start_of_scope = location; |
| 53 | depth_index[depth].scope_type = current_type; |
| 54 | } else if ((current_type == ']') || (current_type == '}')) { |
| 55 | // Leaving a scope. |
| 56 | depth--; |
| 57 | } else if (is_number()) { |
| 58 | // these types use 2 locations on the tape, not just one. |
| 59 | location += 1; |
| 60 | } |
| 61 | |
| 62 | location += 1; |
| 63 | current_val = doc.tape[location]; |
| 64 | current_type = uint8_t(current_val >> 56); |
| 65 | return true; |
| 66 | } |
| 67 | |
| 68 | void dom::parser::Iterator::move_to_value() { |
| 69 | // assume that we are on a key, so move by 1. |
| 70 | location += 1; |
| 71 | current_val = doc.tape[location]; |
| 72 | current_type = uint8_t(current_val >> 56); |
| 73 | } |
| 74 | |
| 75 | bool dom::parser::Iterator::move_to_key(const char *key) { |
| 76 | if (down()) { |
| 77 | do { |
| 78 | const bool right_key = (strcmp(s1: get_string(), s2: key) == 0); |
| 79 | move_to_value(); |
| 80 | if (right_key) { |
| 81 | return true; |
| 82 | } |
| 83 | } while (next()); |
| 84 | up(); |
| 85 | } |
| 86 | return false; |
| 87 | } |
| 88 | |
| 89 | bool dom::parser::Iterator::move_to_key_insensitive( |
| 90 | const char *key) { |
| 91 | if (down()) { |
| 92 | do { |
| 93 | const bool right_key = (simdjson_strcasecmp(s1: get_string(), s2: key) == 0); |
| 94 | move_to_value(); |
| 95 | if (right_key) { |
| 96 | return true; |
| 97 | } |
| 98 | } while (next()); |
| 99 | up(); |
| 100 | } |
| 101 | return false; |
| 102 | } |
| 103 | |
| 104 | bool dom::parser::Iterator::move_to_key(const char *key, |
| 105 | uint32_t length) { |
| 106 | if (down()) { |
| 107 | do { |
| 108 | bool right_key = ((get_string_length() == length) && |
| 109 | (memcmp(s1: get_string(), s2: key, n: length) == 0)); |
| 110 | move_to_value(); |
| 111 | if (right_key) { |
| 112 | return true; |
| 113 | } |
| 114 | } while (next()); |
| 115 | up(); |
| 116 | } |
| 117 | return false; |
| 118 | } |
| 119 | |
| 120 | bool dom::parser::Iterator::move_to_index(uint32_t index) { |
| 121 | if (down()) { |
| 122 | uint32_t i = 0; |
| 123 | for (; i < index; i++) { |
| 124 | if (!next()) { |
| 125 | break; |
| 126 | } |
| 127 | } |
| 128 | if (i == index) { |
| 129 | return true; |
| 130 | } |
| 131 | up(); |
| 132 | } |
| 133 | return false; |
| 134 | } |
| 135 | |
| 136 | bool dom::parser::Iterator::prev() { |
| 137 | size_t target_location = location; |
| 138 | to_start_scope(); |
| 139 | size_t npos = location; |
| 140 | if (target_location == npos) { |
| 141 | return false; // we were already at the start |
| 142 | } |
| 143 | size_t oldnpos; |
| 144 | // we have that npos < target_location here |
| 145 | do { |
| 146 | oldnpos = npos; |
| 147 | if ((current_type == '[') || (current_type == '{')) { |
| 148 | // we need to jump |
| 149 | npos = uint32_t(current_val); |
| 150 | } else { |
| 151 | npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); |
| 152 | } |
| 153 | } while (npos < target_location); |
| 154 | location = oldnpos; |
| 155 | current_val = doc.tape[location]; |
| 156 | current_type = uint8_t(current_val >> 56); |
| 157 | return true; |
| 158 | } |
| 159 | |
| 160 | bool dom::parser::Iterator::up() { |
| 161 | if (depth == 1) { |
| 162 | return false; // don't allow moving back to root |
| 163 | } |
| 164 | to_start_scope(); |
| 165 | // next we just move to the previous value |
| 166 | depth--; |
| 167 | location -= 1; |
| 168 | current_val = doc.tape[location]; |
| 169 | current_type = uint8_t(current_val >> 56); |
| 170 | return true; |
| 171 | } |
| 172 | |
| 173 | bool dom::parser::Iterator::down() { |
| 174 | if (location + 1 >= tape_length) { |
| 175 | return false; |
| 176 | } |
| 177 | if ((current_type == '[') || (current_type == '{')) { |
| 178 | size_t npos = uint32_t(current_val); |
| 179 | if (npos == location + 2) { |
| 180 | return false; // we have an empty scope |
| 181 | } |
| 182 | depth++; |
| 183 | assert(depth < max_depth); |
| 184 | location = location + 1; |
| 185 | depth_index[depth].start_of_scope = location; |
| 186 | depth_index[depth].scope_type = current_type; |
| 187 | current_val = doc.tape[location]; |
| 188 | current_type = uint8_t(current_val >> 56); |
| 189 | return true; |
| 190 | } |
| 191 | return false; |
| 192 | } |
| 193 | |
| 194 | void dom::parser::Iterator::to_start_scope() { |
| 195 | location = depth_index[depth].start_of_scope; |
| 196 | current_val = doc.tape[location]; |
| 197 | current_type = uint8_t(current_val >> 56); |
| 198 | } |
| 199 | |
| 200 | bool dom::parser::Iterator::next() { |
| 201 | size_t npos; |
| 202 | if ((current_type == '[') || (current_type == '{')) { |
| 203 | // we need to jump |
| 204 | npos = uint32_t(current_val); |
| 205 | } else { |
| 206 | npos = location + (is_number() ? 2 : 1); |
| 207 | } |
| 208 | uint64_t next_val = doc.tape[npos]; |
| 209 | uint8_t next_type = uint8_t(next_val >> 56); |
| 210 | if ((next_type == ']') || (next_type == '}')) { |
| 211 | return false; // we reached the end of the scope |
| 212 | } |
| 213 | location = npos; |
| 214 | current_val = next_val; |
| 215 | current_type = next_type; |
| 216 | return true; |
| 217 | } |
| 218 | dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) |
| 219 | : doc(pj.doc) |
| 220 | { |
| 221 | #if SIMDJSON_EXCEPTIONS |
| 222 | if (!pj.valid) { throw simdjson_error(pj.error); } |
| 223 | #else |
| 224 | if (!pj.valid) { return; } // abort() usage is forbidden in the library |
| 225 | #endif |
| 226 | |
| 227 | max_depth = pj.max_depth(); |
| 228 | depth_index = new scopeindex_t[max_depth + 1]; |
| 229 | depth_index[0].start_of_scope = location; |
| 230 | current_val = doc.tape[location++]; |
| 231 | current_type = uint8_t(current_val >> 56); |
| 232 | depth_index[0].scope_type = current_type; |
| 233 | tape_length = size_t(current_val & internal::JSON_VALUE_MASK); |
| 234 | if (location < tape_length) { |
| 235 | // If we make it here, then depth_capacity must >=2, but the compiler |
| 236 | // may not know this. |
| 237 | current_val = doc.tape[location]; |
| 238 | current_type = uint8_t(current_val >> 56); |
| 239 | depth++; |
| 240 | assert(depth < max_depth); |
| 241 | depth_index[depth].start_of_scope = location; |
| 242 | depth_index[depth].scope_type = current_type; |
| 243 | } |
| 244 | } |
| 245 | dom::parser::Iterator::Iterator( |
| 246 | const dom::parser::Iterator &o) noexcept |
| 247 | : doc(o.doc), |
| 248 | max_depth(o.depth), |
| 249 | depth(o.depth), |
| 250 | location(o.location), |
| 251 | tape_length(o.tape_length), |
| 252 | current_type(o.current_type), |
| 253 | current_val(o.current_val) |
| 254 | { |
| 255 | depth_index = new scopeindex_t[max_depth+1]; |
| 256 | std::memcpy(dest: depth_index, src: o.depth_index, n: (depth + 1) * sizeof(depth_index[0])); |
| 257 | } |
| 258 | |
| 259 | dom::parser::Iterator::~Iterator() noexcept { |
| 260 | if (depth_index) { delete[] depth_index; } |
| 261 | } |
| 262 | |
| 263 | bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { |
| 264 | if (!is_ok()) { |
| 265 | return false; |
| 266 | } |
| 267 | switch (current_type) { |
| 268 | case '"': // we have a string |
| 269 | os << '"'; |
| 270 | if (escape_strings) { |
| 271 | os << internal::escape_json_string(std::string_view(get_string(), get_string_length())); |
| 272 | } else { |
| 273 | // was: os << get_string();, but given that we can include null chars, we |
| 274 | // have to do something crazier: |
| 275 | std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator<char>(os)); |
| 276 | } |
| 277 | os << '"'; |
| 278 | break; |
| 279 | case 'l': // we have a long int |
| 280 | os << get_integer(); |
| 281 | break; |
| 282 | case 'u': |
| 283 | os << get_unsigned_integer(); |
| 284 | break; |
| 285 | case 'd': |
| 286 | os << get_double(); |
| 287 | break; |
| 288 | case 'n': // we have a null |
| 289 | os << "null" ; |
| 290 | break; |
| 291 | case 't': // we have a true |
| 292 | os << "true" ; |
| 293 | break; |
| 294 | case 'f': // we have a false |
| 295 | os << "false" ; |
| 296 | break; |
| 297 | case '{': // we have an object |
| 298 | case '}': // we end an object |
| 299 | case '[': // we start an array |
| 300 | case ']': // we end an array |
| 301 | os << char(current_type); |
| 302 | break; |
| 303 | default: |
| 304 | return false; |
| 305 | } |
| 306 | return true; |
| 307 | } |
| 308 | |
| 309 | bool dom::parser::Iterator::move_to(const char *pointer, |
| 310 | uint32_t length) { |
| 311 | char *new_pointer = nullptr; |
| 312 | if (pointer[0] == '#') { |
| 313 | // Converting fragment representation to string representation |
| 314 | new_pointer = new char[length]; |
| 315 | uint32_t new_length = 0; |
| 316 | for (uint32_t i = 1; i < length; i++) { |
| 317 | if (pointer[i] == '%' && pointer[i + 1] == 'x') { |
| 318 | #if __cpp_exceptions |
| 319 | try { |
| 320 | #endif |
| 321 | int fragment = |
| 322 | std::stoi(str: std::string(&pointer[i + 2], 2), idx: nullptr, base: 16); |
| 323 | if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { |
| 324 | // escaping the character |
| 325 | new_pointer[new_length] = '\\'; |
| 326 | new_length++; |
| 327 | } |
| 328 | new_pointer[new_length] = char(fragment); |
| 329 | i += 3; |
| 330 | #if __cpp_exceptions |
| 331 | } catch (std::invalid_argument &) { |
| 332 | delete[] new_pointer; |
| 333 | return false; // the fragment is invalid |
| 334 | } |
| 335 | #endif |
| 336 | } else { |
| 337 | new_pointer[new_length] = pointer[i]; |
| 338 | } |
| 339 | new_length++; |
| 340 | } |
| 341 | length = new_length; |
| 342 | pointer = new_pointer; |
| 343 | } |
| 344 | |
| 345 | // saving the current state |
| 346 | size_t depth_s = depth; |
| 347 | size_t location_s = location; |
| 348 | uint8_t current_type_s = current_type; |
| 349 | uint64_t current_val_s = current_val; |
| 350 | |
| 351 | rewind(); // The json pointer is used from the root of the document. |
| 352 | |
| 353 | bool found = relative_move_to(pointer, length); |
| 354 | delete[] new_pointer; |
| 355 | |
| 356 | if (!found) { |
| 357 | // since the pointer has found nothing, we get back to the original |
| 358 | // position. |
| 359 | depth = depth_s; |
| 360 | location = location_s; |
| 361 | current_type = current_type_s; |
| 362 | current_val = current_val_s; |
| 363 | } |
| 364 | |
| 365 | return found; |
| 366 | } |
| 367 | |
| 368 | bool dom::parser::Iterator::relative_move_to(const char *pointer, |
| 369 | uint32_t length) { |
| 370 | if (length == 0) { |
| 371 | // returns the whole document |
| 372 | return true; |
| 373 | } |
| 374 | |
| 375 | if (pointer[0] != '/') { |
| 376 | // '/' must be the first character |
| 377 | return false; |
| 378 | } |
| 379 | |
| 380 | // finding the key in an object or the index in an array |
| 381 | std::string key_or_index; |
| 382 | uint32_t offset = 1; |
| 383 | |
| 384 | // checking for the "-" case |
| 385 | if (is_array() && pointer[1] == '-') { |
| 386 | if (length != 2) { |
| 387 | // the pointer must be exactly "/-" |
| 388 | // there can't be anything more after '-' as an index |
| 389 | return false; |
| 390 | } |
| 391 | key_or_index = '-'; |
| 392 | offset = length; // will skip the loop coming right after |
| 393 | } |
| 394 | |
| 395 | // We either transform the first reference token to a valid json key |
| 396 | // or we make sure it is a valid index in an array. |
| 397 | for (; offset < length; offset++) { |
| 398 | if (pointer[offset] == '/') { |
| 399 | // beginning of the next key or index |
| 400 | break; |
| 401 | } |
| 402 | if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { |
| 403 | // the index of an array must be an integer |
| 404 | // we also make sure std::stoi won't discard whitespaces later |
| 405 | return false; |
| 406 | } |
| 407 | if (pointer[offset] == '~') { |
| 408 | // "~1" represents "/" |
| 409 | if (pointer[offset + 1] == '1') { |
| 410 | key_or_index += '/'; |
| 411 | offset++; |
| 412 | continue; |
| 413 | } |
| 414 | // "~0" represents "~" |
| 415 | if (pointer[offset + 1] == '0') { |
| 416 | key_or_index += '~'; |
| 417 | offset++; |
| 418 | continue; |
| 419 | } |
| 420 | } |
| 421 | if (pointer[offset] == '\\') { |
| 422 | if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || |
| 423 | (pointer[offset + 1] <= 0x1F)) { |
| 424 | key_or_index += pointer[offset + 1]; |
| 425 | offset++; |
| 426 | continue; |
| 427 | } |
| 428 | return false; // invalid escaped character |
| 429 | } |
| 430 | if (pointer[offset] == '\"') { |
| 431 | // unescaped quote character. this is an invalid case. |
| 432 | // lets do nothing and assume most pointers will be valid. |
| 433 | // it won't find any corresponding json key anyway. |
| 434 | // return false; |
| 435 | } |
| 436 | key_or_index += pointer[offset]; |
| 437 | } |
| 438 | |
| 439 | bool found = false; |
| 440 | if (is_object()) { |
| 441 | if (move_to_key(key: key_or_index.c_str(), length: uint32_t(key_or_index.length()))) { |
| 442 | found = relative_move_to(pointer: pointer + offset, length: length - offset); |
| 443 | } |
| 444 | } else if (is_array()) { |
| 445 | if (key_or_index == "-" ) { // handling "-" case first |
| 446 | if (down()) { |
| 447 | while (next()) |
| 448 | ; // moving to the end of the array |
| 449 | // moving to the nonexistent value right after... |
| 450 | size_t npos; |
| 451 | if ((current_type == '[') || (current_type == '{')) { |
| 452 | // we need to jump |
| 453 | npos = uint32_t(current_val); |
| 454 | } else { |
| 455 | npos = |
| 456 | location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); |
| 457 | } |
| 458 | location = npos; |
| 459 | current_val = doc.tape[npos]; |
| 460 | current_type = uint8_t(current_val >> 56); |
| 461 | return true; // how could it fail ? |
| 462 | } |
| 463 | } else { // regular numeric index |
| 464 | // The index can't have a leading '0' |
| 465 | if (key_or_index[0] == '0' && key_or_index.length() > 1) { |
| 466 | return false; |
| 467 | } |
| 468 | // it cannot be empty |
| 469 | if (key_or_index.length() == 0) { |
| 470 | return false; |
| 471 | } |
| 472 | // we already checked the index contains only valid digits |
| 473 | uint32_t index = std::stoi(str: key_or_index); |
| 474 | if (move_to_index(index)) { |
| 475 | found = relative_move_to(pointer: pointer + offset, length: length - offset); |
| 476 | } |
| 477 | } |
| 478 | } |
| 479 | |
| 480 | return found; |
| 481 | } |
| 482 | |
| 483 | SIMDJSON_POP_DISABLE_WARNINGS |
| 484 | } // namespace simdjson |
| 485 | |
| 486 | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
| 487 | |
| 488 | |
| 489 | #endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H |
| 490 | |