| 1 | // TODO Remove this -- deprecated API and files |
| 2 | |
| 3 | #ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H |
| 4 | #define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H |
| 5 | |
| 6 | #include <cstring> |
| 7 | #include <string> |
| 8 | #include <ostream> |
| 9 | #include <iterator> |
| 10 | #include <limits> |
| 11 | #include <stdexcept> |
| 12 | |
| 13 | #include "simdjson/dom/document.h" |
| 14 | #include "simdjson/dom/parsedjson.h" |
| 15 | #include "simdjson/internal/jsonformatutils.h" |
| 16 | |
| 17 | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
| 18 | |
| 19 | namespace simdjson { |
| 20 | /** @private **/ |
| 21 | class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)" )]] dom::parser::Iterator { |
| 22 | public: |
| 23 | inline Iterator(const dom::parser &parser) noexcept(false); |
| 24 | inline Iterator(const Iterator &o) noexcept; |
| 25 | inline ~Iterator() noexcept; |
| 26 | |
| 27 | inline Iterator& operator=(const Iterator&) = delete; |
| 28 | |
| 29 | inline bool is_ok() const; |
| 30 | |
| 31 | // useful for debugging purposes |
| 32 | inline size_t get_tape_location() const; |
| 33 | |
| 34 | // useful for debugging purposes |
| 35 | inline size_t get_tape_length() const; |
| 36 | |
| 37 | // returns the current depth (start at 1 with 0 reserved for the fictitious |
| 38 | // root node) |
| 39 | inline size_t get_depth() const; |
| 40 | |
| 41 | // A scope is a series of nodes at the same depth, typically it is either an |
| 42 | // object ({) or an array ([). The root node has type 'r'. |
| 43 | inline uint8_t get_scope_type() const; |
| 44 | |
| 45 | // move forward in document order |
| 46 | inline bool move_forward(); |
| 47 | |
| 48 | // retrieve the character code of what we're looking at: |
| 49 | // [{"slutfn are the possibilities |
| 50 | inline uint8_t get_type() const { |
| 51 | return current_type; // short functions should be inlined! |
| 52 | } |
| 53 | |
| 54 | // get the int64_t value at this node; valid only if get_type is "l" |
| 55 | inline int64_t get_integer() const { |
| 56 | if (location + 1 >= tape_length) { |
| 57 | return 0; // default value in case of error |
| 58 | } |
| 59 | return static_cast<int64_t>(doc.tape[location + 1]); |
| 60 | } |
| 61 | |
| 62 | // get the value as uint64; valid only if if get_type is "u" |
| 63 | inline uint64_t get_unsigned_integer() const { |
| 64 | if (location + 1 >= tape_length) { |
| 65 | return 0; // default value in case of error |
| 66 | } |
| 67 | return doc.tape[location + 1]; |
| 68 | } |
| 69 | |
| 70 | // get the string value at this node (NULL ended); valid only if get_type is " |
| 71 | // note that tabs, and line endings are escaped in the returned value (see |
| 72 | // print_with_escapes) return value is valid UTF-8, it may contain NULL chars |
| 73 | // within the string: get_string_length determines the true string length. |
| 74 | inline const char *get_string() const { |
| 75 | return reinterpret_cast<const char *>( |
| 76 | doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); |
| 77 | } |
| 78 | |
| 79 | // return the length of the string in bytes |
| 80 | inline uint32_t get_string_length() const { |
| 81 | uint32_t answer; |
| 82 | std::memcpy(dest: &answer, |
| 83 | src: reinterpret_cast<const char *>(doc.string_buf.get() + |
| 84 | (current_val & internal::JSON_VALUE_MASK)), |
| 85 | n: sizeof(uint32_t)); |
| 86 | return answer; |
| 87 | } |
| 88 | |
| 89 | // get the double value at this node; valid only if |
| 90 | // get_type() is "d" |
| 91 | inline double get_double() const { |
| 92 | if (location + 1 >= tape_length) { |
| 93 | return std::numeric_limits<double>::quiet_NaN(); // default value in |
| 94 | // case of error |
| 95 | } |
| 96 | double answer; |
| 97 | std::memcpy(dest: &answer, src: &doc.tape[location + 1], n: sizeof(answer)); |
| 98 | return answer; |
| 99 | } |
| 100 | |
| 101 | inline bool is_object_or_array() const { return is_object() || is_array(); } |
| 102 | |
| 103 | inline bool is_object() const { return get_type() == '{'; } |
| 104 | |
| 105 | inline bool is_array() const { return get_type() == '['; } |
| 106 | |
| 107 | inline bool is_string() const { return get_type() == '"'; } |
| 108 | |
| 109 | // Returns true if the current type of the node is an signed integer. |
| 110 | // You can get its value with `get_integer()`. |
| 111 | inline bool is_integer() const { return get_type() == 'l'; } |
| 112 | |
| 113 | // Returns true if the current type of the node is an unsigned integer. |
| 114 | // You can get its value with `get_unsigned_integer()`. |
| 115 | // |
| 116 | // NOTE: |
| 117 | // Only a large value, which is out of range of a 64-bit signed integer, is |
| 118 | // represented internally as an unsigned node. On the other hand, a typical |
| 119 | // positive integer, such as 1, 42, or 1000000, is as a signed node. |
| 120 | // Be aware this function returns false for a signed node. |
| 121 | inline bool is_unsigned_integer() const { return get_type() == 'u'; } |
| 122 | // Returns true if the current type of the node is a double floating-point number. |
| 123 | inline bool is_double() const { return get_type() == 'd'; } |
| 124 | // Returns true if the current type of the node is a number (integer or floating-point). |
| 125 | inline bool is_number() const { |
| 126 | return is_integer() || is_unsigned_integer() || is_double(); |
| 127 | } |
| 128 | // Returns true if the current type of the node is a bool with true value. |
| 129 | inline bool is_true() const { return get_type() == 't'; } |
| 130 | // Returns true if the current type of the node is a bool with false value. |
| 131 | inline bool is_false() const { return get_type() == 'f'; } |
| 132 | // Returns true if the current type of the node is null. |
| 133 | inline bool is_null() const { return get_type() == 'n'; } |
| 134 | // Returns true if the type byte represents an object of an array |
| 135 | static bool is_object_or_array(uint8_t type) { |
| 136 | return ((type == '[') || (type == '{')); |
| 137 | } |
| 138 | |
| 139 | // when at {, go one level deep, looking for a given key |
| 140 | // if successful, we are left pointing at the value, |
| 141 | // if not, we are still pointing at the object ({) |
| 142 | // (in case of repeated keys, this only finds the first one). |
| 143 | // We seek the key using C's strcmp so if your JSON strings contain |
| 144 | // NULL chars, this would trigger a false positive: if you expect that |
| 145 | // to be the case, take extra precautions. |
| 146 | // Furthermore, we do the comparison character-by-character |
| 147 | // without taking into account Unicode equivalence. |
| 148 | inline bool move_to_key(const char *key); |
| 149 | |
| 150 | // as above, but case insensitive lookup (strcmpi instead of strcmp) |
| 151 | inline bool move_to_key_insensitive(const char *key); |
| 152 | |
| 153 | // when at {, go one level deep, looking for a given key |
| 154 | // if successful, we are left pointing at the value, |
| 155 | // if not, we are still pointing at the object ({) |
| 156 | // (in case of repeated keys, this only finds the first one). |
| 157 | // The string we search for can contain NULL values. |
| 158 | // Furthermore, we do the comparison character-by-character |
| 159 | // without taking into account Unicode equivalence. |
| 160 | inline bool move_to_key(const char *key, uint32_t length); |
| 161 | |
| 162 | // when at a key location within an object, this moves to the accompanying |
| 163 | // value (located next to it). This is equivalent but much faster than |
| 164 | // calling "next()". |
| 165 | inline void move_to_value(); |
| 166 | |
| 167 | // when at [, go one level deep, and advance to the given index. |
| 168 | // if successful, we are left pointing at the value, |
| 169 | // if not, we are still pointing at the array ([) |
| 170 | inline bool move_to_index(uint32_t index); |
| 171 | |
| 172 | // Moves the iterator to the value corresponding to the json pointer. |
| 173 | // Always search from the root of the document. |
| 174 | // if successful, we are left pointing at the value, |
| 175 | // if not, we are still pointing the same value we were pointing before the |
| 176 | // call. The json pointer follows the rfc6901 standard's syntax: |
| 177 | // https://tools.ietf.org/html/rfc6901 However, the standard says "If a |
| 178 | // referenced member name is not unique in an object, the member that is |
| 179 | // referenced is undefined, and evaluation fails". Here we just return the |
| 180 | // first corresponding value. The length parameter is the length of the |
| 181 | // jsonpointer string ('pointer'). |
| 182 | inline bool move_to(const char *pointer, uint32_t length); |
| 183 | |
| 184 | // Moves the iterator to the value corresponding to the json pointer. |
| 185 | // Always search from the root of the document. |
| 186 | // if successful, we are left pointing at the value, |
| 187 | // if not, we are still pointing the same value we were pointing before the |
| 188 | // call. The json pointer implementation follows the rfc6901 standard's |
| 189 | // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says |
| 190 | // "If a referenced member name is not unique in an object, the member that |
| 191 | // is referenced is undefined, and evaluation fails". Here we just return |
| 192 | // the first corresponding value. |
| 193 | inline bool move_to(const std::string &pointer) { |
| 194 | return move_to(pointer: pointer.c_str(), length: uint32_t(pointer.length())); |
| 195 | } |
| 196 | |
| 197 | private: |
| 198 | // Almost the same as move_to(), except it searches from the current |
| 199 | // position. The pointer's syntax is identical, though that case is not |
| 200 | // handled by the rfc6901 standard. The '/' is still required at the |
| 201 | // beginning. However, contrary to move_to(), the URI Fragment Identifier |
| 202 | // Representation is not supported here. Also, in case of failure, we are |
| 203 | // left pointing at the closest value it could reach. For these reasons it |
| 204 | // is private. It exists because it is used by move_to(). |
| 205 | inline bool relative_move_to(const char *pointer, uint32_t length); |
| 206 | |
| 207 | public: |
| 208 | // throughout return true if we can do the navigation, false |
| 209 | // otherwise |
| 210 | |
| 211 | // Within a given scope (series of nodes at the same depth within either an |
| 212 | // array or an object), we move forward. |
| 213 | // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { |
| 214 | // and [. At the object ({) or at the array ([), you can issue a "down" to |
| 215 | // visit their content. valid if we're not at the end of a scope (returns |
| 216 | // true). |
| 217 | inline bool next(); |
| 218 | |
| 219 | // Within a given scope (series of nodes at the same depth within either an |
| 220 | // array or an object), we move backward. |
| 221 | // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true |
| 222 | // when starting at the end of the scope. At the object ({) or at the array |
| 223 | // ([), you can issue a "down" to visit their content. |
| 224 | // Performance warning: This function is implemented by starting again |
| 225 | // from the beginning of the scope and scanning forward. You should expect |
| 226 | // it to be relatively slow. |
| 227 | inline bool prev(); |
| 228 | |
| 229 | // Moves back to either the containing array or object (type { or [) from |
| 230 | // within a contained scope. |
| 231 | // Valid unless we are at the first level of the document |
| 232 | inline bool up(); |
| 233 | |
| 234 | // Valid if we're at a [ or { and it starts a non-empty scope; moves us to |
| 235 | // start of that deeper scope if it not empty. Thus, given [true, null, |
| 236 | // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. |
| 237 | inline bool down(); |
| 238 | |
| 239 | // move us to the start of our current scope, |
| 240 | // a scope is a series of nodes at the same level |
| 241 | inline void to_start_scope(); |
| 242 | |
| 243 | inline void rewind() { |
| 244 | while (up()) |
| 245 | ; |
| 246 | } |
| 247 | |
| 248 | |
| 249 | |
| 250 | // print the node we are currently pointing at |
| 251 | inline bool print(std::ostream &os, bool escape_strings = true) const; |
| 252 | |
| 253 | private: |
| 254 | const document &doc; |
| 255 | size_t max_depth{}; |
| 256 | size_t depth{}; |
| 257 | size_t location{}; // our current location on a tape |
| 258 | size_t tape_length{}; |
| 259 | uint8_t current_type{}; |
| 260 | uint64_t current_val{}; |
| 261 | typedef struct { |
| 262 | size_t start_of_scope; |
| 263 | uint8_t scope_type; |
| 264 | } scopeindex_t; |
| 265 | |
| 266 | scopeindex_t *depth_index{}; |
| 267 | }; |
| 268 | |
| 269 | } // namespace simdjson |
| 270 | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
| 271 | |
| 272 | #endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H |
| 273 | |