1 | // TODO Remove this -- deprecated API and files |
2 | |
3 | #ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H |
4 | #define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H |
5 | |
6 | #include <cstring> |
7 | #include <string> |
8 | #include <ostream> |
9 | #include <iterator> |
10 | #include <limits> |
11 | #include <stdexcept> |
12 | |
13 | #include "simdjson/dom/document.h" |
14 | #include "simdjson/dom/parsedjson.h" |
15 | #include "simdjson/internal/jsonformatutils.h" |
16 | |
17 | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
18 | |
19 | namespace simdjson { |
20 | /** @private **/ |
21 | class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)" )]] dom::parser::Iterator { |
22 | public: |
23 | inline Iterator(const dom::parser &parser) noexcept(false); |
24 | inline Iterator(const Iterator &o) noexcept; |
25 | inline ~Iterator() noexcept; |
26 | |
27 | inline Iterator& operator=(const Iterator&) = delete; |
28 | |
29 | inline bool is_ok() const; |
30 | |
31 | // useful for debugging purposes |
32 | inline size_t get_tape_location() const; |
33 | |
34 | // useful for debugging purposes |
35 | inline size_t get_tape_length() const; |
36 | |
37 | // returns the current depth (start at 1 with 0 reserved for the fictitious |
38 | // root node) |
39 | inline size_t get_depth() const; |
40 | |
41 | // A scope is a series of nodes at the same depth, typically it is either an |
42 | // object ({) or an array ([). The root node has type 'r'. |
43 | inline uint8_t get_scope_type() const; |
44 | |
45 | // move forward in document order |
46 | inline bool move_forward(); |
47 | |
48 | // retrieve the character code of what we're looking at: |
49 | // [{"slutfn are the possibilities |
50 | inline uint8_t get_type() const { |
51 | return current_type; // short functions should be inlined! |
52 | } |
53 | |
54 | // get the int64_t value at this node; valid only if get_type is "l" |
55 | inline int64_t get_integer() const { |
56 | if (location + 1 >= tape_length) { |
57 | return 0; // default value in case of error |
58 | } |
59 | return static_cast<int64_t>(doc.tape[location + 1]); |
60 | } |
61 | |
62 | // get the value as uint64; valid only if if get_type is "u" |
63 | inline uint64_t get_unsigned_integer() const { |
64 | if (location + 1 >= tape_length) { |
65 | return 0; // default value in case of error |
66 | } |
67 | return doc.tape[location + 1]; |
68 | } |
69 | |
70 | // get the string value at this node (NULL ended); valid only if get_type is " |
71 | // note that tabs, and line endings are escaped in the returned value (see |
72 | // print_with_escapes) return value is valid UTF-8, it may contain NULL chars |
73 | // within the string: get_string_length determines the true string length. |
74 | inline const char *get_string() const { |
75 | return reinterpret_cast<const char *>( |
76 | doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); |
77 | } |
78 | |
79 | // return the length of the string in bytes |
80 | inline uint32_t get_string_length() const { |
81 | uint32_t answer; |
82 | std::memcpy(dest: &answer, |
83 | src: reinterpret_cast<const char *>(doc.string_buf.get() + |
84 | (current_val & internal::JSON_VALUE_MASK)), |
85 | n: sizeof(uint32_t)); |
86 | return answer; |
87 | } |
88 | |
89 | // get the double value at this node; valid only if |
90 | // get_type() is "d" |
91 | inline double get_double() const { |
92 | if (location + 1 >= tape_length) { |
93 | return std::numeric_limits<double>::quiet_NaN(); // default value in |
94 | // case of error |
95 | } |
96 | double answer; |
97 | std::memcpy(dest: &answer, src: &doc.tape[location + 1], n: sizeof(answer)); |
98 | return answer; |
99 | } |
100 | |
101 | inline bool is_object_or_array() const { return is_object() || is_array(); } |
102 | |
103 | inline bool is_object() const { return get_type() == '{'; } |
104 | |
105 | inline bool is_array() const { return get_type() == '['; } |
106 | |
107 | inline bool is_string() const { return get_type() == '"'; } |
108 | |
109 | // Returns true if the current type of the node is an signed integer. |
110 | // You can get its value with `get_integer()`. |
111 | inline bool is_integer() const { return get_type() == 'l'; } |
112 | |
113 | // Returns true if the current type of the node is an unsigned integer. |
114 | // You can get its value with `get_unsigned_integer()`. |
115 | // |
116 | // NOTE: |
117 | // Only a large value, which is out of range of a 64-bit signed integer, is |
118 | // represented internally as an unsigned node. On the other hand, a typical |
119 | // positive integer, such as 1, 42, or 1000000, is as a signed node. |
120 | // Be aware this function returns false for a signed node. |
121 | inline bool is_unsigned_integer() const { return get_type() == 'u'; } |
122 | // Returns true if the current type of the node is a double floating-point number. |
123 | inline bool is_double() const { return get_type() == 'd'; } |
124 | // Returns true if the current type of the node is a number (integer or floating-point). |
125 | inline bool is_number() const { |
126 | return is_integer() || is_unsigned_integer() || is_double(); |
127 | } |
128 | // Returns true if the current type of the node is a bool with true value. |
129 | inline bool is_true() const { return get_type() == 't'; } |
130 | // Returns true if the current type of the node is a bool with false value. |
131 | inline bool is_false() const { return get_type() == 'f'; } |
132 | // Returns true if the current type of the node is null. |
133 | inline bool is_null() const { return get_type() == 'n'; } |
134 | // Returns true if the type byte represents an object of an array |
135 | static bool is_object_or_array(uint8_t type) { |
136 | return ((type == '[') || (type == '{')); |
137 | } |
138 | |
139 | // when at {, go one level deep, looking for a given key |
140 | // if successful, we are left pointing at the value, |
141 | // if not, we are still pointing at the object ({) |
142 | // (in case of repeated keys, this only finds the first one). |
143 | // We seek the key using C's strcmp so if your JSON strings contain |
144 | // NULL chars, this would trigger a false positive: if you expect that |
145 | // to be the case, take extra precautions. |
146 | // Furthermore, we do the comparison character-by-character |
147 | // without taking into account Unicode equivalence. |
148 | inline bool move_to_key(const char *key); |
149 | |
150 | // as above, but case insensitive lookup (strcmpi instead of strcmp) |
151 | inline bool move_to_key_insensitive(const char *key); |
152 | |
153 | // when at {, go one level deep, looking for a given key |
154 | // if successful, we are left pointing at the value, |
155 | // if not, we are still pointing at the object ({) |
156 | // (in case of repeated keys, this only finds the first one). |
157 | // The string we search for can contain NULL values. |
158 | // Furthermore, we do the comparison character-by-character |
159 | // without taking into account Unicode equivalence. |
160 | inline bool move_to_key(const char *key, uint32_t length); |
161 | |
162 | // when at a key location within an object, this moves to the accompanying |
163 | // value (located next to it). This is equivalent but much faster than |
164 | // calling "next()". |
165 | inline void move_to_value(); |
166 | |
167 | // when at [, go one level deep, and advance to the given index. |
168 | // if successful, we are left pointing at the value, |
169 | // if not, we are still pointing at the array ([) |
170 | inline bool move_to_index(uint32_t index); |
171 | |
172 | // Moves the iterator to the value corresponding to the json pointer. |
173 | // Always search from the root of the document. |
174 | // if successful, we are left pointing at the value, |
175 | // if not, we are still pointing the same value we were pointing before the |
176 | // call. The json pointer follows the rfc6901 standard's syntax: |
177 | // https://tools.ietf.org/html/rfc6901 However, the standard says "If a |
178 | // referenced member name is not unique in an object, the member that is |
179 | // referenced is undefined, and evaluation fails". Here we just return the |
180 | // first corresponding value. The length parameter is the length of the |
181 | // jsonpointer string ('pointer'). |
182 | inline bool move_to(const char *pointer, uint32_t length); |
183 | |
184 | // Moves the iterator to the value corresponding to the json pointer. |
185 | // Always search from the root of the document. |
186 | // if successful, we are left pointing at the value, |
187 | // if not, we are still pointing the same value we were pointing before the |
188 | // call. The json pointer implementation follows the rfc6901 standard's |
189 | // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says |
190 | // "If a referenced member name is not unique in an object, the member that |
191 | // is referenced is undefined, and evaluation fails". Here we just return |
192 | // the first corresponding value. |
193 | inline bool move_to(const std::string &pointer) { |
194 | return move_to(pointer: pointer.c_str(), length: uint32_t(pointer.length())); |
195 | } |
196 | |
197 | private: |
198 | // Almost the same as move_to(), except it searches from the current |
199 | // position. The pointer's syntax is identical, though that case is not |
200 | // handled by the rfc6901 standard. The '/' is still required at the |
201 | // beginning. However, contrary to move_to(), the URI Fragment Identifier |
202 | // Representation is not supported here. Also, in case of failure, we are |
203 | // left pointing at the closest value it could reach. For these reasons it |
204 | // is private. It exists because it is used by move_to(). |
205 | inline bool relative_move_to(const char *pointer, uint32_t length); |
206 | |
207 | public: |
208 | // throughout return true if we can do the navigation, false |
209 | // otherwise |
210 | |
211 | // Within a given scope (series of nodes at the same depth within either an |
212 | // array or an object), we move forward. |
213 | // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { |
214 | // and [. At the object ({) or at the array ([), you can issue a "down" to |
215 | // visit their content. valid if we're not at the end of a scope (returns |
216 | // true). |
217 | inline bool next(); |
218 | |
219 | // Within a given scope (series of nodes at the same depth within either an |
220 | // array or an object), we move backward. |
221 | // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true |
222 | // when starting at the end of the scope. At the object ({) or at the array |
223 | // ([), you can issue a "down" to visit their content. |
224 | // Performance warning: This function is implemented by starting again |
225 | // from the beginning of the scope and scanning forward. You should expect |
226 | // it to be relatively slow. |
227 | inline bool prev(); |
228 | |
229 | // Moves back to either the containing array or object (type { or [) from |
230 | // within a contained scope. |
231 | // Valid unless we are at the first level of the document |
232 | inline bool up(); |
233 | |
234 | // Valid if we're at a [ or { and it starts a non-empty scope; moves us to |
235 | // start of that deeper scope if it not empty. Thus, given [true, null, |
236 | // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. |
237 | inline bool down(); |
238 | |
239 | // move us to the start of our current scope, |
240 | // a scope is a series of nodes at the same level |
241 | inline void to_start_scope(); |
242 | |
243 | inline void rewind() { |
244 | while (up()) |
245 | ; |
246 | } |
247 | |
248 | |
249 | |
250 | // print the node we are currently pointing at |
251 | inline bool print(std::ostream &os, bool escape_strings = true) const; |
252 | |
253 | private: |
254 | const document &doc; |
255 | size_t max_depth{}; |
256 | size_t depth{}; |
257 | size_t location{}; // our current location on a tape |
258 | size_t tape_length{}; |
259 | uint8_t current_type{}; |
260 | uint64_t current_val{}; |
261 | typedef struct { |
262 | size_t start_of_scope; |
263 | uint8_t scope_type; |
264 | } scopeindex_t; |
265 | |
266 | scopeindex_t *depth_index{}; |
267 | }; |
268 | |
269 | } // namespace simdjson |
270 | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
271 | |
272 | #endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H |
273 | |