1// TODO Remove this -- deprecated API and files
2
3#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H
4#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H
5
6#include <cstring>
7#include <string>
8#include <ostream>
9#include <iterator>
10#include <limits>
11#include <stdexcept>
12
13#include "simdjson/dom/document.h"
14#include "simdjson/dom/parsedjson.h"
15#include "simdjson/internal/jsonformatutils.h"
16
17#ifndef SIMDJSON_DISABLE_DEPRECATED_API
18
19namespace simdjson {
20/** @private **/
21class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator {
22public:
23 inline Iterator(const dom::parser &parser) noexcept(false);
24 inline Iterator(const Iterator &o) noexcept;
25 inline ~Iterator() noexcept;
26
27 inline Iterator& operator=(const Iterator&) = delete;
28
29 inline bool is_ok() const;
30
31 // useful for debugging purposes
32 inline size_t get_tape_location() const;
33
34 // useful for debugging purposes
35 inline size_t get_tape_length() const;
36
37 // returns the current depth (start at 1 with 0 reserved for the fictitious
38 // root node)
39 inline size_t get_depth() const;
40
41 // A scope is a series of nodes at the same depth, typically it is either an
42 // object ({) or an array ([). The root node has type 'r'.
43 inline uint8_t get_scope_type() const;
44
45 // move forward in document order
46 inline bool move_forward();
47
48 // retrieve the character code of what we're looking at:
49 // [{"slutfn are the possibilities
50 inline uint8_t get_type() const {
51 return current_type; // short functions should be inlined!
52 }
53
54 // get the int64_t value at this node; valid only if get_type is "l"
55 inline int64_t get_integer() const {
56 if (location + 1 >= tape_length) {
57 return 0; // default value in case of error
58 }
59 return static_cast<int64_t>(doc.tape[location + 1]);
60 }
61
62 // get the value as uint64; valid only if if get_type is "u"
63 inline uint64_t get_unsigned_integer() const {
64 if (location + 1 >= tape_length) {
65 return 0; // default value in case of error
66 }
67 return doc.tape[location + 1];
68 }
69
70 // get the string value at this node (NULL ended); valid only if get_type is "
71 // note that tabs, and line endings are escaped in the returned value (see
72 // print_with_escapes) return value is valid UTF-8, it may contain NULL chars
73 // within the string: get_string_length determines the true string length.
74 inline const char *get_string() const {
75 return reinterpret_cast<const char *>(
76 doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t));
77 }
78
79 // return the length of the string in bytes
80 inline uint32_t get_string_length() const {
81 uint32_t answer;
82 std::memcpy(dest: &answer,
83 src: reinterpret_cast<const char *>(doc.string_buf.get() +
84 (current_val & internal::JSON_VALUE_MASK)),
85 n: sizeof(uint32_t));
86 return answer;
87 }
88
89 // get the double value at this node; valid only if
90 // get_type() is "d"
91 inline double get_double() const {
92 if (location + 1 >= tape_length) {
93 return std::numeric_limits<double>::quiet_NaN(); // default value in
94 // case of error
95 }
96 double answer;
97 std::memcpy(dest: &answer, src: &doc.tape[location + 1], n: sizeof(answer));
98 return answer;
99 }
100
101 inline bool is_object_or_array() const { return is_object() || is_array(); }
102
103 inline bool is_object() const { return get_type() == '{'; }
104
105 inline bool is_array() const { return get_type() == '['; }
106
107 inline bool is_string() const { return get_type() == '"'; }
108
109 // Returns true if the current type of the node is an signed integer.
110 // You can get its value with `get_integer()`.
111 inline bool is_integer() const { return get_type() == 'l'; }
112
113 // Returns true if the current type of the node is an unsigned integer.
114 // You can get its value with `get_unsigned_integer()`.
115 //
116 // NOTE:
117 // Only a large value, which is out of range of a 64-bit signed integer, is
118 // represented internally as an unsigned node. On the other hand, a typical
119 // positive integer, such as 1, 42, or 1000000, is as a signed node.
120 // Be aware this function returns false for a signed node.
121 inline bool is_unsigned_integer() const { return get_type() == 'u'; }
122 // Returns true if the current type of the node is a double floating-point number.
123 inline bool is_double() const { return get_type() == 'd'; }
124 // Returns true if the current type of the node is a number (integer or floating-point).
125 inline bool is_number() const {
126 return is_integer() || is_unsigned_integer() || is_double();
127 }
128 // Returns true if the current type of the node is a bool with true value.
129 inline bool is_true() const { return get_type() == 't'; }
130 // Returns true if the current type of the node is a bool with false value.
131 inline bool is_false() const { return get_type() == 'f'; }
132 // Returns true if the current type of the node is null.
133 inline bool is_null() const { return get_type() == 'n'; }
134 // Returns true if the type byte represents an object of an array
135 static bool is_object_or_array(uint8_t type) {
136 return ((type == '[') || (type == '{'));
137 }
138
139 // when at {, go one level deep, looking for a given key
140 // if successful, we are left pointing at the value,
141 // if not, we are still pointing at the object ({)
142 // (in case of repeated keys, this only finds the first one).
143 // We seek the key using C's strcmp so if your JSON strings contain
144 // NULL chars, this would trigger a false positive: if you expect that
145 // to be the case, take extra precautions.
146 // Furthermore, we do the comparison character-by-character
147 // without taking into account Unicode equivalence.
148 inline bool move_to_key(const char *key);
149
150 // as above, but case insensitive lookup (strcmpi instead of strcmp)
151 inline bool move_to_key_insensitive(const char *key);
152
153 // when at {, go one level deep, looking for a given key
154 // if successful, we are left pointing at the value,
155 // if not, we are still pointing at the object ({)
156 // (in case of repeated keys, this only finds the first one).
157 // The string we search for can contain NULL values.
158 // Furthermore, we do the comparison character-by-character
159 // without taking into account Unicode equivalence.
160 inline bool move_to_key(const char *key, uint32_t length);
161
162 // when at a key location within an object, this moves to the accompanying
163 // value (located next to it). This is equivalent but much faster than
164 // calling "next()".
165 inline void move_to_value();
166
167 // when at [, go one level deep, and advance to the given index.
168 // if successful, we are left pointing at the value,
169 // if not, we are still pointing at the array ([)
170 inline bool move_to_index(uint32_t index);
171
172 // Moves the iterator to the value corresponding to the json pointer.
173 // Always search from the root of the document.
174 // if successful, we are left pointing at the value,
175 // if not, we are still pointing the same value we were pointing before the
176 // call. The json pointer follows the rfc6901 standard's syntax:
177 // https://tools.ietf.org/html/rfc6901 However, the standard says "If a
178 // referenced member name is not unique in an object, the member that is
179 // referenced is undefined, and evaluation fails". Here we just return the
180 // first corresponding value. The length parameter is the length of the
181 // jsonpointer string ('pointer').
182 inline bool move_to(const char *pointer, uint32_t length);
183
184 // Moves the iterator to the value corresponding to the json pointer.
185 // Always search from the root of the document.
186 // if successful, we are left pointing at the value,
187 // if not, we are still pointing the same value we were pointing before the
188 // call. The json pointer implementation follows the rfc6901 standard's
189 // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says
190 // "If a referenced member name is not unique in an object, the member that
191 // is referenced is undefined, and evaluation fails". Here we just return
192 // the first corresponding value.
193 inline bool move_to(const std::string &pointer) {
194 return move_to(pointer: pointer.c_str(), length: uint32_t(pointer.length()));
195 }
196
197 private:
198 // Almost the same as move_to(), except it searches from the current
199 // position. The pointer's syntax is identical, though that case is not
200 // handled by the rfc6901 standard. The '/' is still required at the
201 // beginning. However, contrary to move_to(), the URI Fragment Identifier
202 // Representation is not supported here. Also, in case of failure, we are
203 // left pointing at the closest value it could reach. For these reasons it
204 // is private. It exists because it is used by move_to().
205 inline bool relative_move_to(const char *pointer, uint32_t length);
206
207 public:
208 // throughout return true if we can do the navigation, false
209 // otherwise
210
211 // Within a given scope (series of nodes at the same depth within either an
212 // array or an object), we move forward.
213 // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, {
214 // and [. At the object ({) or at the array ([), you can issue a "down" to
215 // visit their content. valid if we're not at the end of a scope (returns
216 // true).
217 inline bool next();
218
219 // Within a given scope (series of nodes at the same depth within either an
220 // array or an object), we move backward.
221 // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true
222 // when starting at the end of the scope. At the object ({) or at the array
223 // ([), you can issue a "down" to visit their content.
224 // Performance warning: This function is implemented by starting again
225 // from the beginning of the scope and scanning forward. You should expect
226 // it to be relatively slow.
227 inline bool prev();
228
229 // Moves back to either the containing array or object (type { or [) from
230 // within a contained scope.
231 // Valid unless we are at the first level of the document
232 inline bool up();
233
234 // Valid if we're at a [ or { and it starts a non-empty scope; moves us to
235 // start of that deeper scope if it not empty. Thus, given [true, null,
236 // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node.
237 inline bool down();
238
239 // move us to the start of our current scope,
240 // a scope is a series of nodes at the same level
241 inline void to_start_scope();
242
243 inline void rewind() {
244 while (up())
245 ;
246 }
247
248
249
250 // print the node we are currently pointing at
251 inline bool print(std::ostream &os, bool escape_strings = true) const;
252
253 private:
254 const document &doc;
255 size_t max_depth{};
256 size_t depth{};
257 size_t location{}; // our current location on a tape
258 size_t tape_length{};
259 uint8_t current_type{};
260 uint64_t current_val{};
261 typedef struct {
262 size_t start_of_scope;
263 uint8_t scope_type;
264 } scopeindex_t;
265
266 scopeindex_t *depth_index{};
267};
268
269} // namespace simdjson
270#endif // SIMDJSON_DISABLE_DEPRECATED_API
271
272#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H
273