1namespace simdjson {
2namespace SIMDJSON_IMPLEMENTATION {
3namespace ondemand {
4
5class document;
6class document_stream;
7class object;
8class array;
9class value;
10class raw_json_string;
11class parser;
12
13/**
14 * Iterates through JSON tokens, keeping track of depth and string buffer.
15 *
16 * @private This is not intended for external use.
17 */
18class json_iterator {
19protected:
20 token_iterator token{};
21 ondemand::parser *parser{};
22 /**
23 * Next free location in the string buffer.
24 *
25 * Used by raw_json_string::unescape() to have a place to unescape strings to.
26 */
27 uint8_t *_string_buf_loc{};
28 /**
29 * JSON error, if there is one.
30 *
31 * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever.
32 *
33 * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first
34 * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If
35 * this is not elided, we should make sure it's at least not using up a register. Failing that,
36 * we should store it in document so there's only one of them.
37 */
38 error_code error{SUCCESS};
39 /**
40 * Depth of the current token in the JSON.
41 *
42 * - 0 = finished with document
43 * - 1 = document root value (could be [ or {, not yet known)
44 * - 2 = , or } inside root array/object
45 * - 3 = key or value inside root array/object.
46 */
47 depth_t _depth{};
48 /**
49 * Beginning of the document indexes.
50 * Normally we have root == parser->implementation->structural_indexes.get()
51 * but this may differ, especially in streaming mode (where we have several
52 * documents);
53 */
54 token_position _root{};
55 /**
56 * Normally, a json_iterator operates over a single document, but in
57 * some cases, we may have a stream of documents. This attribute is meant
58 * as meta-data: the json_iterator works the same irrespective of the
59 * value of this attribute.
60 */
61 bool _streaming{false};
62
63public:
64 simdjson_inline json_iterator() noexcept = default;
65 simdjson_inline json_iterator(json_iterator &&other) noexcept;
66 simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept;
67 simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default;
68 simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default;
69 /**
70 * Skips a JSON value, whether it is a scalar, array or object.
71 */
72 simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept;
73
74 /**
75 * Tell whether the iterator is still at the start
76 */
77 simdjson_inline bool at_root() const noexcept;
78
79 /**
80 * Tell whether we should be expected to run in streaming
81 * mode (iterating over many documents). It is pure metadata
82 * that does not affect how the iterator works. It is used by
83 * start_root_array() and start_root_object().
84 */
85 simdjson_inline bool streaming() const noexcept;
86
87 /**
88 * Get the root value iterator
89 */
90 simdjson_inline token_position root_position() const noexcept;
91 /**
92 * Assert that we are at the document depth (== 1)
93 */
94 simdjson_inline void assert_at_document_depth() const noexcept;
95 /**
96 * Assert that we are at the root of the document
97 */
98 simdjson_inline void assert_at_root() const noexcept;
99
100 /**
101 * Tell whether the iterator is at the EOF mark
102 */
103 simdjson_inline bool at_end() const noexcept;
104
105 /**
106 * Tell whether the iterator is live (has not been moved).
107 */
108 simdjson_inline bool is_alive() const noexcept;
109
110 /**
111 * Abandon this iterator, setting depth to 0 (as if the document is finished).
112 */
113 simdjson_inline void abandon() noexcept;
114
115 /**
116 * Advance the current token without modifying depth.
117 */
118 simdjson_inline const uint8_t *return_current_and_advance() noexcept;
119
120 /**
121 * Returns true if there is a single token in the index (i.e., it is
122 * a JSON with a scalar value such as a single number).
123 *
124 * @return whether there is a single token
125 */
126 simdjson_inline bool is_single_token() const noexcept;
127
128 /**
129 * Assert that there are at least the given number of tokens left.
130 *
131 * Has no effect in release builds.
132 */
133 simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept;
134 /**
135 * Assert that the given position addresses an actual token (is within bounds).
136 *
137 * Has no effect in release builds.
138 */
139 simdjson_inline void assert_valid_position(token_position position) const noexcept;
140 /**
141 * Get the JSON text for a given token (relative).
142 *
143 * This is not null-terminated; it is a view into the JSON.
144 *
145 * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token.
146 *
147 * TODO consider a string_view, assuming the length will get stripped out by the optimizer when
148 * it isn't used ...
149 */
150 simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept;
151 /**
152 * Get the maximum length of the JSON text for the current token (or relative).
153 *
154 * The length will include any whitespace at the end of the token.
155 *
156 * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token.
157 */
158 simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept;
159 /**
160 * Get a pointer to the current location in the input buffer.
161 *
162 * This is not null-terminated; it is a view into the JSON.
163 *
164 * You may be pointing outside of the input buffer: it is not generally
165 * safe to dereference this pointer.
166 */
167 simdjson_inline const uint8_t *unsafe_pointer() const noexcept;
168 /**
169 * Get the JSON text for a given token.
170 *
171 * This is not null-terminated; it is a view into the JSON.
172 *
173 * @param position The position of the token to retrieve.
174 *
175 * TODO consider a string_view, assuming the length will get stripped out by the optimizer when
176 * it isn't used ...
177 */
178 simdjson_inline const uint8_t *peek(token_position position) const noexcept;
179 /**
180 * Get the maximum length of the JSON text for the current token (or relative).
181 *
182 * The length will include any whitespace at the end of the token.
183 *
184 * @param position The position of the token to retrieve.
185 */
186 simdjson_inline uint32_t peek_length(token_position position) const noexcept;
187 /**
188 * Get the JSON text for the last token in the document.
189 *
190 * This is not null-terminated; it is a view into the JSON.
191 *
192 * TODO consider a string_view, assuming the length will get stripped out by the optimizer when
193 * it isn't used ...
194 */
195 simdjson_inline const uint8_t *peek_last() const noexcept;
196
197 /**
198 * Ascend one level.
199 *
200 * Validates that the depth - 1 == parent_depth.
201 *
202 * @param parent_depth the expected parent depth.
203 */
204 simdjson_inline void ascend_to(depth_t parent_depth) noexcept;
205
206 /**
207 * Descend one level.
208 *
209 * Validates that the new depth == child_depth.
210 *
211 * @param child_depth the expected child depth.
212 */
213 simdjson_inline void descend_to(depth_t child_depth) noexcept;
214 simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept;
215
216 /**
217 * Get current depth.
218 */
219 simdjson_inline depth_t depth() const noexcept;
220
221 /**
222 * Get current (writeable) location in the string buffer.
223 */
224 simdjson_inline uint8_t *&string_buf_loc() noexcept;
225
226 /**
227 * Report an unrecoverable error, preventing further iteration.
228 *
229 * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD.
230 * @param message An error message to report with the error.
231 */
232 simdjson_inline error_code report_error(error_code error, const char *message) noexcept;
233
234 /**
235 * Log error, but don't stop iteration.
236 * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD.
237 * @param message An error message to report with the error.
238 */
239 simdjson_inline error_code optional_error(error_code error, const char *message) noexcept;
240
241 template<int N> simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept;
242
243 simdjson_inline token_position position() const noexcept;
244 /**
245 * Write the raw_json_string to the string buffer and return a string_view.
246 * Each raw_json_string should be unescaped once, or else the string buffer might
247 * overflow.
248 */
249 simdjson_inline simdjson_result<std::string_view> unescape(raw_json_string in, bool allow_replacement) noexcept;
250 simdjson_inline simdjson_result<std::string_view> unescape_wobbly(raw_json_string in) noexcept;
251 simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept;
252
253#if SIMDJSON_DEVELOPMENT_CHECKS
254 simdjson_inline token_position start_position(depth_t depth) const noexcept;
255 simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept;
256#endif
257
258 /* Useful for debugging and logging purposes. */
259 inline std::string to_string() const noexcept;
260
261 /**
262 * Returns the current location in the document if in bounds.
263 */
264 inline simdjson_result<const char *> current_location() noexcept;
265
266 /**
267 * Updates this json iterator so that it is back at the beginning of the document,
268 * as if it had just been created.
269 */
270 inline void rewind() noexcept;
271 /**
272 * This checks whether the {,},[,] are balanced so that the document
273 * ends with proper zero depth. This requires scanning the whole document
274 * and it may be expensive. It is expected that it will be rarely called.
275 * It does not attempt to match { with } and [ with ].
276 */
277 inline bool balanced() const noexcept;
278protected:
279 simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept;
280 /// The last token before the end
281 simdjson_inline token_position last_position() const noexcept;
282 /// The token *at* the end. This points at gibberish and should only be used for comparison.
283 simdjson_inline token_position end_position() const noexcept;
284 /// The end of the buffer.
285 simdjson_inline token_position end() const noexcept;
286
287 friend class document;
288 friend class document_stream;
289 friend class object;
290 friend class array;
291 friend class value;
292 friend class raw_json_string;
293 friend class parser;
294 friend class value_iterator;
295 friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept;
296 friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept;
297}; // json_iterator
298
299} // namespace ondemand
300} // namespace SIMDJSON_IMPLEMENTATION
301} // namespace simdjson
302
303namespace simdjson {
304
305template<>
306struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> {
307public:
308 simdjson_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private
309 simdjson_inline simdjson_result(error_code error) noexcept; ///< @private
310
311 simdjson_inline simdjson_result() noexcept = default;
312};
313
314} // namespace simdjson
315