1 | namespace simdjson { |
2 | namespace SIMDJSON_IMPLEMENTATION { |
3 | namespace ondemand { |
4 | /* Forward declarations of On Demand classes; json_iterator below names each of them as a friend and stores a pointer to parser. */ |
5 | class document; |
6 | class document_stream; |
7 | class object; |
8 | class array; |
9 | class value; |
10 | class raw_json_string; |
11 | class parser; |
12 | |
13 | /** |
14 | * Iterates through JSON tokens, keeping track of depth and string buffer. Its state is held in the protected data members declared first in the class (token iterator, parser pointer, string buffer location, error, depth, root position and streaming flag). |
15 | * |
16 | * @private This is not intended for external use. |
17 | */ |
18 | class json_iterator { |
19 | protected: |
20 | token_iterator token{}; |
21 | ondemand::parser *parser{}; |
22 | /** |
23 | * Next free location in the string buffer. |
24 | * |
25 | * Used by raw_json_string::unescape() to have a place to unescape strings to. |
26 | */ |
27 | uint8_t *_string_buf_loc{}; |
28 | /** |
29 | * JSON error, if there is one. |
30 | * |
31 | * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. |
32 | * |
33 | * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first |
34 | * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If |
35 | * this is not elided, we should make sure it's at least not using up a register. Failing that, |
36 | * we should store it in document so there's only one of them. |
37 | */ |
38 | error_code error{SUCCESS}; |
39 | /** |
40 | * Depth of the current token in the JSON. |
41 | * |
42 | * - 0 = finished with document |
43 | * - 1 = document root value (could be [ or {, not yet known) |
44 | * - 2 = , or } inside root array/object |
45 | * - 3 = key or value inside root array/object. |
46 | */ |
47 | depth_t _depth{}; |
48 | /** |
49 | * Beginning of the document indexes. |
50 | * Normally we have _root == parser->implementation->structural_indexes.get() |
51 | * but this may differ, especially in streaming mode (where we have several |
52 | * documents). |
53 | */ |
54 | token_position _root{}; |
55 | /** |
56 | * Normally, a json_iterator operates over a single document, but in |
57 | * some cases, we may have a stream of documents. This attribute is meant |
58 | * as meta-data: the json_iterator works the same irrespective of the |
59 | * value of this attribute. |
60 | */ |
61 | bool _streaming{false}; |
62 | |
63 | public: |
64 | simdjson_inline json_iterator() noexcept = default; |
65 | simdjson_inline json_iterator(json_iterator &&other) noexcept; |
66 | simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; |
67 | simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; |
68 | simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; |
69 | /** |
70 | * Skips a JSON value, whether it is a scalar, array or object. |
71 | */ |
72 | simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; |
73 | |
74 | /** |
75 | * Tell whether the iterator is still at the start. |
76 | */ |
77 | simdjson_inline bool at_root() const noexcept; |
78 | |
79 | /** |
80 | * Tell whether we should be expected to run in streaming |
81 | * mode (iterating over many documents). It is pure metadata |
82 | * that does not affect how the iterator works. It is used by |
83 | * start_root_array() and start_root_object(). |
84 | */ |
85 | simdjson_inline bool streaming() const noexcept; |
86 | |
87 | /** |
88 | * Get the token position of the document root. NOTE(review): presumably this returns _root; the definition is outside this chunk, confirm there. |
89 | */ |
90 | simdjson_inline token_position root_position() const noexcept; |
91 | /** |
92 | * Assert that we are at the document depth (== 1). |
93 | */ |
94 | simdjson_inline void assert_at_document_depth() const noexcept; |
95 | /** |
96 | * Assert that we are at the root of the document. |
97 | */ |
98 | simdjson_inline void assert_at_root() const noexcept; |
99 | |
100 | /** |
101 | * Tell whether the iterator is at the EOF mark. |
102 | */ |
103 | simdjson_inline bool at_end() const noexcept; |
104 | |
105 | /** |
106 | * Tell whether the iterator is live (has not been moved). |
107 | */ |
108 | simdjson_inline bool is_alive() const noexcept; |
109 | |
110 | /** |
111 | * Abandon this iterator, setting depth to 0 (as if the document is finished). |
112 | */ |
113 | simdjson_inline void abandon() noexcept; |
114 | |
115 | /** |
116 | * Advance the current token without modifying depth. Returns a const uint8_t pointer; NOTE(review): presumably to the token that was current before advancing, confirm against the definition. |
117 | */ |
118 | simdjson_inline const uint8_t *return_current_and_advance() noexcept; |
119 | |
120 | /** |
121 | * Returns true if there is a single token in the index (i.e., it is |
122 | * a JSON with a scalar value such as a single number). |
123 | * |
124 | * @return whether there is a single token |
125 | */ |
126 | simdjson_inline bool is_single_token() const noexcept; |
127 | |
128 | /** |
129 | * Assert that there are at least the given number of tokens left. |
130 | * |
131 | * Has no effect in release builds. |
132 | */ |
133 | simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; |
134 | /** |
135 | * Assert that the given position addresses an actual token (is within bounds). |
136 | * |
137 | * Has no effect in release builds. |
138 | */ |
139 | simdjson_inline void assert_valid_position(token_position position) const noexcept; |
140 | /** |
141 | * Get the JSON text for a given token (relative). |
142 | * |
143 | * This is not null-terminated; it is a view into the JSON. |
144 | * |
145 | * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. |
146 | * |
147 | * TODO consider a string_view, assuming the length will get stripped out by the optimizer when |
148 | * it isn't used ... |
149 | */ |
150 | simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; |
151 | /** |
152 | * Get the maximum length of the JSON text for the current token (or a token relative to it). |
153 | * |
154 | * The length will include any whitespace at the end of the token. |
155 | * |
156 | * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. |
157 | */ |
158 | simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; |
159 | /** |
160 | * Get a pointer to the current location in the input buffer. |
161 | * |
162 | * This is not null-terminated; it is a view into the JSON. |
163 | * |
164 | * You may be pointing outside of the input buffer: it is not generally |
165 | * safe to dereference this pointer. |
166 | */ |
167 | simdjson_inline const uint8_t *unsafe_pointer() const noexcept; |
168 | /** |
169 | * Get the JSON text for a given token. |
170 | * |
171 | * This is not null-terminated; it is a view into the JSON. |
172 | * |
173 | * @param position The position of the token to retrieve. |
174 | * |
175 | * TODO consider a string_view, assuming the length will get stripped out by the optimizer when |
176 | * it isn't used ... |
177 | */ |
178 | simdjson_inline const uint8_t *peek(token_position position) const noexcept; |
179 | /** |
180 | * Get the maximum length of the JSON text for the token at the given position. |
181 | * |
182 | * The length will include any whitespace at the end of the token. |
183 | * |
184 | * @param position The position of the token to retrieve. |
185 | */ |
186 | simdjson_inline uint32_t peek_length(token_position position) const noexcept; |
187 | /** |
188 | * Get the JSON text for the last token in the document. |
189 | * |
190 | * This is not null-terminated; it is a view into the JSON. |
191 | * |
192 | * TODO consider a string_view, assuming the length will get stripped out by the optimizer when |
193 | * it isn't used ... |
194 | */ |
195 | simdjson_inline const uint8_t *peek_last() const noexcept; |
196 | |
197 | /** |
198 | * Ascend one level. |
199 | * |
200 | * Validates that the depth - 1 == parent_depth. |
201 | * |
202 | * @param parent_depth the expected parent depth. |
203 | */ |
204 | simdjson_inline void ascend_to(depth_t parent_depth) noexcept; |
205 | |
206 | /** |
207 | * Descend one level. |
208 | * |
209 | * Validates that the new depth == child_depth. |
210 | * The two-argument overload declared right after additionally takes an int32_t delta; NOTE(review): its exact effect is defined outside this chunk, confirm there. |
211 | * @param child_depth the expected child depth. |
212 | */ |
213 | simdjson_inline void descend_to(depth_t child_depth) noexcept; |
214 | simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; |
215 | |
216 | /** |
217 | * Get current depth. |
218 | */ |
219 | simdjson_inline depth_t depth() const noexcept; |
220 | |
221 | /** |
222 | * Get current (writable) location in the string buffer, as a reference to the pointer so that callers can update it. |
223 | */ |
224 | simdjson_inline uint8_t *&string_buf_loc() noexcept; |
225 | |
226 | /** |
227 | * Report an unrecoverable error, preventing further iteration. |
228 | * |
229 | * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. |
230 | * @param message An error message to report with the error. |
231 | */ |
232 | simdjson_inline error_code report_error(error_code error, const char *message) noexcept; |
233 | |
234 | /** |
235 | * Log error, but don't stop iteration. |
236 | * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. |
237 | * @param message An error message to report with the error. |
238 | */ |
239 | simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; |
240 | |
241 | template<int N> simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; |
242 | |
243 | simdjson_inline token_position position() const noexcept; |
244 | /** |
245 | * Write the raw_json_string to the string buffer and return a string_view. |
246 | * Each raw_json_string should be unescaped once, or else the string buffer might |
247 | * overflow. |
248 | */ |
249 | simdjson_inline simdjson_result<std::string_view> unescape(raw_json_string in, bool allow_replacement) noexcept; |
250 | simdjson_inline simdjson_result<std::string_view> unescape_wobbly(raw_json_string in) noexcept; |
251 | simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; |
252 | |
253 | #if SIMDJSON_DEVELOPMENT_CHECKS |
254 | simdjson_inline token_position start_position(depth_t depth) const noexcept; |
255 | simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; |
256 | #endif |
257 | |
258 | /* Produces a std::string; useful for debugging and logging purposes. */ |
259 | inline std::string to_string() const noexcept; |
260 | |
261 | /** |
262 | * Returns the current location in the document if in bounds. |
263 | */ |
264 | inline simdjson_result<const char *> current_location() noexcept; |
265 | |
266 | /** |
267 | * Updates this json iterator so that it is back at the beginning of the document, |
268 | * as if it had just been created. |
269 | */ |
270 | inline void rewind() noexcept; |
271 | /** |
272 | * This checks whether the {,},[,] are balanced so that the document |
273 | * ends with proper zero depth. This requires scanning the whole document |
274 | * and it may be expensive. It is expected that it will be rarely called. |
275 | * It does not attempt to match { with } and [ with ]. |
276 | */ |
277 | inline bool balanced() const noexcept; |
278 | protected: |
279 | simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; |
280 | /// The last token before the end |
281 | simdjson_inline token_position last_position() const noexcept; |
282 | /// The token *at* the end. This points at gibberish and should only be used for comparison. |
283 | simdjson_inline token_position end_position() const noexcept; |
284 | /// The end of the buffer. |
285 | simdjson_inline token_position end() const noexcept; |
286 | |
287 | friend class document; |
288 | friend class document_stream; |
289 | friend class object; |
290 | friend class array; |
291 | friend class value; |
292 | friend class raw_json_string; |
293 | friend class parser; |
294 | friend class value_iterator; |
295 | friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; |
296 | friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; |
297 | }; // json_iterator |
298 | |
299 | } // namespace ondemand |
300 | } // namespace SIMDJSON_IMPLEMENTATION |
301 | } // namespace simdjson |
302 | |
303 | namespace simdjson { |
304 | |
305 | template<> |
306 | struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> { |
307 | public: |
308 | simdjson_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private Construct from a json_iterator rvalue (defined outside this chunk). |
309 | simdjson_inline simdjson_result(error_code error) noexcept; ///< @private Construct from an error_code (defined outside this chunk). |
310 | |
311 | simdjson_inline simdjson_result() noexcept = default; |
312 | }; |
313 | |
314 | } // namespace simdjson |
315 | |