| 1 | namespace simdjson { |
| 2 | namespace SIMDJSON_IMPLEMENTATION { |
| 3 | namespace ondemand { |
| 4 | |
| 5 | class document; |
| 6 | class document_stream; |
| 7 | class object; |
| 8 | class array; |
| 9 | class value; |
| 10 | class raw_json_string; |
| 11 | class parser; |
| 12 | |
| 13 | /** |
| 14 | * Iterates through JSON tokens, keeping track of depth, the document root position, any unrecoverable error, and the next free location in the string buffer. |
| 15 | * |
| 16 | * @private This is not intended for external use. |
| 17 | */ |
| 18 | class json_iterator { |
| 19 | protected: |
| 20 | token_iterator token{}; |
| 21 | ondemand::parser *parser{}; |
| 22 | /** |
| 23 | * Next free location in the string buffer. |
| 24 | * |
| 25 | * Used by raw_json_string::unescape() to have a place to unescape strings to. |
| 26 | */ |
| 27 | uint8_t *_string_buf_loc{}; |
| 28 | /** |
| 29 | * JSON error, if there is one. |
| 30 | * |
| 31 | * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. |
| 32 | * |
| 33 | * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first |
| 34 | * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If |
| 35 | * this is not elided, we should make sure it's at least not using up a register. Failing that, |
| 36 | * we should store it in document so there's only one of them. |
| 37 | */ |
| 38 | error_code error{SUCCESS}; |
| 39 | /** |
| 40 | * Depth of the current token in the JSON. |
| 41 | * |
| 42 | * - 0 = finished with document |
| 43 | * - 1 = document root value (could be [ or {, not yet known) |
| 44 | * - 2 = , or } inside root array/object |
| 45 | * - 3 = key or value inside root array/object. |
| 46 | */ |
| 47 | depth_t _depth{}; |
| 48 | /** |
| 49 | * Beginning of the document indexes. |
| 50 | * Normally we have _root == parser->implementation->structural_indexes.get() |
| 51 | * but this may differ, especially in streaming mode (where we have several |
| 52 | * documents). |
| 53 | */ |
| 54 | token_position _root{}; |
| 55 | /** |
| 56 | * Normally, a json_iterator operates over a single document, but in |
| 57 | * some cases, we may have a stream of documents. This attribute is meant |
| 58 | * as meta-data: the json_iterator works the same irrespective of the |
| 59 | * value of this attribute. |
| 60 | */ |
| 61 | bool _streaming{false}; |
| 62 | |
| 63 | public: |
| 64 | simdjson_inline json_iterator() noexcept = default; |
| 65 | simdjson_inline json_iterator(json_iterator &&other) noexcept; |
| 66 | simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; |
| 67 | simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; |
| 68 | simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; |
| 69 | /** |
| 70 | * Skips a JSON value, whether it is a scalar, array or object. The function is marked simdjson_warn_unused, so callers are expected to check the returned error_code. NOTE(review): parent_depth is presumably the depth the iterator should be at once the child has been skipped; confirm against the definition. |
| 71 | */ |
| 72 | simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; |
| 73 | |
| 74 | /** |
| 75 | * Tell whether the iterator is still at the start. |
| 76 | */ |
| 77 | simdjson_inline bool at_root() const noexcept; |
| 78 | |
| 79 | /** |
| 80 | * Tell whether we should be expected to run in streaming |
| 81 | * mode (iterating over many documents). It is pure metadata |
| 82 | * that does not affect how the iterator works. It is used by |
| 83 | * start_root_array() and start_root_object(). |
| 84 | */ |
| 85 | simdjson_inline bool streaming() const noexcept; |
| 86 | |
| 87 | /** |
| 88 | * Get the token position of the document root. |
| 89 | */ |
| 90 | simdjson_inline token_position root_position() const noexcept; |
| 91 | /** |
| 92 | * Assert that we are at the document depth (== 1) |
| 93 | */ |
| 94 | simdjson_inline void assert_at_document_depth() const noexcept; |
| 95 | /** |
| 96 | * Assert that we are at the root of the document |
| 97 | */ |
| 98 | simdjson_inline void assert_at_root() const noexcept; |
| 99 | |
| 100 | /** |
| 101 | * Tell whether the iterator is at the EOF mark |
| 102 | */ |
| 103 | simdjson_inline bool at_end() const noexcept; |
| 104 | |
| 105 | /** |
| 106 | * Tell whether the iterator is live (has not been moved from). |
| 107 | */ |
| 108 | simdjson_inline bool is_alive() const noexcept; |
| 109 | |
| 110 | /** |
| 111 | * Abandon this iterator, setting depth to 0 (as if the document is finished). |
| 112 | */ |
| 113 | simdjson_inline void abandon() noexcept; |
| 114 | |
| 115 | /** |
| 116 | * Advance past the current token without modifying depth, returning a pointer into the JSON text (per the method name, that of the token that was current before advancing). |
| 117 | */ |
| 118 | simdjson_inline const uint8_t *return_current_and_advance() noexcept; |
| 119 | |
| 120 | /** |
| 121 | * Returns true if there is a single token in the index (i.e., it is |
| 122 | * a JSON with a scalar value such as a single number). |
| 123 | * |
| 124 | * @return whether there is a single token |
| 125 | */ |
| 126 | simdjson_inline bool is_single_token() const noexcept; |
| 127 | |
| 128 | /** |
| 129 | * Assert that there are at least the given number of tokens left. |
| 130 | * |
| 131 | * Has no effect in release builds. |
| 132 | */ |
| 133 | simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; |
| 134 | /** |
| 135 | * Assert that the given position addresses an actual token (is within bounds). |
| 136 | * |
| 137 | * Has no effect in release builds. |
| 138 | */ |
| 139 | simdjson_inline void assert_valid_position(token_position position) const noexcept; |
| 140 | /** |
| 141 | * Get the JSON text for a given token (relative). |
| 142 | * |
| 143 | * This is not null-terminated; it is a view into the JSON. |
| 144 | * |
| 145 | * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. |
| 146 | * |
| 147 | * TODO consider a string_view, assuming the length will get stripped out by the optimizer when |
| 148 | * it isn't used ... |
| 149 | */ |
| 150 | simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; |
| 151 | /** |
| 152 | * Get the maximum length of the JSON text for the current token (or a token relative to it). |
| 153 | * |
| 154 | * The length will include any whitespace at the end of the token. |
| 155 | * |
| 156 | * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. |
| 157 | */ |
| 158 | simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; |
| 159 | /** |
| 160 | * Get a pointer to the current location in the input buffer. |
| 161 | * |
| 162 | * This is not null-terminated; it is a view into the JSON. |
| 163 | * |
| 164 | * You may be pointing outside of the input buffer: it is not generally |
| 165 | * safe to dereference this pointer. |
| 166 | */ |
| 167 | simdjson_inline const uint8_t *unsafe_pointer() const noexcept; |
| 168 | /** |
| 169 | * Get the JSON text for a given token. |
| 170 | * |
| 171 | * This is not null-terminated; it is a view into the JSON. |
| 172 | * |
| 173 | * @param position The position of the token to retrieve. |
| 174 | * |
| 175 | * TODO consider a string_view, assuming the length will get stripped out by the optimizer when |
| 176 | * it isn't used ... |
| 177 | */ |
| 178 | simdjson_inline const uint8_t *peek(token_position position) const noexcept; |
| 179 | /** |
| 180 | * Get the maximum length of the JSON text for a given token. |
| 181 | * |
| 182 | * The length will include any whitespace at the end of the token. |
| 183 | * |
| 184 | * @param position The position of the token to retrieve. |
| 185 | */ |
| 186 | simdjson_inline uint32_t peek_length(token_position position) const noexcept; |
| 187 | /** |
| 188 | * Get the JSON text for the last token in the document. |
| 189 | * |
| 190 | * This is not null-terminated; it is a view into the JSON. |
| 191 | * |
| 192 | * TODO consider a string_view, assuming the length will get stripped out by the optimizer when |
| 193 | * it isn't used ... |
| 194 | */ |
| 195 | simdjson_inline const uint8_t *peek_last() const noexcept; |
| 196 | |
| 197 | /** |
| 198 | * Ascend one level. |
| 199 | * |
| 200 | * Validates that the depth - 1 == parent_depth. |
| 201 | * |
| 202 | * @param parent_depth the expected parent depth. |
| 203 | */ |
| 204 | simdjson_inline void ascend_to(depth_t parent_depth) noexcept; |
| 205 | |
| 206 | /** |
| 207 | * Descend one level. |
| 208 | * |
| 209 | * Validates that the new depth == child_depth. NOTE(review): the second overload also takes a delta, presumably a relative token offset as in peek(); confirm against the definition. |
| 210 | * |
| 211 | * @param child_depth the expected child depth. |
| 212 | */ |
| 213 | simdjson_inline void descend_to(depth_t child_depth) noexcept; |
| 214 | simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; |
| 215 | |
| 216 | /** |
| 217 | * Get current depth. |
| 218 | */ |
| 219 | simdjson_inline depth_t depth() const noexcept; |
| 220 | |
| 221 | /** |
| 222 | * Get current (writeable) location in the string buffer. |
| 223 | */ |
| 224 | simdjson_inline uint8_t *&string_buf_loc() noexcept; |
| 225 | |
| 226 | /** |
| 227 | * Report an unrecoverable error, preventing further iteration. |
| 228 | * |
| 229 | * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. |
| 230 | * @param message An error message to report with the error. |
| 231 | */ |
| 232 | simdjson_inline error_code report_error(error_code error, const char *message) noexcept; |
| 233 | |
| 234 | /** |
| 235 | * Log error, but don't stop iteration. |
| 236 | * @param error The error to report. Must be INCORRECT_TYPE or NO_SUCH_FIELD. |
| 237 | * @param message An error message to report with the error. |
| 238 | */ |
| 239 | simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; |
| 240 | |
| 241 | template<int N> simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; |
| 242 | |
| 243 | simdjson_inline token_position position() const noexcept; |
| 244 | /** |
| 245 | * Write the raw_json_string to the string buffer and return a string_view. |
| 246 | * Each raw_json_string should be unescaped once, or else the string buffer might |
| 247 | * overflow. NOTE(review): unescape_wobbly() and reenter_child() below are declared without their own documentation; confirm their contracts against the definitions. |
| 248 | */ |
| 249 | simdjson_inline simdjson_result<std::string_view> unescape(raw_json_string in, bool allow_replacement) noexcept; |
| 250 | simdjson_inline simdjson_result<std::string_view> unescape_wobbly(raw_json_string in) noexcept; |
| 251 | simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; |
| 252 | |
| 253 | #if SIMDJSON_DEVELOPMENT_CHECKS |
| 254 | simdjson_inline token_position start_position(depth_t depth) const noexcept; |
| 255 | simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; |
| 256 | #endif |
| 257 | |
| 258 | /* Useful for debugging and logging purposes. */ |
| 259 | inline std::string to_string() const noexcept; |
| 260 | |
| 261 | /** |
| 262 | * Returns the current location in the document if in bounds (the simdjson_result return type allows an error to be reported otherwise). |
| 263 | */ |
| 264 | inline simdjson_result<const char *> current_location() noexcept; |
| 265 | |
| 266 | /** |
| 267 | * Updates this json iterator so that it is back at the beginning of the document, |
| 268 | * as if it had just been created. |
| 269 | */ |
| 270 | inline void rewind() noexcept; |
| 271 | /** |
| 272 | * This checks whether the {,},[,] are balanced so that the document |
| 273 | * ends with proper zero depth. This requires scanning the whole document |
| 274 | * and it may be expensive. It is expected that it will be rarely called. |
| 275 | * It does not attempt to match { with } and [ with ]. |
| 276 | */ |
| 277 | inline bool balanced() const noexcept; |
| 278 | protected: |
| 279 | simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; |
| 280 | /// The last token before the end |
| 281 | simdjson_inline token_position last_position() const noexcept; |
| 282 | /// The token *at* the end. This points at gibberish and should only be used for comparison. |
| 283 | simdjson_inline token_position end_position() const noexcept; |
| 284 | /// The end of the buffer. |
| 285 | simdjson_inline token_position end() const noexcept; |
| 286 | |
| 287 | friend class document; |
| 288 | friend class document_stream; |
| 289 | friend class object; |
| 290 | friend class array; |
| 291 | friend class value; |
| 292 | friend class raw_json_string; |
| 293 | friend class parser; |
| 294 | friend class value_iterator; |
| 295 | friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; |
| 296 | friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; |
| 297 | }; // json_iterator |
| 298 | |
| 299 | } // namespace ondemand |
| 300 | } // namespace SIMDJSON_IMPLEMENTATION |
| 301 | } // namespace simdjson |
| 302 | |
| 303 | namespace simdjson { |
| 304 | |
| 305 | template<> |
| 306 | struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator> { |
| 307 | public: |
| 308 | simdjson_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private Constructs a result from a json_iterator taken by rvalue reference. |
| 309 | simdjson_inline simdjson_result(error_code error) noexcept; ///< @private Constructs a result from an error code. |
| 310 | |
| 311 | simdjson_inline simdjson_result() noexcept = default; |
| 312 | }; |
| 313 | |
| 314 | } // namespace simdjson |
| 315 | |