1#ifndef SIMDJSON_DOM_PARSER_H
2#define SIMDJSON_DOM_PARSER_H
3
4#include "simdjson/common_defs.h"
5#include "simdjson/dom/document.h"
6#include "simdjson/error.h"
7#include "simdjson/internal/dom_parser_implementation.h"
8#include "simdjson/internal/tape_ref.h"
9#include "simdjson/padded_string.h"
10#include "simdjson/portability.h"
11#include <memory>
12#include <ostream>
13#include <string>
14
15namespace simdjson {
16
17namespace dom {
18
// Forward declarations of types that the parser interface only names
// (as template arguments of return types and in a friend declaration).
class document_stream;
class element;
21
/** The default batch size, in bytes (1MB), for parser.parse_many() and parser.load_many(). */
static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;
/**
 * The smallest batch size, in bytes, that is considered reasonable.
 *
 * Some adversary might try to set the batch size to 0 or 1, which might cause problems.
 * We set a minimum of 32B since anything smaller is highly likely to be an error. In practice,
 * most users will want a much larger batch size.
 *
 * All non-negative MINIMAL_BATCH_SIZE values should be 'safe', except that a batch size of
 * 0 or 1 byte is of no practical use and that very large values would create memory
 * allocation issues.
 */
static constexpr size_t MINIMAL_BATCH_SIZE = 32;
33
/**
 * The smallest document capacity, in bytes, that is worth allocating:
 * it is wasteful to allocate memory for tiny documents (e.g., 4 bytes).
 *
 * parser::set_max_capacity() documents that requests below this value are raised to it.
 */
static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32;
38
39/**
40 * A persistent document parser.
41 *
42 * The parser is designed to be reused, holding the internal buffers necessary to do parsing,
43 * as well as memory for a single document. The parsed document is overwritten on each parse.
44 *
45 * This class cannot be copied, only moved, to avoid unintended allocations.
46 *
47 * @note Moving a parser instance may invalidate "dom::element" instances. If you need to
48 * preserve both the "dom::element" instances and the parser, consider wrapping the parser
49 * instance in a std::unique_ptr instance:
50 *
51 * std::unique_ptr<dom::parser> parser(new dom::parser{});
52 * auto error = parser->load(f).get(root);
53 *
54 * You can then move std::unique_ptr safely.
55 *
56 * @note This is not thread safe: one parser cannot produce two documents at the same time!
57 */
58class parser {
59public:
60 /**
61 * Create a JSON parser.
62 *
63 * The new parser will have zero capacity.
64 *
65 * @param max_capacity The maximum document length the parser can automatically handle. The parser
66 * will allocate more capacity on an as needed basis (when it sees documents too big to handle)
67 * up to this amount. The parser still starts with zero capacity no matter what this number is:
68 * to allocate an initial capacity, call allocate() after constructing the parser.
69 * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
70 */
71 simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
72 /**
73 * Take another parser's buffers and state.
74 *
75 * @param other The parser to take. Its capacity is zeroed.
76 */
77 simdjson_inline parser(parser &&other) noexcept;
78 parser(const parser &) = delete; ///< @private Disallow copying
79 /**
80 * Take another parser's buffers and state.
81 *
82 * @param other The parser to take. Its capacity is zeroed.
83 */
84 simdjson_inline parser &operator=(parser &&other) noexcept;
85 parser &operator=(const parser &) = delete; ///< @private Disallow copying
86
87 /** Deallocate the JSON parser. */
88 ~parser()=default;
89
90 /**
91 * Load a JSON document from a file and return a reference to it.
92 *
93 * dom::parser parser;
94 * const element doc = parser.load("jsonexamples/twitter.json");
95 *
96 * The function is eager: the file's content is loaded in memory inside the parser instance
97 * and immediately parsed. The file can be deleted after the `parser.load` call.
98 *
99 * ### IMPORTANT: Document Lifetime
100 *
101 * The JSON document still lives in the parser: this is the most efficient way to parse JSON
102 * documents because it reuses the same buffers, but you *must* use the document before you
103 * destroy the parser or call parse() again.
104 *
105 * Moving the parser instance is safe, but it invalidates the element instances. You may store
106 * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
107 * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
108 *
109 * ### Parser Capacity
110 *
111 * If the parser's current capacity is less than the file length, it will allocate enough capacity
112 * to handle it (up to max_capacity).
113 *
114 * @param path The path to load.
115 * @return The document, or an error:
116 * - IO_ERROR if there was an error opening or reading the file.
117 * Be mindful that on some 32-bit systems,
118 * the file size might be limited to 2 GB.
119 * - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
120 * - CAPACITY if the parser does not have enough capacity and len > max_capacity.
121 * - other json errors if parsing fails. You should not rely on these errors to always the same for the
122 * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
123 */
124 inline simdjson_result<element> load(const std::string &path) & noexcept;
125 inline simdjson_result<element> load(const std::string &path) && = delete ;
126 /**
127 * Parse a JSON document and return a temporary reference to it.
128 *
129 * dom::parser parser;
130 * element doc_root = parser.parse(buf, len);
131 *
132 * The function eagerly parses the input: the input can be modified and discarded after
133 * the `parser.parse(buf, len)` call has completed.
134 *
135 * ### IMPORTANT: Document Lifetime
136 *
137 * The JSON document still lives in the parser: this is the most efficient way to parse JSON
138 * documents because it reuses the same buffers, but you *must* use the document before you
139 * destroy the parser or call parse() again.
140 *
141 * Moving the parser instance is safe, but it invalidates the element instances. You may store
142 * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
143 * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
144 *
145 * ### REQUIRED: Buffer Padding
146 *
147 * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
148 * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you
149 * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the
150 * SIMDJSON_PADDING bytes to avoid runtime warnings.
151 *
152 * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding,
153 * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
154 *
155 * const char *json = R"({"key":"value"})";
156 * const size_t json_len = std::strlen(json);
157 * simdjson::dom::parser parser;
158 * simdjson::dom::element element = parser.parse(json, json_len);
159 *
160 * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
161 * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
162 * The benefit of setting realloc_if_needed to false is that you avoid a temporary
163 * memory allocation and a copy.
164 *
165 * The padded bytes may be read. It is not important how you initialize
166 * these bytes though we recommend a sensible default like null character values or spaces.
167 * For example, the following low-level code is safe:
168 *
169 * const char *json = R"({"key":"value"})";
170 * const size_t json_len = std::strlen(json);
171 * std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
172 * std::memcpy(padded_json_copy.get(), json, json_len);
173 * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
174 * simdjson::dom::parser parser;
175 * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
176 *
177 * ### Parser Capacity
178 *
179 * If the parser's current capacity is less than len, it will allocate enough capacity
180 * to handle it (up to max_capacity).
181 *
182 * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
183 * realloc_if_needed is true.
184 * @param len The length of the JSON.
185 * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
186 * @return An element pointing at the root of the document, or an error:
187 * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
188 * and memory allocation fails.
189 * - CAPACITY if the parser does not have enough capacity and len > max_capacity.
190 * - other json errors if parsing fails. You should not rely on these errors to always the same for the
191 * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
192 */
193 inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
194 inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
195 /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
196 simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
197 simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
198 /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
199 simdjson_inline simdjson_result<element> parse(const std::string &s) & noexcept;
200 simdjson_inline simdjson_result<element> parse(const std::string &s) && =delete;
201 /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
202 simdjson_inline simdjson_result<element> parse(const padded_string &s) & noexcept;
203 simdjson_inline simdjson_result<element> parse(const padded_string &s) && =delete;
204 /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
205 simdjson_inline simdjson_result<element> parse(const padded_string_view &v) & noexcept;
206 simdjson_inline simdjson_result<element> parse(const padded_string_view &v) && =delete;
207
208 /** @private We do not want to allow implicit conversion from C string to std::string. */
209 simdjson_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
210
211 /**
212 * Parse a JSON document into a provide document instance and return a temporary reference to it.
213 * It is similar to the function `parse` except that instead of parsing into the internal
214 * `document` instance associated with the parser, it allows the user to provide a document
215 * instance.
216 *
217 * dom::parser parser;
218 * dom::document doc;
219 * element doc_root = parser.parse_into_document(doc, buf, len);
220 *
221 * The function eagerly parses the input: the input can be modified and discarded after
222 * the `parser.parse(buf, len)` call has completed.
223 *
224 * ### IMPORTANT: Document Lifetime
225 *
226 * After the call to parse_into_document, the parser is no longer needed.
227 *
228 * The JSON document lives in the document instance: you must keep the document
229 * instance alive while you navigate through it (i.e., used the returned value from
230 * parse_into_document). You are encourage to reuse the document instance
231 * many times with new data to avoid reallocations:
232 *
233 * dom::document doc;
234 * element doc_root1 = parser.parse_into_document(doc, buf1, len);
235 * //... doc_root1 is a pointer inside doc
236 * element doc_root2 = parser.parse_into_document(doc, buf1, len);
237 * //... doc_root2 is a pointer inside doc
238 * // at this point doc_root1 is no longer safe
239 *
240 * Moving the document instance is safe, but it invalidates the element instances. After
241 * moving a document, you can recover safe access to the document root with its `root()` method.
242 *
243 * @param doc The document instance where the parsed data will be stored (on success).
244 * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
245 * realloc_if_needed is true.
246 * @param len The length of the JSON.
247 * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
248 * @return An element pointing at the root of document, or an error:
249 * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
250 * and memory allocation fails.
251 * - CAPACITY if the parser does not have enough capacity and len > max_capacity.
252 * - other json errors if parsing fails. You should not rely on these errors to always the same for the
253 * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
254 */
255 inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
256 inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
257 /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
258 simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
259 simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
260 /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
261 simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) & noexcept;
262 simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) && =delete;
263 /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
264 simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) & noexcept;
265 simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) && =delete;
266
267 /** @private We do not want to allow implicit conversion from C string to std::string. */
268 simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf) noexcept = delete;
269
270 /**
271 * Load a file containing many JSON documents.
272 *
273 * dom::parser parser;
274 * for (const element doc : parser.load_many(path)) {
275 * cout << std::string(doc["title"]) << endl;
276 * }
277 *
278 * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)`
279 * function has returned. The memory is held by the `parser` instance.
280 *
281 * The function is lazy: it may be that no more than one JSON document at a time is parsed.
282 * And, possibly, no document many have been parsed when the `parser.load_many(path)` function
283 * returned.
284 *
285 * ### Format
286 *
287 * The file must contain a series of one or more JSON documents, concatenated into a single
288 * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
289 * then starts parsing the next document at that point. (It does this with more parallelism and
290 * lookahead than you might think, though.)
291 *
292 * Documents that consist of an object or array may omit the whitespace between them, concatenating
293 * with no separator. documents that consist of a single primitive (i.e. documents that are not
294 * arrays or objects) MUST be separated with whitespace.
295 *
296 * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
297 * Setting batch_size to excessively large or excesively small values may impact negatively the
298 * performance.
299 *
300 * ### Error Handling
301 *
302 * All errors are returned during iteration: if there is a global error such as memory allocation,
303 * it will be yielded as the first result. Iteration always stops after the first error.
304 *
305 * As with all other simdjson methods, non-exception error handling is readily available through
306 * the same interface, requiring you to check the error before using the document:
307 *
308 * dom::parser parser;
309 * dom::document_stream docs;
310 * auto error = parser.load_many(path).get(docs);
311 * if (error) { cerr << error << endl; exit(1); }
312 * for (auto doc : docs) {
313 * std::string_view title;
314 * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); }
315 * cout << title << endl;
316 * }
317 *
318 * ### Threads
319 *
320 * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
321 * hood to do some lookahead.
322 *
323 * ### Parser Capacity
324 *
325 * If the parser's current capacity is less than batch_size, it will allocate enough capacity
326 * to handle it (up to max_capacity).
327 *
328 * @param path File name pointing at the concatenated JSON to parse.
329 * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
330 * spot is cache-related: small enough to fit in cache, yet big enough to
331 * parse as many documents as possible in one tight loop.
332 * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet
333 * spot in our tests.
334 * If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE
335 * (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE.
336 * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors:
337 * - IO_ERROR if there was an error opening or reading the file.
338 * - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
339 * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
340 * - other json errors if parsing fails. You should not rely on these errors to always the same for the
341 * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
342 */
343 inline simdjson_result<document_stream> load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
344
345 /**
346 * Parse a buffer containing many JSON documents.
347 *
348 * dom::parser parser;
349 * for (element doc : parser.parse_many(buf, len)) {
350 * cout << std::string(doc["title"]) << endl;
351 * }
352 *
353 * No copy of the input buffer is made.
354 *
355 * The function is lazy: it may be that no more than one JSON document at a time is parsed.
356 * And, possibly, no document many have been parsed when the `parser.load_many(path)` function
357 * returned.
358 *
359 * The caller is responsabile to ensure that the input string data remains unchanged and is
360 * not deleted during the loop. In particular, the following is unsafe and will not compile:
361 *
362 * auto docs = parser.parse_many("[\"temporary data\"]"_padded);
363 * // here the string "[\"temporary data\"]" may no longer exist in memory
364 * // the parser instance may not have even accessed the input yet
365 * for (element doc : docs) {
366 * cout << std::string(doc["title"]) << endl;
367 * }
368 *
369 * The following is safe:
370 *
371 * auto json = "[\"temporary data\"]"_padded;
372 * auto docs = parser.parse_many(json);
373 * for (element doc : docs) {
374 * cout << std::string(doc["title"]) << endl;
375 * }
376 *
377 * ### Format
378 *
379 * The buffer must contain a series of one or more JSON documents, concatenated into a single
380 * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
381 * then starts parsing the next document at that point. (It does this with more parallelism and
382 * lookahead than you might think, though.)
383 *
384 * documents that consist of an object or array may omit the whitespace between them, concatenating
385 * with no separator. documents that consist of a single primitive (i.e. documents that are not
386 * arrays or objects) MUST be separated with whitespace.
387 *
388 * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
389 * Setting batch_size to excessively large or excesively small values may impact negatively the
390 * performance.
391 *
392 * ### Error Handling
393 *
394 * All errors are returned during iteration: if there is a global error such as memory allocation,
395 * it will be yielded as the first result. Iteration always stops after the first error.
396 *
397 * As with all other simdjson methods, non-exception error handling is readily available through
398 * the same interface, requiring you to check the error before using the document:
399 *
400 * dom::parser parser;
401 * dom::document_stream docs;
402 * auto error = parser.load_many(path).get(docs);
403 * if (error) { cerr << error << endl; exit(1); }
404 * for (auto doc : docs) {
405 * std::string_view title;
406 * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); }
407 * cout << title << endl;
408 * }
409 *
410 * ### REQUIRED: Buffer Padding
411 *
412 * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
413 * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you
414 * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the
415 * SIMDJSON_PADDING bytes to avoid runtime warnings.
416 *
417 * ### Threads
418 *
419 * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
420 * hood to do some lookahead.
421 *
422 * ### Parser Capacity
423 *
424 * If the parser's current capacity is less than batch_size, it will allocate enough capacity
425 * to handle it (up to max_capacity).
426 *
427 * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
428 * @param len The length of the concatenated JSON.
429 * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
430 * spot is cache-related: small enough to fit in cache, yet big enough to
431 * parse as many documents as possible in one tight loop.
432 * Defaults to 10MB, which has been a reasonable sweet spot in our tests.
433 * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors:
434 * - MEMALLOC if the parser does not have enough capacity and memory allocation fails
435 * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
436 * - other json errors if parsing fails. You should not rely on these errors to always the same for the
437 * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
438 */
439 inline simdjson_result<document_stream> parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
440 /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
441 inline simdjson_result<document_stream> parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
442 /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
443 inline simdjson_result<document_stream> parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
444 inline simdjson_result<document_stream> parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe
445 /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
446 inline simdjson_result<document_stream> parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
447 inline simdjson_result<document_stream> parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe
448
449 /** @private We do not want to allow implicit conversion from C string to std::string. */
450 simdjson_result<document_stream> parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete;
451
452 /**
453 * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
454 * and `max_depth` depth.
455 *
456 * @param capacity The new capacity.
457 * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
458 * @return The error, if there is one.
459 */
460 simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
461
462#ifndef SIMDJSON_DISABLE_DEPRECATED_API
463 /**
464 * @private deprecated because it returns bool instead of error_code, which is our standard for
465 * failures. Use allocate() instead.
466 *
467 * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
468 * and `max_depth` depth.
469 *
470 * @param capacity The new capacity.
471 * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
472 * @return true if successful, false if allocation failed.
473 */
474 [[deprecated("Use allocate() instead.")]]
475 simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
476#endif // SIMDJSON_DISABLE_DEPRECATED_API
477 /**
478 * The largest document this parser can support without reallocating.
479 *
480 * @return Current capacity, in bytes.
481 */
482 simdjson_inline size_t capacity() const noexcept;
483
484 /**
485 * The largest document this parser can automatically support.
486 *
487 * The parser may reallocate internal buffers as needed up to this amount.
488 *
489 * @return Maximum capacity, in bytes.
490 */
491 simdjson_inline size_t max_capacity() const noexcept;
492
493 /**
494 * The maximum level of nested object and arrays supported by this parser.
495 *
496 * @return Maximum depth, in bytes.
497 */
498 simdjson_inline size_t max_depth() const noexcept;
499
500 /**
501 * Set max_capacity. This is the largest document this parser can automatically support.
502 *
503 * The parser may reallocate internal buffers as needed up to this amount as documents are passed
504 * to it.
505 *
506 * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes,
507 * iff you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY,
508 * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY.
509 *
510 * This call will not allocate or deallocate, even if capacity is currently above max_capacity.
511 *
512 * @param max_capacity The new maximum capacity, in bytes.
513 */
514 simdjson_inline void set_max_capacity(size_t max_capacity) noexcept;
515
516#ifdef SIMDJSON_THREADS_ENABLED
517 /**
518 * The parser instance can use threads when they are available to speed up some
519 * operations. It is enabled by default. Changing this attribute will change the
520 * behavior of the parser for future operations.
521 */
522 bool threaded{true};
523#endif
524 /** @private Use the new DOM API instead */
525 class Iterator;
526 /** @private Use simdjson_error instead */
527 using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
528
529 /** @private [for benchmarking access] The implementation to use */
530 std::unique_ptr<internal::dom_parser_implementation> implementation{};
531
532 /** @private Use `if (parser.parse(...).error())` instead */
533 bool valid{false};
534 /** @private Use `parser.parse(...).error()` instead */
535 error_code error{UNINITIALIZED};
536
537 /** @private Use `parser.parse(...).value()` instead */
538 document doc{};
539
540 /** @private returns true if the document parsed was valid */
541 [[deprecated("Use the result of parser.parse() instead")]]
542 inline bool is_valid() const noexcept;
543
544 /**
545 * @private return an error code corresponding to the last parsing attempt, see
546 * simdjson.h will return UNINITIALIZED if no parsing was attempted
547 */
548 [[deprecated("Use the result of parser.parse() instead")]]
549 inline int get_error_code() const noexcept;
550
551 /** @private return the string equivalent of "get_error_code" */
552 [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]]
553 inline std::string get_error_message() const noexcept;
554
555 /** @private */
556 [[deprecated("Use cout << on the result of parser.parse() instead")]]
557 inline bool print_json(std::ostream &os) const noexcept;
558
559 /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
560 inline bool dump_raw_tape(std::ostream &os) const noexcept;
561
562
563private:
564 /**
565 * The maximum document length this parser will automatically support.
566 *
567 * The parser will not be automatically allocated above this amount.
568 */
569 size_t _max_capacity;
570
571 /**
572 * The loaded buffer (reused each time load() is called)
573 */
574 std::unique_ptr<char[]> loaded_bytes;
575
576 /** Capacity of loaded_bytes buffer. */
577 size_t _loaded_bytes_capacity{0};
578
579 // all nodes are stored on the doc.tape using a 64-bit word.
580 //
581 // strings, double and ints are stored as
582 // a 64-bit word with a pointer to the actual value
583 //
584 //
585 //
586 // for objects or arrays, store [ or { at the beginning and } and ] at the
587 // end. For the openings ([ or {), we annotate them with a reference to the
588 // location on the doc.tape of the end, and for then closings (} and ]), we
589 // annotate them with a reference to the location of the opening
590 //
591 //
592
593 /**
594 * Ensure we have enough capacity to handle at least desired_capacity bytes,
595 * and auto-allocate if not. This also allocates memory if needed in the
596 * internal document.
597 */
598 inline error_code ensure_capacity(size_t desired_capacity) noexcept;
599 /**
600 * Ensure we have enough capacity to handle at least desired_capacity bytes,
601 * and auto-allocate if not. This also allocates memory if needed in the
602 * provided document.
603 */
604 inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept;
605
606 /** Read the file into loaded_bytes */
607 inline simdjson_result<size_t> read_file(const std::string &path) noexcept;
608
609 friend class parser::Iterator;
610 friend class document_stream;
611
612
613}; // class parser
614
615} // namespace dom
616} // namespace simdjson
617
618#endif // SIMDJSON_DOM_PARSER_H
619