parser.h source code [Velox/build/_deps/simdjson-src/include/simdjson/dom/parser.h]

1	#ifndef SIMDJSON_DOM_PARSER_H
2	#define SIMDJSON_DOM_PARSER_H
3
4	#include "simdjson/common_defs.h"
5	#include "simdjson/dom/document.h"
6	#include "simdjson/error.h"
7	#include "simdjson/internal/dom_parser_implementation.h"
8	#include "simdjson/internal/tape_ref.h"
9	#include "simdjson/padded_string.h"
10	#include "simdjson/portability.h"
11	#include <memory>
12	#include <ostream>
13	#include <string>
14
15	namespace simdjson {
16
17	namespace dom {
18
19	class document_stream;
20	class element;
21
22	/** The default batch size for parser.parse_many() and parser.load_many() */
23	static constexpr size_t DEFAULT_BATCH_SIZE = 1000000;
24	/**
25	 * Some adversary might try to set the batch size to 0 or 1, which might cause problems.
26	 * We set a minimum of 32B since anything else is highly likely to be an error. In practice,
27	 * most users will want a much larger batch size.
28	 *
29	 * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON
30	 * document can ever span 0 or 1 byte and that very large values would create memory allocation issues.
31	 */
32	static constexpr size_t MINIMAL_BATCH_SIZE = 32;
33
34	/**
35	 * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes).
36	 */
37	static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32;
38
39	/**
40	 * A persistent document parser.
41	 *
42	 * The parser is designed to be reused, holding the internal buffers necessary to do parsing,
43	 * as well as memory for a single document. The parsed document is overwritten on each parse.
44	 *
45	 * This class cannot be copied, only moved, to avoid unintended allocations.
46	 *
47	 * @note Moving a parser instance may invalidate "dom::element" instances. If you need to
48	 * preserve both the "dom::element" instances and the parser, consider wrapping the parser
49	 * instance in a std::unique_ptr instance:
50	 *
51	 *   std::unique_ptr<dom::parser> parser(new dom::parser{});
52	 *   auto error = parser->load(f).get(root);
53	 *
54	 * You can then move std::unique_ptr safely.
55	 *
56	 * @note This is not thread safe: one parser cannot produce two documents at the same time!
57	 */
58	class parser {
59	public:
60	  /**
61	   * Create a JSON parser.
62	   *
63	   * The new parser will have zero capacity.
64	   *
65	   * @param max_capacity The maximum document length the parser can automatically handle. The parser
66	   *    will allocate more capacity on an as needed basis (when it sees documents too big to handle)
67	   *    up to this amount. The parser still starts with zero capacity no matter what this number is:
68	   *    to allocate an initial capacity, call allocate() after constructing the parser.
69	   *    Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
70	   */
71	  simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
72	  /**
73	   * Take another parser's buffers and state.
74	   *
75	   * @param other The parser to take. Its capacity is zeroed.
76	   */
77	  simdjson_inline parser(parser &&other) noexcept;
78	  parser(const parser &) = delete; ///< @private Disallow copying
79	  /**
80	   * Take another parser's buffers and state.
81	   *
82	   * @param other The parser to take. Its capacity is zeroed.
83	   */
84	  simdjson_inline parser &operator=(parser &&other) noexcept;
85	  parser &operator=(const parser &) = delete; ///< @private Disallow copying
86
87	  /** Deallocate the JSON parser. */
88	  ~parser()=default;
89
90	  /**
91	   * Load a JSON document from a file and return a reference to it.
92	   *
93	   *   dom::parser parser;
94	   *   const element doc = parser.load("jsonexamples/twitter.json");
95	   *
96	   * The function is eager: the file's content is loaded in memory inside the parser instance
97	   * and immediately parsed. The file can be deleted after the `parser.load` call.
98	   *
99	   * ### IMPORTANT: Document Lifetime
100	   *
101	   * The JSON document still lives in the parser: this is the most efficient way to parse JSON
102	   * documents because it reuses the same buffers, but you must use the document before you
103	   * destroy the parser or call parse() again.
104	   *
105	   * Moving the parser instance is safe, but it invalidates the element instances. You may store
106	   * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
107	   * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
108	   *
109	   * ### Parser Capacity
110	   *
111	   * If the parser's current capacity is less than the file length, it will allocate enough capacity
112	   * to handle it (up to max_capacity).
113	   *
114	   * @param path The path to load.
115	   * @return The document, or an error:
116	   *         - IO_ERROR if there was an error opening or reading the file.
117	   *           Be mindful that on some 32-bit systems,
118	   *           the file size might be limited to 2 GB.
119	   *         - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
120	   *         - CAPACITY if the parser does not have enough capacity and len > max_capacity.
121	   *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
122	   *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
123	   */
124	  inline simdjson_result<element> load(const std::string &path) & noexcept;
125	  inline simdjson_result<element> load(const std::string &path) && = delete;
126	  /**
127	   * Parse a JSON document and return a temporary reference to it.
128	   *
129	   *   dom::parser parser;
130	   *   element doc_root = parser.parse(buf, len);
131	   *
132	   * The function eagerly parses the input: the input can be modified and discarded after
133	   * the `parser.parse(buf, len)` call has completed.
134	   *
135	   * ### IMPORTANT: Document Lifetime
136	   *
137	   * The JSON document still lives in the parser: this is the most efficient way to parse JSON
138	   * documents because it reuses the same buffers, but you must use the document before you
139	   * destroy the parser or call parse() again.
140	   *
141	   * Moving the parser instance is safe, but it invalidates the element instances. You may store
142	   * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like
143	   * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
144	   *
145	   * ### REQUIRED: Buffer Padding
146	   *
147	   * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
148	   * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you
149	   * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the
150	   * SIMDJSON_PADDING bytes to avoid runtime warnings.
151	   *
152	   * If realloc_if_needed is true (the default), it is assumed that the buffer does not have enough padding,
153	   * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
154	   *
155	   *   const char *json = R"({"key":"value"})";
156	   *   const size_t json_len = std::strlen(json);
157	   *   simdjson::dom::parser parser;
158	   *   simdjson::dom::element element = parser.parse(json, json_len);
159	   *
160	   * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
161	   * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
162	   * The benefit of setting realloc_if_needed to false is that you avoid a temporary
163	   * memory allocation and a copy.
164	   *
165	   * The padded bytes may be read. It is not important how you initialize
166	   * these bytes though we recommend a sensible default like null character values or spaces.
167	   * For example, the following low-level code is safe:
168	   *
169	   *   const char *json = R"({"key":"value"})";
170	   *   const size_t json_len = std::strlen(json);
171	   *   std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
172	   *   std::memcpy(padded_json_copy.get(), json, json_len);
173	   *   std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
174	   *   simdjson::dom::parser parser;
175	   *   simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
176	   *
177	   * ### Parser Capacity
178	   *
179	   * If the parser's current capacity is less than len, it will allocate enough capacity
180	   * to handle it (up to max_capacity).
181	   *
182	   * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
183	   *            realloc_if_needed is true.
184	   * @param len The length of the JSON.
185	   * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
186	   * @return An element pointing at the root of the document, or an error:
187	   *         - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
188	   *           and memory allocation fails.
189	   *         - CAPACITY if the parser does not have enough capacity and len > max_capacity.
190	   *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
191	   *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
192	   */
193	  inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
194	  inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
195	  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
196	  simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
197	  simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
198	  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
199	  simdjson_inline simdjson_result<element> parse(const std::string &s) & noexcept;
200	  simdjson_inline simdjson_result<element> parse(const std::string &s) && =delete;
201	  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
202	  simdjson_inline simdjson_result<element> parse(const padded_string &s) & noexcept;
203	  simdjson_inline simdjson_result<element> parse(const padded_string &s) && =delete;
204	  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
205	  simdjson_inline simdjson_result<element> parse(const padded_string_view &v) & noexcept;
206	  simdjson_inline simdjson_result<element> parse(const padded_string_view &v) && =delete;
207
208	  /** @private We do not want to allow implicit conversion from C string to std::string. */
209	  simdjson_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
210
211	  /**
212	   * Parse a JSON document into a provided document instance and return a temporary reference to it.
213	   * It is similar to the function `parse` except that instead of parsing into the internal
214	   * `document` instance associated with the parser, it allows the user to provide a document
215	   * instance.
216	   *
217	   *   dom::parser parser;
218	   *   dom::document doc;
219	   *   element doc_root = parser.parse_into_document(doc, buf, len);
220	   *
221	   * The function eagerly parses the input: the input can be modified and discarded after
222	   * the `parser.parse_into_document(doc, buf, len)` call has completed.
223	   *
224	   * ### IMPORTANT: Document Lifetime
225	   *
226	   * After the call to parse_into_document, the parser is no longer needed.
227	   *
228	   * The JSON document lives in the document instance: you must keep the document
229	   * instance alive while you navigate through it (i.e., use the returned value from
230	   * parse_into_document). You are encouraged to reuse the document instance
231	   * many times with new data to avoid reallocations:
232	   *
233	   *   dom::document doc;
234	   *   element doc_root1 = parser.parse_into_document(doc, buf1, len);
235	   *   //... doc_root1 is a pointer inside doc
236	   *   element doc_root2 = parser.parse_into_document(doc, buf1, len);
237	   *   //... doc_root2 is a pointer inside doc
238	   *   // at this point doc_root1 is no longer safe
239	   *
240	   * Moving the document instance is safe, but it invalidates the element instances. After
241	   * moving a document, you can recover safe access to the document root with its `root()` method.
242	   *
243	   * @param doc The document instance where the parsed data will be stored (on success).
244	   * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
245	   *            realloc_if_needed is true.
246	   * @param len The length of the JSON.
247	   * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
248	   * @return An element pointing at the root of the document, or an error:
249	   *         - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
250	   *           and memory allocation fails.
251	   *         - CAPACITY if the parser does not have enough capacity and len > max_capacity.
252	   *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
253	   *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
254	   */
255	  inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
256	  inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
257	  /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
258	  simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
259	  simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
260	  /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
261	  simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) & noexcept;
262	  simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) && =delete;
263	  /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
264	  simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) & noexcept;
265	  simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) && =delete;
266
267	  /** @private We do not want to allow implicit conversion from C string to std::string. */
268	  simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf) noexcept = delete;
269
270	  /**
271	   * Load a file containing many JSON documents.
272	   *
273	   *   dom::parser parser;
274	   *   for (const element doc : parser.load_many(path)) {
275	   *     cout << std::string(doc["title"]) << endl;
276	   *   }
277	   *
278	   * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)`
279	   * function has returned. The memory is held by the `parser` instance.
280	   *
281	   * The function is lazy: it may be that no more than one JSON document at a time is parsed.
282	   * And, possibly, no document may have been parsed when the `parser.load_many(path)` function
283	   * returned.
284	   *
285	   * ### Format
286	   *
287	   * The file must contain a series of one or more JSON documents, concatenated into a single
288	   * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
289	   * then starts parsing the next document at that point. (It does this with more parallelism and
290	   * lookahead than you might think, though.)
291	   *
292	   * Documents that consist of an object or array may omit the whitespace between them, concatenating
293	   * with no separator. Documents that consist of a single primitive (i.e. documents that are not
294	   * arrays or objects) MUST be separated with whitespace.
295	   *
296	   * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
297	   * Setting batch_size to excessively large or excessively small values may impact negatively the
298	   * performance.
299	   *
300	   * ### Error Handling
301	   *
302	   * All errors are returned during iteration: if there is a global error such as memory allocation,
303	   * it will be yielded as the first result. Iteration always stops after the first error.
304	   *
305	   * As with all other simdjson methods, non-exception error handling is readily available through
306	   * the same interface, requiring you to check the error before using the document:
307	   *
308	   *   dom::parser parser;
309	   *   dom::document_stream docs;
310	   *   auto error = parser.load_many(path).get(docs);
311	   *   if (error) { cerr << error << endl; exit(1); }
312	   *   for (auto doc : docs) {
313	   *     std::string_view title;
314	   *     if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); }
315	   *     cout << title << endl;
316	   *   }
317	   *
318	   * ### Threads
319	   *
320	   * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
321	   * hood to do some lookahead.
322	   *
323	   * ### Parser Capacity
324	   *
325	   * If the parser's current capacity is less than batch_size, it will allocate enough capacity
326	   * to handle it (up to max_capacity).
327	   *
328	   * @param path File name pointing at the concatenated JSON to parse.
329	   * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
330	   *                   spot is cache-related: small enough to fit in cache, yet big enough to
331	   *                   parse as many documents as possible in one tight loop.
332	   *                   Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet
333	   *                   spot in our tests.
334	   *                   If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE
335	   *                   (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE.
336	   * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors:
337	   *         - IO_ERROR if there was an error opening or reading the file.
338	   *         - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
339	   *         - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
340	   *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
341	   *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
342	   */
343	  inline simdjson_result<document_stream> load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
344
345	  /**
346	   * Parse a buffer containing many JSON documents.
347	   *
348	   *   dom::parser parser;
349	   *   for (element doc : parser.parse_many(buf, len)) {
350	   *     cout << std::string(doc["title"]) << endl;
351	   *   }
352	   *
353	   * No copy of the input buffer is made.
354	   *
355	   * The function is lazy: it may be that no more than one JSON document at a time is parsed.
356	   * And, possibly, no document may have been parsed when the `parser.parse_many(buf, len)` function
357	   * returned.
358	   *
359	   * The caller is responsible for ensuring that the input string data remains unchanged and is
360	   * not deleted during the loop. In particular, the following is unsafe and will not compile:
361	   *
362	   *   auto docs = parser.parse_many("[\"temporary data\"]"_padded);
363	   *   // here the string "[\"temporary data\"]" may no longer exist in memory
364	   *   // the parser instance may not have even accessed the input yet
365	   *   for (element doc : docs) {
366	   *     cout << std::string(doc["title"]) << endl;
367	   *   }
368	   *
369	   * The following is safe:
370	   *
371	   *   auto json = "[\"temporary data\"]"_padded;
372	   *   auto docs = parser.parse_many(json);
373	   *   for (element doc : docs) {
374	   *     cout << std::string(doc["title"]) << endl;
375	   *   }
376	   *
377	   * ### Format
378	   *
379	   * The buffer must contain a series of one or more JSON documents, concatenated into a single
380	   * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
381	   * then starts parsing the next document at that point. (It does this with more parallelism and
382	   * lookahead than you might think, though.)
383	   *
384	   * Documents that consist of an object or array may omit the whitespace between them, concatenating
385	   * with no separator. Documents that consist of a single primitive (i.e. documents that are not
386	   * arrays or objects) MUST be separated with whitespace.
387	   *
388	   * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
389	   * Setting batch_size to excessively large or excessively small values may impact negatively the
390	   * performance.
391	   *
392	   * ### Error Handling
393	   *
394	   * All errors are returned during iteration: if there is a global error such as memory allocation,
395	   * it will be yielded as the first result. Iteration always stops after the first error.
396	   *
397	   * As with all other simdjson methods, non-exception error handling is readily available through
398	   * the same interface, requiring you to check the error before using the document:
399	   *
400	   *   dom::parser parser;
401	   *   dom::document_stream docs;
402	   *   auto error = parser.parse_many(buf, len).get(docs);
403	   *   if (error) { cerr << error << endl; exit(1); }
404	   *   for (auto doc : docs) {
405	   *     std::string_view title;
406	   *     if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); }
407	   *     cout << title << endl;
408	   *   }
409	   *
410	   * ### REQUIRED: Buffer Padding
411	   *
412	   * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
413	   * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you
414	   * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the
415	   * SIMDJSON_PADDING bytes to avoid runtime warnings.
416	   *
417	   * ### Threads
418	   *
419	   * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
420	   * hood to do some lookahead.
421	   *
422	   * ### Parser Capacity
423	   *
424	   * If the parser's current capacity is less than batch_size, it will allocate enough capacity
425	   * to handle it (up to max_capacity).
426	   *
427	   * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
428	   * @param len The length of the concatenated JSON.
429	   * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
430	   *                   spot is cache-related: small enough to fit in cache, yet big enough to
431	   *                   parse as many documents as possible in one tight loop.
432	   *                   Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests.
433	   * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors:
434	   *         - MEMALLOC if the parser does not have enough capacity and memory allocation fails
435	   *         - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
436	   *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
437	   *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
438	   */
439	  inline simdjson_result<document_stream> parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
440	  /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
441	  inline simdjson_result<document_stream> parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
442	  /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
443	  inline simdjson_result<document_stream> parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
444	  inline simdjson_result<document_stream> parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe
445	  /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
446	  inline simdjson_result<document_stream> parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
447	  inline simdjson_result<document_stream> parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe
448
449	  /** @private We do not want to allow implicit conversion from C string to std::string. */
450	  simdjson_result<document_stream> parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete;
451
452	  /**
453	   * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
454	   * and `max_depth` depth.
455	   *
456	   * @param capacity The new capacity.
457	   * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
458	   * @return The error, if there is one.
459	   */
460	  simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
461
462	#ifndef SIMDJSON_DISABLE_DEPRECATED_API
463	  /**
464	   * @private deprecated because it returns bool instead of error_code, which is our standard for
465	   * failures. Use allocate() instead.
466	   *
467	   * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
468	   * and `max_depth` depth.
469	   *
470	   * @param capacity The new capacity.
471	   * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
472	   * @return true if successful, false if allocation failed.
473	   */
474	  [[deprecated("Use allocate() instead.")]]
475	  simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
476	#endif // SIMDJSON_DISABLE_DEPRECATED_API
477	  /**
478	   * The largest document this parser can support without reallocating.
479	   *
480	   * @return Current capacity, in bytes.
481	   */
482	  simdjson_inline size_t capacity() const noexcept;
483
484	  /**
485	   * The largest document this parser can automatically support.
486	   *
487	   * The parser may reallocate internal buffers as needed up to this amount.
488	   *
489	   * @return Maximum capacity, in bytes.
490	   */
491	  simdjson_inline size_t max_capacity() const noexcept;
492
493	  /**
494	   * The maximum level of nested object and arrays supported by this parser.
495	   *
496	   * @return Maximum depth.
497	   */
498	  simdjson_inline size_t max_depth() const noexcept;
499
500	  /**
501	   * Set max_capacity. This is the largest document this parser can automatically support.
502	   *
503	   * The parser may reallocate internal buffers as needed up to this amount as documents are passed
504	   * to it.
505	   *
506	   * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes,
507	   * iff you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY,
508	   * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY.
509	   *
510	   * This call will not allocate or deallocate, even if capacity is currently above max_capacity.
511	   *
512	   * @param max_capacity The new maximum capacity, in bytes.
513	   */
514	  simdjson_inline void set_max_capacity(size_t max_capacity) noexcept;
515
516	#ifdef SIMDJSON_THREADS_ENABLED
517	  /**
518	   * The parser instance can use threads when they are available to speed up some
519	   * operations. It is enabled by default. Changing this attribute will change the
520	   * behavior of the parser for future operations.
521	   */
522	  bool threaded{true};
523	#endif
524	  /** @private Use the new DOM API instead */
525	  class Iterator;
526	  /** @private Use simdjson_error instead */
527	  using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
528
529	  /** @private [for benchmarking access] The implementation to use */
530	  std::unique_ptr<internal::dom_parser_implementation> implementation{};
531
532	  /** @private Use `if (parser.parse(...).error())` instead */
533	  bool valid{false};
534	  /** @private Use `parser.parse(...).error()` instead */
535	  error_code error{UNINITIALIZED};
536
537	  /** @private Use `parser.parse(...).value()` instead */
538	  document doc{};
539
540	  /** @private returns true if the document parsed was valid */
541	  [[deprecated("Use the result of parser.parse() instead")]]
542	  inline bool is_valid() const noexcept;
543
544	  /**
545	   * @private return an error code corresponding to the last parsing attempt (see
546	   * simdjson.h); returns UNINITIALIZED if no parsing was attempted
547	   */
548	  [[deprecated("Use the result of parser.parse() instead")]]
549	  inline int get_error_code() const noexcept;
550
551	  /** @private return the string equivalent of "get_error_code" */
552	  [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]]
553	  inline std::string get_error_message() const noexcept;
554
555	  /** @private */
556	  [[deprecated("Use cout << on the result of parser.parse() instead")]]
557	  inline bool print_json(std::ostream &os) const noexcept;
558
559	  /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */
560	  inline bool dump_raw_tape(std::ostream &os) const noexcept;
561
562
563	private:
564	  /**
565	   * The maximum document length this parser will automatically support.
566	   *
567	   * The parser will not be automatically allocated above this amount.
568	   */
569	  size_t _max_capacity;
570
571	  /**
572	   * The loaded buffer (reused each time load() is called)
573	   */
574	  std::unique_ptr<char[]> loaded_bytes;
575
576	  /** Capacity of loaded_bytes buffer. */
577	  size_t _loaded_bytes_capacity{0};
578
579	  // all nodes are stored on the doc.tape using a 64-bit word.
580	  //
581	  // strings, double and ints are stored as
582	  //  a 64-bit word with a pointer to the actual value
583	  //
584	  //
585	  //
586	  // for objects or arrays, store [ or {  at the beginning and } and ] at the
587	  // end. For the openings ([ or {), we annotate them with a reference to the
588	  // location on the doc.tape of the end, and for the closings (} and ]), we
589	  // annotate them with a reference to the location of the opening
590	  //
591	  //
592
593	  /**
594	   * Ensure we have enough capacity to handle at least desired_capacity bytes,
595	   * and auto-allocate if not. This also allocates memory if needed in the
596	   * internal document.
597	   */
598	  inline error_code ensure_capacity(size_t desired_capacity) noexcept;
599	  /**
600	   * Ensure we have enough capacity to handle at least desired_capacity bytes,
601	   * and auto-allocate if not. This also allocates memory if needed in the
602	   * provided document.
603	   */
604	  inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept;
605
606	  /** Read the file into loaded_bytes */
607	  inline simdjson_result<size_t> read_file(const std::string &path) noexcept;
608
609	  friend class parser::Iterator;
610	  friend class document_stream;
611
612
613	}; // class parser
614
615	} // namespace dom
616	} // namespace simdjson
617
618	#endif // SIMDJSON_DOM_PARSER_H
619

Browse the source code of Velox/build/_deps/simdjson-src/include/simdjson/dom/parser.h