1 | #ifndef SIMDJSON_DOM_PARSER_H |
2 | #define SIMDJSON_DOM_PARSER_H |
3 | |
4 | #include "simdjson/common_defs.h" |
5 | #include "simdjson/dom/document.h" |
6 | #include "simdjson/error.h" |
7 | #include "simdjson/internal/dom_parser_implementation.h" |
8 | #include "simdjson/internal/tape_ref.h" |
9 | #include "simdjson/padded_string.h" |
10 | #include "simdjson/portability.h" |
11 | #include <memory> |
12 | #include <ostream> |
13 | #include <string> |
14 | |
15 | namespace simdjson { |
16 | |
17 | namespace dom { |
18 | |
19 | class document_stream; |
20 | class element; |
21 | |
22 | /** The default batch size, in bytes (1000000, i.e. 1MB), for parser.parse_many() and parser.load_many() */ |
23 | static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; |
24 | /** |
25 | * The smallest batch size that is used: some adversary might try to set the batch size to 0 or 1, which might cause problems. |
26 | * We set a minimum of 32B since anything smaller is highly likely to be an error (load_many() documents that a smaller |
27 | * batch_size is replaced by this value). In practice, most users will want a much larger batch size. |
28 | * |
29 | * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON |
30 | * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. |
31 | */ |
32 | static constexpr size_t MINIMAL_BATCH_SIZE = 32; |
33 | |
34 | /** |
35 | * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes): this is the lower bound, in bytes, that parser::set_max_capacity() documents for the maximal capacity. |
36 | */ |
37 | static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; |
38 | |
39 | /** |
40 | * A persistent document parser. |
41 | * |
42 | * The parser is designed to be reused, holding the internal buffers necessary to do parsing, |
43 | * as well as memory for a single document. The parsed document is overwritten on each parse. |
44 | * |
45 | * This class cannot be copied, only moved, to avoid unintended allocations. |
46 | * |
47 | * @note Moving a parser instance may invalidate "dom::element" instances. If you need to |
48 | * preserve both the "dom::element" instances and the parser, consider wrapping the parser |
49 | * instance in a std::unique_ptr instance: |
50 | * |
51 | * std::unique_ptr<dom::parser> parser(new dom::parser{}); |
52 | * auto error = parser->load(f).get(root); |
53 | * |
54 | * You can then move the std::unique_ptr safely. |
55 | * |
56 | * @note This is not thread safe: one parser cannot produce two documents at the same time! |
57 | */ |
58 | class parser { |
59 | public: |
60 | /** |
61 | * Create a JSON parser. |
62 | * |
63 | * The new parser will have zero capacity. |
64 | * |
65 | * @param max_capacity The maximum document length the parser can automatically handle. The parser |
66 | * will allocate more capacity on an as-needed basis (when it sees documents too big to handle) |
67 | * up to this amount. The parser still starts with zero capacity no matter what this number is: |
68 | * to allocate an initial capacity, call allocate() after constructing the parser. |
69 | * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). |
70 | */ |
71 | simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; |
72 | /** |
73 | * Take another parser's buffers and state (move construction). |
74 | * |
75 | * @param other The parser to take. Its capacity is zeroed. |
76 | */ |
77 | simdjson_inline parser(parser &&other) noexcept; |
78 | parser(const parser &) = delete; ///< @private Disallow copying |
79 | /** |
80 | * Take another parser's buffers and state (move assignment). |
81 | * |
82 | * @param other The parser to take. Its capacity is zeroed. |
83 | */ |
84 | simdjson_inline parser &operator=(parser &&other) noexcept; |
85 | parser &operator=(const parser &) = delete; ///< @private Disallow copying |
86 | |
87 | /** Deallocate the JSON parser. */ |
88 | ~parser()=default; |
89 | |
90 | /** |
91 | * Load a JSON document from a file and return a reference to it. |
92 | * |
93 | * dom::parser parser; |
94 | * const element doc = parser.load("jsonexamples/twitter.json"); |
95 | * |
96 | * The function is eager: the file's content is loaded in memory inside the parser instance |
97 | * and immediately parsed. The file can be deleted after the `parser.load` call. |
98 | * |
99 | * ### IMPORTANT: Document Lifetime |
100 | * |
101 | * The JSON document still lives in the parser: this is the most efficient way to parse JSON |
102 | * documents because it reuses the same buffers, but you *must* use the document before you |
103 | * destroy the parser or call load() or parse() again. |
104 | * |
105 | * Moving the parser instance is safe, but it invalidates the element instances. You may store |
106 | * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like |
107 | * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`. |
108 | * |
109 | * ### Parser Capacity |
110 | * |
111 | * If the parser's current capacity is less than the file length, it will allocate enough capacity |
112 | * to handle it (up to max_capacity). |
113 | * |
114 | * @param path The path to load. |
115 | * @return The document, or an error: |
116 | * - IO_ERROR if there was an error opening or reading the file. |
117 | * Be mindful that on some 32-bit systems, |
118 | * the file size might be limited to 2 GB. |
119 | * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. |
120 | * - CAPACITY if the parser does not have enough capacity and len > max_capacity. |
121 | * - other json errors if parsing fails. You should not rely on these errors to always be the same for the |
122 | * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). |
123 | */ |
124 | inline simdjson_result<element> load(const std::string &path) & noexcept; |
125 | inline simdjson_result<element> load(const std::string &path) && = delete ; |
126 | /** |
127 | * Parse a JSON document and return a temporary reference to it. |
128 | * |
129 | * dom::parser parser; |
130 | * element doc_root = parser.parse(buf, len); |
131 | * |
132 | * The function eagerly parses the input: the input can be modified and discarded after |
133 | * the `parser.parse(buf, len)` call has completed. |
134 | * |
135 | * ### IMPORTANT: Document Lifetime |
136 | * |
137 | * The JSON document still lives in the parser: this is the most efficient way to parse JSON |
138 | * documents because it reuses the same buffers, but you *must* use the document before you |
139 | * destroy the parser or call parse() again. |
140 | * |
141 | * Moving the parser instance is safe, but it invalidates the element instances. You may store |
142 | * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like |
143 | * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`. |
144 | * |
145 | * ### REQUIRED: Buffer Padding |
146 | * |
147 | * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what |
148 | * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you |
149 | * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the |
150 | * SIMDJSON_PADDING bytes to avoid runtime warnings. |
151 | * |
152 | * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, |
153 | * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: |
154 | * |
155 | * const char *json = R"({"key":"value"})"; |
156 | * const size_t json_len = std::strlen(json); |
157 | * simdjson::dom::parser parser; |
158 | * simdjson::dom::element element = parser.parse(json, json_len); |
159 | * |
160 | * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), |
161 | * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. |
162 | * The benefit of setting realloc_if_needed to false is that you avoid a temporary |
163 | * memory allocation and a copy. |
164 | * |
165 | * The padded bytes may be read. It is not important how you initialize |
166 | * these bytes though we recommend a sensible default like null character values or spaces. |
167 | * For example, the following low-level code is safe: |
168 | * |
169 | * const char *json = R"({"key":"value"})"; |
170 | * const size_t json_len = std::strlen(json); |
171 | * std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; |
172 | * std::memcpy(padded_json_copy.get(), json, json_len); |
173 | * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); |
174 | * simdjson::dom::parser parser; |
175 | * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); |
176 | * |
177 | * ### Parser Capacity |
178 | * |
179 | * If the parser's current capacity is less than len, it will allocate enough capacity |
180 | * to handle it (up to max_capacity). |
181 | * |
182 | * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless |
183 | * realloc_if_needed is true. |
184 | * @param len The length of the JSON. |
185 | * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. |
186 | * @return An element pointing at the root of the document, or an error: |
187 | * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, |
188 | * and memory allocation fails. |
189 | * - CAPACITY if the parser does not have enough capacity and len > max_capacity. |
190 | * - other json errors if parsing fails. You should not rely on these errors to always be the same for the |
191 | * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). |
192 | */ |
193 | inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; |
194 | inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; |
195 | /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ |
196 | simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; |
197 | simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; |
198 | /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ |
199 | simdjson_inline simdjson_result<element> parse(const std::string &s) & noexcept; |
200 | simdjson_inline simdjson_result<element> parse(const std::string &s) && =delete; |
201 | /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ |
202 | simdjson_inline simdjson_result<element> parse(const padded_string &s) & noexcept; |
203 | simdjson_inline simdjson_result<element> parse(const padded_string &s) && =delete; |
204 | /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ |
205 | simdjson_inline simdjson_result<element> parse(const padded_string_view &v) & noexcept; |
206 | simdjson_inline simdjson_result<element> parse(const padded_string_view &v) && =delete; |
207 | |
208 | /** @private We do not want to allow implicit conversion from C string to std::string. */ |
209 | simdjson_inline simdjson_result<element> parse(const char *buf) noexcept = delete; |
210 | |
211 | /** |
212 | * Parse a JSON document into a provided document instance and return a temporary reference to it. |
213 | * It is similar to the function `parse` except that instead of parsing into the internal |
214 | * `document` instance associated with the parser, it allows the user to provide a document |
215 | * instance. |
216 | * |
217 | * dom::parser parser; |
218 | * dom::document doc; |
219 | * element doc_root = parser.parse_into_document(doc, buf, len); |
220 | * |
221 | * The function eagerly parses the input: the input can be modified and discarded after |
222 | * the `parser.parse_into_document(doc, buf, len)` call has completed. |
223 | * |
224 | * ### IMPORTANT: Document Lifetime |
225 | * |
226 | * After the call to parse_into_document, the parser is no longer needed. |
227 | * |
228 | * The JSON document lives in the document instance: you must keep the document |
229 | * instance alive while you navigate through it (i.e., use the returned value from |
230 | * parse_into_document). You are encouraged to reuse the document instance |
231 | * many times with new data to avoid reallocations: |
232 | * |
233 | * dom::document doc; |
234 | * element doc_root1 = parser.parse_into_document(doc, buf1, len1); |
235 | * //... doc_root1 is a pointer inside doc |
236 | * element doc_root2 = parser.parse_into_document(doc, buf2, len2); |
237 | * //... doc_root2 is a pointer inside doc |
238 | * // at this point doc_root1 is no longer safe |
239 | * |
240 | * Moving the document instance is safe, but it invalidates the element instances. After |
241 | * moving a document, you can recover safe access to the document root with its `root()` method. |
242 | * |
243 | * @param doc The document instance where the parsed data will be stored (on success). |
244 | * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless |
245 | * realloc_if_needed is true. |
246 | * @param len The length of the JSON. |
247 | * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. |
248 | * @return An element pointing at the root of the document, or an error: |
249 | * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, |
250 | * and memory allocation fails. |
251 | * - CAPACITY if the parser does not have enough capacity and len > max_capacity. |
252 | * - other json errors if parsing fails. You should not rely on these errors to always be the same for the |
253 | * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). |
254 | */ |
255 | inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; |
256 | inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; |
257 | /** @overload parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed) */ |
258 | simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; |
259 | simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; |
260 | /** @overload parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed) */ |
261 | simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) & noexcept; |
262 | simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) && =delete; |
263 | /** @overload parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed) */ |
264 | simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) & noexcept; |
265 | simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) && =delete; |
266 | |
267 | /** @private We do not want to allow implicit conversion from C string to std::string. */ |
268 | simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf) noexcept = delete; |
269 | |
270 | /** |
271 | * Load a file containing many JSON documents. |
272 | * |
273 | * dom::parser parser; |
274 | * for (const element doc : parser.load_many(path)) { |
275 | * cout << std::string(doc["title"]) << endl; |
276 | * } |
277 | * |
278 | * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)` |
279 | * function has returned. The memory is held by the `parser` instance. |
280 | * |
281 | * The function is lazy: it may be that no more than one JSON document at a time is parsed. |
282 | * And, possibly, no document may have been parsed when the `parser.load_many(path)` function |
283 | * returned. |
284 | * |
285 | * ### Format |
286 | * |
287 | * The file must contain a series of one or more JSON documents, concatenated into a single |
288 | * buffer, separated by whitespace. It effectively parses until it has a fully valid document, |
289 | * then starts parsing the next document at that point. (It does this with more parallelism and |
290 | * lookahead than you might think, though.) |
291 | * |
292 | * Documents that consist of an object or array may omit the whitespace between them, concatenating |
293 | * with no separator. Documents that consist of a single primitive (i.e. documents that are not |
294 | * arrays or objects) MUST be separated with whitespace. |
295 | * |
296 | * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. |
297 | * Setting batch_size to excessively large or excessively small values may negatively impact |
298 | * performance. |
299 | * |
300 | * ### Error Handling |
301 | * |
302 | * All errors are returned during iteration: if there is a global error such as memory allocation, |
303 | * it will be yielded as the first result. Iteration always stops after the first error. |
304 | * |
305 | * As with all other simdjson methods, non-exception error handling is readily available through |
306 | * the same interface, requiring you to check the error before using the document: |
307 | * |
308 | * dom::parser parser; |
309 | * dom::document_stream docs; |
310 | * auto error = parser.load_many(path).get(docs); |
311 | * if (error) { cerr << error << endl; exit(1); } |
312 | * for (auto doc : docs) { |
313 | * std::string_view title; |
314 | * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } |
315 | * cout << title << endl; |
316 | * } |
317 | * |
318 | * ### Threads |
319 | * |
320 | * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the |
321 | * hood to do some lookahead. |
322 | * |
323 | * ### Parser Capacity |
324 | * |
325 | * If the parser's current capacity is less than batch_size, it will allocate enough capacity |
326 | * to handle it (up to max_capacity). |
327 | * |
328 | * @param path File name pointing at the concatenated JSON to parse. |
329 | * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet |
330 | * spot is cache-related: small enough to fit in cache, yet big enough to |
331 | * parse as many documents as possible in one tight loop. |
332 | * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet |
333 | * spot in our tests. |
334 | * If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE |
335 | * (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE. |
336 | * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: |
337 | * - IO_ERROR if there was an error opening or reading the file. |
338 | * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. |
339 | * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. |
340 | * - other json errors if parsing fails. You should not rely on these errors to always be the same for the |
341 | * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). |
342 | */ |
343 | inline simdjson_result<document_stream> load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; |
344 | |
345 | /** |
346 | * Parse a buffer containing many JSON documents. |
347 | * |
348 | * dom::parser parser; |
349 | * for (element doc : parser.parse_many(buf, len)) { |
350 | * cout << std::string(doc["title"]) << endl; |
351 | * } |
352 | * |
353 | * No copy of the input buffer is made. |
354 | * |
355 | * The function is lazy: it may be that no more than one JSON document at a time is parsed. |
356 | * And, possibly, no document may have been parsed when the `parser.parse_many(buf, len)` function |
357 | * returned. |
358 | * |
359 | * The caller is responsible for ensuring that the input string data remains unchanged and is |
360 | * not deleted during the loop. In particular, the following is unsafe (the deleted rvalue-reference overloads below are meant to reject such calls): |
361 | * |
362 | * auto docs = parser.parse_many("[\"temporary data\"]"_padded); |
363 | * // here the string "[\"temporary data\"]" may no longer exist in memory |
364 | * // the parser instance may not have even accessed the input yet |
365 | * for (element doc : docs) { |
366 | * cout << std::string(doc["title"]) << endl; |
367 | * } |
368 | * |
369 | * The following is safe: |
370 | * |
371 | * auto json = "[\"temporary data\"]"_padded; |
372 | * auto docs = parser.parse_many(json); |
373 | * for (element doc : docs) { |
374 | * cout << std::string(doc["title"]) << endl; |
375 | * } |
376 | * |
377 | * ### Format |
378 | * |
379 | * The buffer must contain a series of one or more JSON documents, concatenated into a single |
380 | * buffer, separated by whitespace. It effectively parses until it has a fully valid document, |
381 | * then starts parsing the next document at that point. (It does this with more parallelism and |
382 | * lookahead than you might think, though.) |
383 | * |
384 | * Documents that consist of an object or array may omit the whitespace between them, concatenating |
385 | * with no separator. Documents that consist of a single primitive (i.e. documents that are not |
386 | * arrays or objects) MUST be separated with whitespace. |
387 | * |
388 | * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. |
389 | * Setting batch_size to excessively large or excessively small values may negatively impact |
390 | * performance. |
391 | * |
392 | * ### Error Handling |
393 | * |
394 | * All errors are returned during iteration: if there is a global error such as memory allocation, |
395 | * it will be yielded as the first result. Iteration always stops after the first error. |
396 | * |
397 | * As with all other simdjson methods, non-exception error handling is readily available through |
398 | * the same interface, requiring you to check the error before using the document: |
399 | * |
400 | * dom::parser parser; |
401 | * dom::document_stream docs; |
402 | * auto error = parser.parse_many(buf, len).get(docs); |
403 | * if (error) { cerr << error << endl; exit(1); } |
404 | * for (auto doc : docs) { |
405 | * std::string_view title; |
406 | * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } |
407 | * cout << title << endl; |
408 | * } |
409 | * |
410 | * ### REQUIRED: Buffer Padding |
411 | * |
412 | * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what |
413 | * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you |
414 | * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the |
415 | * SIMDJSON_PADDING bytes to avoid runtime warnings. |
416 | * |
417 | * ### Threads |
418 | * |
419 | * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the |
420 | * hood to do some lookahead. |
421 | * |
422 | * ### Parser Capacity |
423 | * |
424 | * If the parser's current capacity is less than batch_size, it will allocate enough capacity |
425 | * to handle it (up to max_capacity). |
426 | * |
427 | * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. |
428 | * @param len The length of the concatenated JSON. |
429 | * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet |
430 | * spot is cache-related: small enough to fit in cache, yet big enough to |
431 | * parse as many documents as possible in one tight loop. |
432 | * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. |
433 | * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: |
434 | * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. |
435 | * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. |
436 | * - other json errors if parsing fails. You should not rely on these errors to always be the same for the |
437 | * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). |
438 | */ |
439 | inline simdjson_result<document_stream> parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; |
440 | /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ |
441 | inline simdjson_result<document_stream> parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; |
442 | /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ |
443 | inline simdjson_result<document_stream> parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; |
444 | inline simdjson_result<document_stream> parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe: a temporary may not outlive the lazy stream. NOTE(review): batch_size has no default here, so a one-argument call with a temporary appears to still bind to the const& overload - confirm |
445 | /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ |
446 | inline simdjson_result<document_stream> parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; |
447 | inline simdjson_result<document_stream> parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe: a temporary may not outlive the lazy stream. NOTE(review): batch_size has no default here, so a one-argument call with a temporary appears to still bind to the const& overload - confirm |
448 | |
449 | /** @private We do not want to allow implicit conversion from C string to std::string. */ |
450 | simdjson_result<document_stream> parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; |
451 | |
452 | /** |
453 | * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length |
454 | * and `max_depth` depth. |
455 | * |
456 | * @param capacity The new capacity, in bytes. |
457 | * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. |
458 | * @return The error, if there is one. |
459 | */ |
460 | simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; |
461 | |
462 | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
463 | /** |
464 | * @private deprecated because it returns bool instead of error_code, which is our standard for |
465 | * failures. Use allocate() instead. |
466 | * |
467 | * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length |
468 | * and `max_depth` depth. |
469 | * |
470 | * @param capacity The new capacity, in bytes. |
471 | * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. |
472 | * @return true if successful, false if allocation failed. |
473 | */ |
474 | [[deprecated("Use allocate() instead." )]] |
475 | simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; |
476 | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
477 | /** |
478 | * The largest document this parser can support without reallocating. |
479 | * |
480 | * @return Current capacity, in bytes. |
481 | */ |
482 | simdjson_inline size_t capacity() const noexcept; |
483 | |
484 | /** |
485 | * The largest document this parser can automatically support. |
486 | * |
487 | * The parser may reallocate internal buffers as needed up to this amount. |
488 | * |
489 | * @return Maximum capacity, in bytes. |
490 | */ |
491 | simdjson_inline size_t max_capacity() const noexcept; |
492 | |
493 | /** |
494 | * The maximum level of nested objects and arrays supported by this parser. |
495 | * |
496 | * @return Maximum depth, as a number of nesting levels. |
497 | */ |
498 | simdjson_inline size_t max_depth() const noexcept; |
499 | |
500 | /** |
501 | * Set max_capacity. This is the largest document this parser can automatically support. |
502 | * |
503 | * The parser may reallocate internal buffers as needed up to this amount as documents are passed |
504 | * to it. |
505 | * |
506 | * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes, |
507 | * if you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY, |
508 | * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY. |
509 | * |
510 | * This call will not allocate or deallocate, even if capacity is currently above max_capacity. |
511 | * |
512 | * @param max_capacity The new maximum capacity, in bytes. |
513 | */ |
514 | simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; |
515 | |
516 | #ifdef SIMDJSON_THREADS_ENABLED |
517 | /** |
518 | * The parser instance can use threads when they are available to speed up some |
519 | * operations. It is enabled by default. Changing this attribute will change the |
520 | * behavior of the parser for future operations. |
521 | */ |
522 | bool threaded{true}; |
523 | #endif |
524 | /** @private Use the new DOM API instead */ |
525 | class Iterator; |
526 | /** @private Use simdjson_error instead */ |
527 | using InvalidJSON [[deprecated("Use simdjson_error instead" )]] = simdjson_error; |
528 | |
529 | /** @private [for benchmarking access] The implementation to use */ |
530 | std::unique_ptr<internal::dom_parser_implementation> implementation{}; |
531 | |
532 | /** @private Use `if (parser.parse(...).error())` instead */ |
533 | bool valid{false}; |
534 | /** @private Use `parser.parse(...).error()` instead */ |
535 | error_code error{UNINITIALIZED}; |
536 | |
537 | /** @private Use `parser.parse(...).value()` instead */ |
538 | document doc{}; |
539 | |
540 | /** @private returns true if the document parsed was valid */ |
541 | [[deprecated("Use the result of parser.parse() instead" )]] |
542 | inline bool is_valid() const noexcept; |
543 | |
544 | /** |
545 | * @private return an error code corresponding to the last parsing attempt (see |
546 | * simdjson.h); will return UNINITIALIZED if no parsing was attempted |
547 | */ |
548 | [[deprecated("Use the result of parser.parse() instead" )]] |
549 | inline int get_error_code() const noexcept; |
550 | |
551 | /** @private return the string equivalent of "get_error_code" */ |
552 | [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error" )]] |
553 | inline std::string get_error_message() const noexcept; |
554 | |
555 | /** @private */ |
556 | [[deprecated("Use cout << on the result of parser.parse() instead" )]] |
557 | inline bool print_json(std::ostream &os) const noexcept; |
558 | |
559 | /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ |
560 | inline bool dump_raw_tape(std::ostream &os) const noexcept; |
561 | |
562 | |
563 | private: |
564 | /** |
565 | * The maximum document length this parser will automatically support. |
566 | * |
567 | * The parser will not be automatically allocated above this amount. |
568 | */ |
569 | size_t _max_capacity; |
570 | |
571 | /** |
572 | * The loaded buffer (reused each time load() is called) |
573 | */ |
574 | std::unique_ptr<char[]> loaded_bytes; |
575 | |
576 | /** Capacity of loaded_bytes buffer. */ |
577 | size_t _loaded_bytes_capacity{0}; |
578 | |
579 | // All nodes are stored on the doc.tape using a 64-bit word. |
580 | // |
581 | // Strings, doubles and ints are stored as |
582 | // a 64-bit word with a pointer to the actual value. |
583 | // |
584 | // |
585 | // |
586 | // For objects or arrays, store [ or { at the beginning and } and ] at the |
587 | // end. For the openings ([ or {), we annotate them with a reference to the |
588 | // location on the doc.tape of the end, and for the closings (} and ]), we |
589 | // annotate them with a reference to the location of the opening. |
590 | // |
591 | // |
592 | |
593 | /** |
594 | * Ensure we have enough capacity to handle at least desired_capacity bytes, |
595 | * and auto-allocate if not. This also allocates memory if needed in the |
596 | * internal document. |
597 | */ |
598 | inline error_code ensure_capacity(size_t desired_capacity) noexcept; |
599 | /** |
600 | * Ensure we have enough capacity to handle at least desired_capacity bytes, |
601 | * and auto-allocate if not. This also allocates memory if needed in the |
602 | * provided document. |
603 | */ |
604 | inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; |
605 | |
606 | /** Read the file into loaded_bytes */ |
607 | inline simdjson_result<size_t> read_file(const std::string &path) noexcept; |
608 | |
609 | friend class parser::Iterator; |
610 | friend class document_stream; |
611 | |
612 | |
613 | }; // class parser |
614 | |
615 | } // namespace dom |
616 | } // namespace simdjson |
617 | |
618 | #endif // SIMDJSON_DOM_PARSER_H |
619 | |