1#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
2#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
3
4#include "simdjson/common_defs.h"
5#include "simdjson/error.h"
6#include <memory>
7
8namespace simdjson {
9
// Forward declaration only: this header refers to dom::document by reference,
// so the full definition is not required here.
namespace dom { class document; } // namespace dom
13
/**
 * Selects how dom_parser_implementation::stage1 treats its input.
 *
 * - regular:           the input is expected to be a fully formed JSON document.
 * - streaming_partial: the input may be truncated; it is a window within a
 *                      stream of JSON documents.
 * - streaming_final:   allows a final unterminated string to be truncated. It is
 *                      useful in conjunction with streaming_partial.
 */
enum class stage1_mode {
  regular,
  streaming_partial,
  streaming_final
};
23
24/**
25 * Returns true if mode == streaming_partial or mode == streaming_final
26 */
27inline bool is_streaming(stage1_mode mode) {
28 // performance note: it is probably faster to check that mode is different
29 // from regular than checking that it is either streaming_partial or streaming_final.
30 return (mode != stage1_mode::regular);
31 // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final);
32}
33
34
35namespace internal {
36
37
38/**
39 * An implementation of simdjson's DOM parser for a particular CPU architecture.
40 *
41 * This class is expected to be accessed only by pointer, and never move in memory (though the
42 * pointer can move).
43 */
44class dom_parser_implementation {
45public:
46
47 /**
48 * @private For internal implementation use
49 *
50 * Run a full JSON parse on a single document (stage1 + stage2).
51 *
52 * Guaranteed only to be called when capacity > document length.
53 *
54 * Overridden by each implementation.
55 *
56 * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
57 * @param len The length of the json document.
58 * @return The error code, or SUCCESS if there was no error.
59 */
60 simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0;
61
62 /**
63 * @private For internal implementation use
64 *
65 * Stage 1 of the document parser.
66 *
67 * Guaranteed only to be called when capacity > document length.
68 *
69 * Overridden by each implementation.
70 *
71 * @param buf The json document to parse.
72 * @param len The length of the json document.
73 * @param streaming Whether this is being called by parser::parse_many.
74 * @return The error code, or SUCCESS if there was no error.
75 */
76 simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0;
77
78 /**
79 * @private For internal implementation use
80 *
81 * Stage 2 of the document parser.
82 *
83 * Called after stage1().
84 *
85 * Overridden by each implementation.
86 *
87 * @param doc The document to output to.
88 * @return The error code, or SUCCESS if there was no error.
89 */
90 simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0;
91
92 /**
93 * @private For internal implementation use
94 *
95 * Stage 2 of the document parser for parser::parse_many.
96 *
97 * Guaranteed only to be called after stage1().
98 * Overridden by each implementation.
99 *
100 * @param doc The document to output to.
101 * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed.
102 */
103 simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0;
104
105 /**
106 * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There
107 * must be an unescaped quote terminating the string. It returns the final output
108 * position as pointer. In case of error (e.g., the string has bad escaped codes),
109 * then null_nullptrptr is returned. It is assumed that the output buffer is large
110 * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes +
111 * SIMDJSON_PADDING bytes.
112 *
113 * Overridden by each implementation.
114 *
115 * @param str pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote.
116 * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size.
117 * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs.
118 * @return end of the of the written region (exclusive) or nullptr in case of error.
119 */
120 simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0;
121
122 /**
123 * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There
124 * must be an unescaped quote terminating the string. It returns the final output
125 * position as pointer. In case of error (e.g., the string has bad escaped codes),
126 * then null_nullptrptr is returned. It is assumed that the output buffer is large
127 * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes +
128 * SIMDJSON_PADDING bytes.
129 *
130 * Overridden by each implementation.
131 *
132 * @param str pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote.
133 * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size.
134 * @return end of the of the written region (exclusive) or nullptr in case of error.
135 */
136 simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0;
137
138 /**
139 * Change the capacity of this parser.
140 *
141 * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB)
142 * and an CAPACITY error is returned if it is attempted.
143 *
144 * Generally used for reallocation.
145 *
146 * @param capacity The new capacity.
147 * @param max_depth The new max_depth.
148 * @return The error code, or SUCCESS if there was no error.
149 */
150 virtual error_code set_capacity(size_t capacity) noexcept = 0;
151
152 /**
153 * Change the max depth of this parser.
154 *
155 * Generally used for reallocation.
156 *
157 * @param capacity The new capacity.
158 * @param max_depth The new max_depth.
159 * @return The error code, or SUCCESS if there was no error.
160 */
161 virtual error_code set_max_depth(size_t max_depth) noexcept = 0;
162
163 /**
164 * Deallocate this parser.
165 */
166 virtual ~dom_parser_implementation() = default;
167
168 /** Number of structural indices passed from stage 1 to stage 2 */
169 uint32_t n_structural_indexes{0};
170 /** Structural indices passed from stage 1 to stage 2 */
171 std::unique_ptr<uint32_t[]> structural_indexes{};
172 /** Next structural index to parse */
173 uint32_t next_structural_index{0};
174
175 /**
176 * The largest document this parser can support without reallocating.
177 *
178 * @return Current capacity, in bytes.
179 */
180 simdjson_inline size_t capacity() const noexcept;
181
182 /**
183 * The maximum level of nested object and arrays supported by this parser.
184 *
185 * @return Maximum depth, in bytes.
186 */
187 simdjson_inline size_t max_depth() const noexcept;
188
189 /**
190 * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
191 * and `max_depth` depth.
192 *
193 * @param capacity The new capacity.
194 * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
195 * @return The error, if there is one.
196 */
197 simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept;
198
199
200protected:
201 /**
202 * The maximum document length this parser supports.
203 *
204 * Buffers are large enough to handle any document up to this length.
205 */
206 size_t _capacity{0};
207
208 /**
209 * The maximum depth (number of nested objects and arrays) supported by this parser.
210 *
211 * Defaults to DEFAULT_MAX_DEPTH.
212 */
213 size_t _max_depth{0};
214
215 // Declaring these so that subclasses can use them to implement their constructors.
216 simdjson_inline dom_parser_implementation() noexcept;
217 simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
218 simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
219
220 simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete;
221 simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete;
222}; // class dom_parser_implementation
223
224simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
225simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
226simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
227
228simdjson_inline size_t dom_parser_implementation::capacity() const noexcept {
229 return _capacity;
230}
231
232simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept {
233 return _max_depth;
234}
235
236simdjson_warn_unused
237inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept {
238 if (this->max_depth() != max_depth) {
239 error_code err = set_max_depth(max_depth);
240 if (err) { return err; }
241 }
242 if (_capacity != capacity) {
243 error_code err = set_capacity(capacity);
244 if (err) { return err; }
245 }
246 return SUCCESS;
247}
248
249} // namespace internal
250} // namespace simdjson
251
252#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H
253