1 | #ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H |
2 | #define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H |
3 | |
4 | #include "simdjson/common_defs.h" |
5 | #include "simdjson/error.h" |
6 | #include <memory> |
7 | |
8 | namespace simdjson { |
9 | |
10 | namespace dom { |
11 | class document; |
12 | } // namespace dom |
13 | |
14 | /** |
15 | * Operating mode passed to the dom_parser_implementation::stage1 function. |
16 | * - regular: a fully formed JSON document is expected. |
17 | * - streaming_partial: a possibly truncated input within a stream of JSON documents. |
18 | * - streaming_final: final unterminated strings may be truncated (useful with streaming_partial). |
19 | */ |
20 | enum class stage1_mode { |
21 | regular, streaming_partial, streaming_final |
22 | }; |
23 | |
24 | /** |
25 | * Tells whether mode is one of the streaming modes (streaming_partial or streaming_final). |
26 | */ |
27 | inline bool is_streaming(stage1_mode mode) { |
28 | // Every mode other than regular is a streaming mode, so a single comparison against |
29 | // regular stands in for the two-way test (and is probably the faster of the two). |
30 | const bool not_regular = !(mode == stage1_mode::regular); |
31 | return not_regular; |
32 | } |
33 | |
34 | |
35 | namespace internal { |
36 | |
37 | |
38 | /** |
39 | * An implementation of simdjson's DOM parser for a particular CPU architecture. |
40 | * Abstract: the parsing stages, string unescaping and resizing hooks are pure virtual. |
41 | * |
42 | * This class is expected to be accessed only by pointer, and never move in memory (though the pointer can move). |
43 | */ |
44 | class dom_parser_implementation { |
45 | public: |
46 | |
47 | /** |
48 | * @private For internal implementation use |
49 | * |
50 | * Run a full JSON parse on a single document (stage1 + stage2). |
51 | * |
52 | * Guaranteed only to be called when capacity > document length. |
53 | * Overridden by each implementation. |
54 | * |
55 | * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. |
56 | * @param len The length of the json document. |
57 | * @param doc The document to output to. |
58 | * @return The error code, or SUCCESS if there was no error. |
59 | */ |
60 | simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; |
61 | |
62 | /** |
63 | * @private For internal implementation use |
64 | * |
65 | * Stage 1 of the document parser. |
66 | * |
67 | * Guaranteed only to be called when capacity > document length. |
68 | * |
69 | * Overridden by each implementation. |
70 | * |
71 | * @param buf The json document to parse. |
72 | * @param len The length of the json document. |
73 | * @param streaming The stage1_mode to use (regular, or one of the streaming modes when called by parser::parse_many). |
74 | * @return The error code, or SUCCESS if there was no error. |
75 | */ |
76 | simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; |
77 | |
78 | /** |
79 | * @private For internal implementation use |
80 | * |
81 | * Stage 2 of the document parser. |
82 | * |
83 | * Called after stage1(). |
84 | * |
85 | * Overridden by each implementation. |
86 | * |
87 | * @param doc The document to output to. |
88 | * @return The error code, or SUCCESS if there was no error. |
89 | */ |
90 | simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; |
91 | |
92 | /** |
93 | * @private For internal implementation use |
94 | * |
95 | * Stage 2 of the document parser for parser::parse_many. |
96 | * |
97 | * Guaranteed only to be called after stage1(). |
98 | * Overridden by each implementation. |
99 | * |
100 | * @param doc The document to output to. |
101 | * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. |
102 | */ |
103 | simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; |
104 | |
105 | /** |
106 | * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There |
107 | * must be an unescaped quote terminating the string. It returns the final output |
108 | * position as a pointer. In case of error (e.g., the string has bad escaped codes), |
109 | * nullptr is returned. It is assumed that the output buffer is large |
110 | * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + |
111 | * SIMDJSON_PADDING bytes. |
112 | * |
113 | * Overridden by each implementation. |
114 | * |
115 | * @param src pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. |
116 | * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size. |
117 | * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs. |
118 | * @return end of the written region (exclusive) or nullptr in case of error. |
119 | */ |
120 | simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0; |
121 | |
122 | /** |
123 | * Unescape a possibly invalid (wobbly) UTF-8 string from src to dst, stopping at a final unescaped quote. There |
124 | * must be an unescaped quote terminating the string. It returns the final output |
125 | * position as a pointer. In case of error (e.g., the string has bad escaped codes), |
126 | * nullptr is returned. It is assumed that the output buffer is large |
127 | * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + |
128 | * SIMDJSON_PADDING bytes. |
129 | * |
130 | * Overridden by each implementation. |
131 | * |
132 | * @param src pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote. |
133 | * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size. |
134 | * @return end of the written region (exclusive) or nullptr in case of error. |
135 | */ |
136 | simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0; |
137 | |
138 | /** |
139 | * Change the capacity of this parser. |
140 | * |
141 | * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) |
142 | * and a CAPACITY error is returned if it is attempted. |
143 | * |
144 | * Generally used for reallocation. |
145 | * Overridden by each implementation. |
146 | * |
147 | * @param capacity The new capacity. |
148 | * @return The error code, or SUCCESS if there was no error. |
149 | */ |
150 | virtual error_code set_capacity(size_t capacity) noexcept = 0; |
151 | |
152 | /** |
153 | * Change the max depth of this parser. |
154 | * |
155 | * Generally used for reallocation. |
156 | * Overridden by each implementation. |
157 | * |
158 | * @param max_depth The new max_depth. |
159 | * @return The error code, or SUCCESS if there was no error. |
160 | */ |
161 | virtual error_code set_max_depth(size_t max_depth) noexcept = 0; |
162 | |
163 | /** |
164 | * Deallocate this parser. |
165 | */ |
166 | virtual ~dom_parser_implementation() = default; |
167 | |
168 | /** Number of structural indices passed from stage 1 to stage 2 */ |
169 | uint32_t n_structural_indexes{0}; |
170 | /** Structural indices passed from stage 1 to stage 2 */ |
171 | std::unique_ptr<uint32_t[]> structural_indexes{}; |
172 | /** Next structural index to parse */ |
173 | uint32_t next_structural_index{0}; |
174 | |
175 | /** |
176 | * The largest document this parser can support without reallocating. |
177 | * |
178 | * @return Current capacity, in bytes. |
179 | */ |
180 | simdjson_inline size_t capacity() const noexcept; |
181 | |
182 | /** |
183 | * The maximum level of nested object and arrays supported by this parser. |
184 | * |
185 | * @return Maximum depth (number of nested objects and arrays). |
186 | */ |
187 | simdjson_inline size_t max_depth() const noexcept; |
188 | |
189 | /** |
190 | * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length |
191 | * and `max_depth` depth. |
192 | * |
193 | * @param capacity The new capacity. |
194 | * @param max_depth The new max_depth. |
195 | * @return The error, if there is one. |
196 | */ |
197 | simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; |
198 | |
199 | |
200 | protected: |
201 | /** |
202 | * The maximum document length this parser supports. |
203 | * |
204 | * Buffers are large enough to handle any document up to this length. |
205 | */ |
206 | size_t _capacity{0}; |
207 | |
208 | /** |
209 | * The maximum depth (number of nested objects and arrays) supported by this parser. |
210 | * |
211 | * Initialized to 0 here; any DEFAULT_MAX_DEPTH default is applied elsewhere (TODO confirm where). |
212 | */ |
213 | size_t _max_depth{0}; |
214 | |
215 | // Declaring these so that subclasses can use them to implement their constructors. |
216 | simdjson_inline dom_parser_implementation() noexcept; |
217 | simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; |
218 | simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; |
219 | |
220 | simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; |
221 | simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; |
222 | }; // class dom_parser_implementation |
223 | |
224 | simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; // compiler-generated default construction |
225 | simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&) noexcept = default; // compiler-generated move construction |
226 | simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&) noexcept = default; // compiler-generated move assignment |
227 | |
228 | simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { |
229 | return this->_capacity; // plain accessor for the stored capacity |
230 | } |
231 | |
232 | simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { |
233 | return this->_max_depth; // plain accessor for the stored depth limit |
234 | } |
235 | |
236 | simdjson_warn_unused |
237 | inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { |
238 | // Depth first, then capacity; each setter runs only if the requested value differs from the current one. |
239 | if (max_depth != _max_depth) { |
240 | if (error_code depth_status = set_max_depth(max_depth)) { return depth_status; } |
241 | } |
242 | if (capacity != _capacity) { |
243 | if (error_code capacity_status = set_capacity(capacity)) { return capacity_status; } |
244 | } |
245 | // No setter reported an error (or nothing needed to change). |
246 | return SUCCESS; |
247 | } |
248 | |
249 | } // namespace internal |
250 | } // namespace simdjson |
251 | |
252 | #endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H |
253 | |