1 | #ifndef SIMDJSON_INLINE_PARSER_H |
2 | #define SIMDJSON_INLINE_PARSER_H |
3 | |
4 | #include "simdjson/dom/document_stream.h" |
5 | #include "simdjson/dom/parser.h" |
6 | #include "simdjson/implementation.h" |
7 | #include "simdjson/internal/jsonformatutils.h" |
8 | #include "simdjson/portability.h" |
9 | #include <cstdio> |
10 | #include <climits> |
11 | |
12 | namespace simdjson { |
13 | namespace dom { |
14 | |
15 | // |
16 | // parser inline implementation |
17 | // |
18 | simdjson_inline parser::parser(size_t max_capacity) noexcept |
19 | : _max_capacity{max_capacity}, |
20 | loaded_bytes(nullptr) { |
21 | } |
22 | simdjson_inline parser::parser(parser &&other) noexcept = default; |
23 | simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; |
24 | |
25 | inline bool parser::is_valid() const noexcept { return valid; } |
26 | inline int parser::get_error_code() const noexcept { return error; } |
27 | inline std::string parser::get_error_message() const noexcept { return error_message(error); } |
28 | |
29 | inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { |
30 | return valid ? doc.dump_raw_tape(os) : false; |
31 | } |
32 | |
33 | inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept { |
34 | // Open the file |
35 | SIMDJSON_PUSH_DISABLE_WARNINGS |
36 | SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe |
37 | std::FILE *fp = std::fopen(filename: path.c_str(), modes: "rb" ); |
38 | SIMDJSON_POP_DISABLE_WARNINGS |
39 | |
40 | if (fp == nullptr) { |
41 | return IO_ERROR; |
42 | } |
43 | |
44 | // Get the file size |
45 | int ret; |
46 | #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS |
47 | ret = _fseeki64(fp, 0, SEEK_END); |
48 | #else |
49 | ret = std::fseek(stream: fp, off: 0, SEEK_END); |
50 | #endif // _WIN64 |
51 | if(ret < 0) { |
52 | std::fclose(stream: fp); |
53 | return IO_ERROR; |
54 | } |
55 | #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS |
56 | __int64 len = _ftelli64(fp); |
57 | if(len == -1L) { |
58 | std::fclose(fp); |
59 | return IO_ERROR; |
60 | } |
61 | #else |
62 | long len = std::ftell(stream: fp); |
63 | if((len < 0) || (len == LONG_MAX)) { |
64 | std::fclose(stream: fp); |
65 | return IO_ERROR; |
66 | } |
67 | #endif |
68 | |
69 | // Make sure we have enough capacity to load the file |
70 | if (_loaded_bytes_capacity < size_t(len)) { |
71 | loaded_bytes.reset( p: internal::allocate_padded_buffer(length: len) ); |
72 | if (!loaded_bytes) { |
73 | std::fclose(stream: fp); |
74 | return MEMALLOC; |
75 | } |
76 | _loaded_bytes_capacity = len; |
77 | } |
78 | |
79 | // Read the string |
80 | std::rewind(stream: fp); |
81 | size_t bytes_read = std::fread(ptr: loaded_bytes.get(), size: 1, n: len, stream: fp); |
82 | if (std::fclose(stream: fp) != 0 || bytes_read != size_t(len)) { |
83 | return IO_ERROR; |
84 | } |
85 | |
86 | return bytes_read; |
87 | } |
88 | |
89 | inline simdjson_result<element> parser::load(const std::string &path) & noexcept { |
90 | size_t len; |
91 | auto _error = read_file(path).get(value&: len); |
92 | if (_error) { return _error; } |
93 | return parse(buf: loaded_bytes.get(), len, realloc_if_needed: false); |
94 | } |
95 | |
96 | inline simdjson_result<document_stream> parser::load_many(const std::string &path, size_t batch_size) noexcept { |
97 | size_t len; |
98 | auto _error = read_file(path).get(value&: len); |
99 | if (_error) { return _error; } |
100 | if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } |
101 | return document_stream(*this, reinterpret_cast<const uint8_t*>(loaded_bytes.get()), len, batch_size); |
102 | } |
103 | |
104 | inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { |
105 | // Important: we need to ensure that document has enough capacity. |
106 | // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! |
107 | error_code _error = ensure_capacity(doc&: provided_doc, desired_capacity: len); |
108 | if (_error) { return _error; } |
109 | if (realloc_if_needed) { |
110 | // Make sure we have enough capacity to copy len bytes |
111 | if (!loaded_bytes || _loaded_bytes_capacity < len) { |
112 | loaded_bytes.reset( p: internal::allocate_padded_buffer(length: len) ); |
113 | if (!loaded_bytes) { |
114 | return MEMALLOC; |
115 | } |
116 | _loaded_bytes_capacity = len; |
117 | } |
118 | std::memcpy(dest: static_cast<void *>(loaded_bytes.get()), src: buf, n: len); |
119 | } |
120 | _error = implementation->parse(buf: realloc_if_needed ? reinterpret_cast<const uint8_t*>(loaded_bytes.get()): buf, len, doc&: provided_doc); |
121 | |
122 | if (_error) { return _error; } |
123 | |
124 | return provided_doc.root(); |
125 | } |
126 | |
127 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { |
128 | return parse_into_document(provided_doc, buf: reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed); |
129 | } |
130 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { |
131 | return parse_into_document(provided_doc, buf: s.data(), len: s.length(), realloc_if_needed: s.capacity() - s.length() < SIMDJSON_PADDING); |
132 | } |
133 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { |
134 | return parse_into_document(provided_doc, buf: s.data(), len: s.length(), realloc_if_needed: false); |
135 | } |
136 | |
137 | |
138 | inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { |
139 | return parse_into_document(provided_doc&: doc, buf, len, realloc_if_needed); |
140 | } |
141 | |
142 | simdjson_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { |
143 | return parse(buf: reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed); |
144 | } |
145 | simdjson_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept { |
146 | return parse(buf: s.data(), len: s.length(), realloc_if_needed: s.capacity() - s.length() < SIMDJSON_PADDING); |
147 | } |
148 | simdjson_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept { |
149 | return parse(buf: s.data(), len: s.length(), realloc_if_needed: false); |
150 | } |
151 | simdjson_inline simdjson_result<element> parser::parse(const padded_string_view &v) & noexcept { |
152 | return parse(buf: v.data(), len: v.length(), realloc_if_needed: false); |
153 | } |
154 | |
155 | inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { |
156 | if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } |
157 | return document_stream(*this, buf, len, batch_size); |
158 | } |
159 | inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { |
160 | return parse_many(buf: reinterpret_cast<const uint8_t *>(buf), len, batch_size); |
161 | } |
162 | inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept { |
163 | return parse_many(buf: s.data(), len: s.length(), batch_size); |
164 | } |
165 | inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept { |
166 | return parse_many(buf: s.data(), len: s.length(), batch_size); |
167 | } |
168 | |
169 | simdjson_inline size_t parser::capacity() const noexcept { |
170 | return implementation ? implementation->capacity() : 0; |
171 | } |
172 | simdjson_inline size_t parser::max_capacity() const noexcept { |
173 | return _max_capacity; |
174 | } |
175 | simdjson_inline size_t parser::max_depth() const noexcept { |
176 | return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; |
177 | } |
178 | |
179 | simdjson_warn_unused |
180 | inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { |
181 | // |
182 | // Reallocate implementation if needed |
183 | // |
184 | error_code err; |
185 | if (implementation) { |
186 | err = implementation->allocate(capacity, max_depth); |
187 | } else { |
188 | err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, dst&: implementation); |
189 | } |
190 | if (err) { return err; } |
191 | return SUCCESS; |
192 | } |
193 | |
194 | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
195 | simdjson_warn_unused |
196 | inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { |
197 | return !allocate(capacity, max_depth); |
198 | } |
199 | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
200 | |
201 | inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { |
202 | return ensure_capacity(doc, desired_capacity); |
203 | } |
204 | |
205 | |
206 | inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { |
207 | // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. |
208 | // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. |
209 | if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } |
210 | // If we don't have enough capacity, (try to) automatically bump it. |
211 | // If the document needs allocation, do it too. |
212 | // Both in one if statement to minimize unlikely branching. |
213 | // |
214 | // Note: we must make sure that this function is called if capacity() == 0. We do so because we |
215 | // ensure that desired_capacity > 0. |
216 | if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { |
217 | if (desired_capacity > max_capacity()) { |
218 | return error = CAPACITY; |
219 | } |
220 | error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(capacity: desired_capacity) : SUCCESS; |
221 | error_code err2 = capacity() < desired_capacity ? allocate(capacity: desired_capacity, max_depth: max_depth()) : SUCCESS; |
222 | if(err1 != SUCCESS) { return error = err1; } |
223 | if(err2 != SUCCESS) { return error = err2; } |
224 | } |
225 | return SUCCESS; |
226 | } |
227 | |
228 | simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { |
229 | if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { |
230 | _max_capacity = max_capacity; |
231 | } else { |
232 | _max_capacity = MINIMAL_DOCUMENT_CAPACITY; |
233 | } |
234 | } |
235 | |
236 | } // namespace dom |
237 | } // namespace simdjson |
238 | |
239 | #endif // SIMDJSON_INLINE_PARSER_H |
240 | |