| 1 | #ifndef SIMDJSON_INLINE_PARSER_H |
| 2 | #define SIMDJSON_INLINE_PARSER_H |
| 3 | |
| 4 | #include "simdjson/dom/document_stream.h" |
| 5 | #include "simdjson/dom/parser.h" |
| 6 | #include "simdjson/implementation.h" |
| 7 | #include "simdjson/internal/jsonformatutils.h" |
| 8 | #include "simdjson/portability.h" |
| 9 | #include <cstdio> |
| 10 | #include <climits> |
| 11 | |
| 12 | namespace simdjson { |
| 13 | namespace dom { |
| 14 | |
| 15 | // |
| 16 | // parser inline implementation |
| 17 | // |
| 18 | simdjson_inline parser::parser(size_t max_capacity) noexcept |
| 19 | : _max_capacity{max_capacity}, |
| 20 | loaded_bytes(nullptr) { |
| 21 | } |
| 22 | simdjson_inline parser::parser(parser &&other) noexcept = default; |
| 23 | simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; |
| 24 | |
| 25 | inline bool parser::is_valid() const noexcept { return valid; } |
| 26 | inline int parser::get_error_code() const noexcept { return error; } |
| 27 | inline std::string parser::get_error_message() const noexcept { return error_message(error); } |
| 28 | |
| 29 | inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { |
| 30 | return valid ? doc.dump_raw_tape(os) : false; |
| 31 | } |
| 32 | |
| 33 | inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept { |
| 34 | // Open the file |
| 35 | SIMDJSON_PUSH_DISABLE_WARNINGS |
| 36 | SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe |
| 37 | std::FILE *fp = std::fopen(filename: path.c_str(), modes: "rb" ); |
| 38 | SIMDJSON_POP_DISABLE_WARNINGS |
| 39 | |
| 40 | if (fp == nullptr) { |
| 41 | return IO_ERROR; |
| 42 | } |
| 43 | |
| 44 | // Get the file size |
| 45 | int ret; |
| 46 | #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS |
| 47 | ret = _fseeki64(fp, 0, SEEK_END); |
| 48 | #else |
| 49 | ret = std::fseek(stream: fp, off: 0, SEEK_END); |
| 50 | #endif // _WIN64 |
| 51 | if(ret < 0) { |
| 52 | std::fclose(stream: fp); |
| 53 | return IO_ERROR; |
| 54 | } |
| 55 | #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS |
| 56 | __int64 len = _ftelli64(fp); |
| 57 | if(len == -1L) { |
| 58 | std::fclose(fp); |
| 59 | return IO_ERROR; |
| 60 | } |
| 61 | #else |
| 62 | long len = std::ftell(stream: fp); |
| 63 | if((len < 0) || (len == LONG_MAX)) { |
| 64 | std::fclose(stream: fp); |
| 65 | return IO_ERROR; |
| 66 | } |
| 67 | #endif |
| 68 | |
| 69 | // Make sure we have enough capacity to load the file |
| 70 | if (_loaded_bytes_capacity < size_t(len)) { |
| 71 | loaded_bytes.reset( p: internal::allocate_padded_buffer(length: len) ); |
| 72 | if (!loaded_bytes) { |
| 73 | std::fclose(stream: fp); |
| 74 | return MEMALLOC; |
| 75 | } |
| 76 | _loaded_bytes_capacity = len; |
| 77 | } |
| 78 | |
| 79 | // Read the string |
| 80 | std::rewind(stream: fp); |
| 81 | size_t bytes_read = std::fread(ptr: loaded_bytes.get(), size: 1, n: len, stream: fp); |
| 82 | if (std::fclose(stream: fp) != 0 || bytes_read != size_t(len)) { |
| 83 | return IO_ERROR; |
| 84 | } |
| 85 | |
| 86 | return bytes_read; |
| 87 | } |
| 88 | |
| 89 | inline simdjson_result<element> parser::load(const std::string &path) & noexcept { |
| 90 | size_t len; |
| 91 | auto _error = read_file(path).get(value&: len); |
| 92 | if (_error) { return _error; } |
| 93 | return parse(buf: loaded_bytes.get(), len, realloc_if_needed: false); |
| 94 | } |
| 95 | |
| 96 | inline simdjson_result<document_stream> parser::load_many(const std::string &path, size_t batch_size) noexcept { |
| 97 | size_t len; |
| 98 | auto _error = read_file(path).get(value&: len); |
| 99 | if (_error) { return _error; } |
| 100 | if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } |
| 101 | return document_stream(*this, reinterpret_cast<const uint8_t*>(loaded_bytes.get()), len, batch_size); |
| 102 | } |
| 103 | |
| 104 | inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { |
| 105 | // Important: we need to ensure that document has enough capacity. |
| 106 | // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! |
| 107 | error_code _error = ensure_capacity(doc&: provided_doc, desired_capacity: len); |
| 108 | if (_error) { return _error; } |
| 109 | if (realloc_if_needed) { |
| 110 | // Make sure we have enough capacity to copy len bytes |
| 111 | if (!loaded_bytes || _loaded_bytes_capacity < len) { |
| 112 | loaded_bytes.reset( p: internal::allocate_padded_buffer(length: len) ); |
| 113 | if (!loaded_bytes) { |
| 114 | return MEMALLOC; |
| 115 | } |
| 116 | _loaded_bytes_capacity = len; |
| 117 | } |
| 118 | std::memcpy(dest: static_cast<void *>(loaded_bytes.get()), src: buf, n: len); |
| 119 | } |
| 120 | _error = implementation->parse(buf: realloc_if_needed ? reinterpret_cast<const uint8_t*>(loaded_bytes.get()): buf, len, doc&: provided_doc); |
| 121 | |
| 122 | if (_error) { return _error; } |
| 123 | |
| 124 | return provided_doc.root(); |
| 125 | } |
| 126 | |
| 127 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { |
| 128 | return parse_into_document(provided_doc, buf: reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed); |
| 129 | } |
| 130 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { |
| 131 | return parse_into_document(provided_doc, buf: s.data(), len: s.length(), realloc_if_needed: s.capacity() - s.length() < SIMDJSON_PADDING); |
| 132 | } |
| 133 | simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { |
| 134 | return parse_into_document(provided_doc, buf: s.data(), len: s.length(), realloc_if_needed: false); |
| 135 | } |
| 136 | |
| 137 | |
| 138 | inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { |
| 139 | return parse_into_document(provided_doc&: doc, buf, len, realloc_if_needed); |
| 140 | } |
| 141 | |
| 142 | simdjson_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { |
| 143 | return parse(buf: reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed); |
| 144 | } |
| 145 | simdjson_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept { |
| 146 | return parse(buf: s.data(), len: s.length(), realloc_if_needed: s.capacity() - s.length() < SIMDJSON_PADDING); |
| 147 | } |
| 148 | simdjson_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept { |
| 149 | return parse(buf: s.data(), len: s.length(), realloc_if_needed: false); |
| 150 | } |
| 151 | simdjson_inline simdjson_result<element> parser::parse(const padded_string_view &v) & noexcept { |
| 152 | return parse(buf: v.data(), len: v.length(), realloc_if_needed: false); |
| 153 | } |
| 154 | |
| 155 | inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { |
| 156 | if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } |
| 157 | return document_stream(*this, buf, len, batch_size); |
| 158 | } |
| 159 | inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { |
| 160 | return parse_many(buf: reinterpret_cast<const uint8_t *>(buf), len, batch_size); |
| 161 | } |
| 162 | inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept { |
| 163 | return parse_many(buf: s.data(), len: s.length(), batch_size); |
| 164 | } |
| 165 | inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept { |
| 166 | return parse_many(buf: s.data(), len: s.length(), batch_size); |
| 167 | } |
| 168 | |
| 169 | simdjson_inline size_t parser::capacity() const noexcept { |
| 170 | return implementation ? implementation->capacity() : 0; |
| 171 | } |
| 172 | simdjson_inline size_t parser::max_capacity() const noexcept { |
| 173 | return _max_capacity; |
| 174 | } |
| 175 | simdjson_inline size_t parser::max_depth() const noexcept { |
| 176 | return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; |
| 177 | } |
| 178 | |
| 179 | simdjson_warn_unused |
| 180 | inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { |
| 181 | // |
| 182 | // Reallocate implementation if needed |
| 183 | // |
| 184 | error_code err; |
| 185 | if (implementation) { |
| 186 | err = implementation->allocate(capacity, max_depth); |
| 187 | } else { |
| 188 | err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, dst&: implementation); |
| 189 | } |
| 190 | if (err) { return err; } |
| 191 | return SUCCESS; |
| 192 | } |
| 193 | |
| 194 | #ifndef SIMDJSON_DISABLE_DEPRECATED_API |
| 195 | simdjson_warn_unused |
| 196 | inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { |
| 197 | return !allocate(capacity, max_depth); |
| 198 | } |
| 199 | #endif // SIMDJSON_DISABLE_DEPRECATED_API |
| 200 | |
| 201 | inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { |
| 202 | return ensure_capacity(doc, desired_capacity); |
| 203 | } |
| 204 | |
| 205 | |
| 206 | inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { |
| 207 | // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. |
| 208 | // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. |
| 209 | if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } |
| 210 | // If we don't have enough capacity, (try to) automatically bump it. |
| 211 | // If the document needs allocation, do it too. |
| 212 | // Both in one if statement to minimize unlikely branching. |
| 213 | // |
| 214 | // Note: we must make sure that this function is called if capacity() == 0. We do so because we |
| 215 | // ensure that desired_capacity > 0. |
| 216 | if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { |
| 217 | if (desired_capacity > max_capacity()) { |
| 218 | return error = CAPACITY; |
| 219 | } |
| 220 | error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(capacity: desired_capacity) : SUCCESS; |
| 221 | error_code err2 = capacity() < desired_capacity ? allocate(capacity: desired_capacity, max_depth: max_depth()) : SUCCESS; |
| 222 | if(err1 != SUCCESS) { return error = err1; } |
| 223 | if(err2 != SUCCESS) { return error = err2; } |
| 224 | } |
| 225 | return SUCCESS; |
| 226 | } |
| 227 | |
| 228 | simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { |
| 229 | if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { |
| 230 | _max_capacity = max_capacity; |
| 231 | } else { |
| 232 | _max_capacity = MINIMAL_DOCUMENT_CAPACITY; |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | } // namespace dom |
| 237 | } // namespace simdjson |
| 238 | |
| 239 | #endif // SIMDJSON_INLINE_PARSER_H |
| 240 | |