| 1 | #ifndef SIMDJSON_IMPLEMENTATION_H |
| 2 | #define SIMDJSON_IMPLEMENTATION_H |
| 3 | |
| 4 | #include "simdjson/common_defs.h" |
| 5 | #include "simdjson/internal/dom_parser_implementation.h" |
| 6 | #include "simdjson/internal/isadetection.h" |
| 7 | #include <string> |
| 8 | #include <atomic> |
| 9 | #include <vector> |
| 10 | |
| 11 | namespace simdjson { |
| 12 | |
| 13 | /** |
| 14 | * Validate the UTF-8 string. |
| 15 | * |
| 16 | * @param buf the string to validate. |
| 17 | * @param len the length of the string in bytes. |
| 18 | * @return true if the string is valid UTF-8. |
| 19 | */ |
| 20 | simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; |
| 21 | /** |
| 22 | * Validate the UTF-8 string. |
| 23 | * |
| 24 | * @param sv the string_view to validate. |
| 25 | * @return true if the string is valid UTF-8. |
| 26 | */ |
| 27 | simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { |
| 28 | return validate_utf8(buf: sv.data(), len: sv.size()); |
| 29 | } |
| 30 | |
| 31 | /** |
| 32 | * Validate the UTF-8 string. |
| 33 | * |
| 34 | * @param p the string to validate. |
| 35 | * @return true if the string is valid UTF-8. |
| 36 | */ |
| 37 | simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { |
| 38 | return validate_utf8(buf: s.data(), len: s.size()); |
| 39 | } |
| 40 | |
/** Forward declaration of dom::document. */
namespace dom { class document; } // namespace dom
| 44 | |
| 45 | /** |
| 46 | * An implementation of simdjson for a particular CPU architecture. |
| 47 | * |
| 48 | * Also used to maintain the currently active implementation. The active implementation is |
| 49 | * automatically initialized on first use to the most advanced implementation supported by the host. |
| 50 | */ |
| 51 | class implementation { |
| 52 | public: |
| 53 | |
| 54 | /** |
| 55 | * The name of this implementation. |
| 56 | * |
| 57 | * const implementation *impl = simdjson::get_active_implementation(); |
| 58 | * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; |
| 59 | * |
| 60 | * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". |
| 61 | */ |
| 62 | virtual const std::string &name() const { return _name; } |
| 63 | |
| 64 | /** |
| 65 | * The description of this implementation. |
| 66 | * |
| 67 | * const implementation *impl = simdjson::get_active_implementation(); |
| 68 | * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; |
| 69 | * |
| 70 | * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". |
| 71 | */ |
| 72 | virtual const std::string &description() const { return _description; } |
| 73 | |
| 74 | /** |
| 75 | * The instruction sets this implementation is compiled against |
| 76 | * and the current CPU match. This function may poll the current CPU/system |
| 77 | * and should therefore not be called too often if performance is a concern. |
| 78 | * |
| 79 | * @return true if the implementation can be safely used on the current system (determined at runtime). |
| 80 | */ |
| 81 | bool supported_by_runtime_system() const; |
| 82 | |
| 83 | /** |
| 84 | * @private For internal implementation use |
| 85 | * |
| 86 | * The instruction sets this implementation is compiled against. |
| 87 | * |
| 88 | * @return a mask of all required `internal::instruction_set::` values. |
| 89 | */ |
| 90 | virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } |
| 91 | |
| 92 | /** |
| 93 | * @private For internal implementation use |
| 94 | * |
| 95 | * const implementation *impl = simdjson::get_active_implementation(); |
| 96 | * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; |
| 97 | * |
| 98 | * @param capacity The largest document that will be passed to the parser. |
| 99 | * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. |
| 100 | * @param dst The place to put the resulting parser implementation. |
| 101 | * @return the error code, or SUCCESS if there was no error. |
| 102 | */ |
| 103 | virtual error_code create_dom_parser_implementation( |
| 104 | size_t capacity, |
| 105 | size_t max_depth, |
| 106 | std::unique_ptr<internal::dom_parser_implementation> &dst |
| 107 | ) const noexcept = 0; |
| 108 | |
| 109 | /** |
| 110 | * @private For internal implementation use |
| 111 | * |
| 112 | * Minify the input string assuming that it represents a JSON string, does not parse or validate. |
| 113 | * |
| 114 | * Overridden by each implementation. |
| 115 | * |
| 116 | * @param buf the json document to minify. |
| 117 | * @param len the length of the json document. |
| 118 | * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. |
| 119 | * @param dst_len the number of bytes written. Output only. |
| 120 | * @return the error code, or SUCCESS if there was no error. |
| 121 | */ |
| 122 | simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; |
| 123 | |
| 124 | |
| 125 | /** |
| 126 | * Validate the UTF-8 string. |
| 127 | * |
| 128 | * Overridden by each implementation. |
| 129 | * |
| 130 | * @param buf the string to validate. |
| 131 | * @param len the length of the string in bytes. |
| 132 | * @return true if and only if the string is valid UTF-8. |
| 133 | */ |
| 134 | simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; |
| 135 | |
| 136 | protected: |
| 137 | /** @private Construct an implementation with the given name and description. For subclasses. */ |
| 138 | simdjson_inline implementation( |
| 139 | std::string_view name, |
| 140 | std::string_view description, |
| 141 | uint32_t required_instruction_sets |
| 142 | ) : |
| 143 | _name(name), |
| 144 | _description(description), |
| 145 | _required_instruction_sets(required_instruction_sets) |
| 146 | { |
| 147 | } |
| 148 | virtual ~implementation()=default; |
| 149 | |
| 150 | private: |
| 151 | /** |
| 152 | * The name of this implementation. |
| 153 | */ |
| 154 | const std::string _name; |
| 155 | |
| 156 | /** |
| 157 | * The description of this implementation. |
| 158 | */ |
| 159 | const std::string _description; |
| 160 | |
| 161 | /** |
| 162 | * Instruction sets required for this implementation. |
| 163 | */ |
| 164 | const uint32_t _required_instruction_sets; |
| 165 | }; |
| 166 | |
| 167 | /** @private */ |
| 168 | namespace internal { |
| 169 | |
| 170 | /** |
| 171 | * The list of available implementations compiled into simdjson. |
| 172 | */ |
| 173 | class available_implementation_list { |
| 174 | public: |
| 175 | /** Get the list of available implementations compiled into simdjson */ |
| 176 | simdjson_inline available_implementation_list() {} |
| 177 | /** Number of implementations */ |
| 178 | size_t size() const noexcept; |
| 179 | /** STL const begin() iterator */ |
| 180 | const implementation * const *begin() const noexcept; |
| 181 | /** STL const end() iterator */ |
| 182 | const implementation * const *end() const noexcept; |
| 183 | |
| 184 | /** |
| 185 | * Get the implementation with the given name. |
| 186 | * |
| 187 | * Case sensitive. |
| 188 | * |
| 189 | * const implementation *impl = simdjson::get_available_implementations()["westmere"]; |
| 190 | * if (!impl) { exit(1); } |
| 191 | * if (!imp->supported_by_runtime_system()) { exit(1); } |
| 192 | * simdjson::get_active_implementation() = impl; |
| 193 | * |
| 194 | * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" |
| 195 | * @return the implementation, or nullptr if the parse failed. |
| 196 | */ |
| 197 | const implementation * operator[](const std::string_view &name) const noexcept { |
| 198 | for (const implementation * impl : *this) { |
| 199 | if (impl->name() == name) { return impl; } |
| 200 | } |
| 201 | return nullptr; |
| 202 | } |
| 203 | |
| 204 | /** |
| 205 | * Detect the most advanced implementation supported by the current host. |
| 206 | * |
| 207 | * This is used to initialize the implementation on startup. |
| 208 | * |
| 209 | * const implementation *impl = simdjson::available_implementation::detect_best_supported(); |
| 210 | * simdjson::get_active_implementation() = impl; |
| 211 | * |
| 212 | * @return the most advanced supported implementation for the current host, or an |
| 213 | * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported |
| 214 | * implementation. Will never return nullptr. |
| 215 | */ |
| 216 | const implementation *detect_best_supported() const noexcept; |
| 217 | }; |
| 218 | |
/**
 * Pointer-like wrapper around std::atomic<T*>.
 *
 * Every read goes through an atomic load and assignment goes through an atomic store, both with
 * the default memory order.
 */
template<typename T>
class atomic_ptr {
public:
  /** Wrap a raw pointer. Not explicit, so a T* converts to atomic_ptr<T> implicitly. */
  atomic_ptr(T *_ptr) : ptr(_ptr) {}

  // Read-only access through a const wrapper.
  operator const T*() const { return ptr.load(); }
  const T& operator*() const { return *ptr.load(); }
  const T* operator->() const { return ptr.load(); }

  // Mutable access through a non-const wrapper.
  operator T*() { return ptr.load(); }
  T& operator*() { return *ptr.load(); }
  T* operator->() { return ptr.load(); }

  /** Atomically replace the stored pointer. */
  atomic_ptr& operator=(T *_ptr) {
    ptr.store(_ptr);
    return *this;
  }

private:
  std::atomic<T*> ptr;
};
| 236 | |
| 237 | } // namespace internal |
| 238 | |
| 239 | /** |
| 240 | * The list of available implementations compiled into simdjson. |
| 241 | */ |
| 242 | extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); |
| 243 | |
| 244 | /** |
| 245 | * The active implementation. |
| 246 | * |
| 247 | * Automatically initialized on first use to the most advanced implementation supported by this hardware. |
| 248 | */ |
| 249 | extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation(); |
| 250 | |
| 251 | } // namespace simdjson |
| 252 | |
| 253 | #endif // SIMDJSON_IMPLEMENTATION_H |
| 254 | |