1 | #ifndef SIMDJSON_IMPLEMENTATION_H |
2 | #define SIMDJSON_IMPLEMENTATION_H |
3 | |
4 | #include "simdjson/common_defs.h" |
5 | #include "simdjson/internal/dom_parser_implementation.h" |
6 | #include "simdjson/internal/isadetection.h" |
7 | #include <string> |
8 | #include <atomic> |
9 | #include <vector> |
10 | |
11 | namespace simdjson { |
12 | |
/**
 * Validate the UTF-8 string.
 *
 * Declaration only: the definition is not part of this header.
 *
 * @param buf the string to validate.
 * @param len the length of the string in bytes.
 * @return true if the string is valid UTF-8.
 */
simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept;
21 | /** |
22 | * Validate the UTF-8 string. |
23 | * |
24 | * @param sv the string_view to validate. |
25 | * @return true if the string is valid UTF-8. |
26 | */ |
27 | simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { |
28 | return validate_utf8(buf: sv.data(), len: sv.size()); |
29 | } |
30 | |
31 | /** |
32 | * Validate the UTF-8 string. |
33 | * |
34 | * @param p the string to validate. |
35 | * @return true if the string is valid UTF-8. |
36 | */ |
37 | simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { |
38 | return validate_utf8(buf: s.data(), len: s.size()); |
39 | } |
40 | |
namespace dom {
// Forward declaration only; the class is not defined in this header.
class document;
} // namespace dom
44 | |
45 | /** |
46 | * An implementation of simdjson for a particular CPU architecture. |
47 | * |
48 | * Also used to maintain the currently active implementation. The active implementation is |
49 | * automatically initialized on first use to the most advanced implementation supported by the host. |
50 | */ |
51 | class implementation { |
52 | public: |
53 | |
54 | /** |
55 | * The name of this implementation. |
56 | * |
57 | * const implementation *impl = simdjson::get_active_implementation(); |
58 | * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; |
59 | * |
60 | * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". |
61 | */ |
62 | virtual const std::string &name() const { return _name; } |
63 | |
64 | /** |
65 | * The description of this implementation. |
66 | * |
67 | * const implementation *impl = simdjson::get_active_implementation(); |
68 | * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; |
69 | * |
70 | * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". |
71 | */ |
72 | virtual const std::string &description() const { return _description; } |
73 | |
74 | /** |
75 | * The instruction sets this implementation is compiled against |
76 | * and the current CPU match. This function may poll the current CPU/system |
77 | * and should therefore not be called too often if performance is a concern. |
78 | * |
79 | * @return true if the implementation can be safely used on the current system (determined at runtime). |
80 | */ |
81 | bool supported_by_runtime_system() const; |
82 | |
83 | /** |
84 | * @private For internal implementation use |
85 | * |
86 | * The instruction sets this implementation is compiled against. |
87 | * |
88 | * @return a mask of all required `internal::instruction_set::` values. |
89 | */ |
90 | virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } |
91 | |
92 | /** |
93 | * @private For internal implementation use |
94 | * |
95 | * const implementation *impl = simdjson::get_active_implementation(); |
96 | * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; |
97 | * |
98 | * @param capacity The largest document that will be passed to the parser. |
99 | * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. |
100 | * @param dst The place to put the resulting parser implementation. |
101 | * @return the error code, or SUCCESS if there was no error. |
102 | */ |
103 | virtual error_code create_dom_parser_implementation( |
104 | size_t capacity, |
105 | size_t max_depth, |
106 | std::unique_ptr<internal::dom_parser_implementation> &dst |
107 | ) const noexcept = 0; |
108 | |
109 | /** |
110 | * @private For internal implementation use |
111 | * |
112 | * Minify the input string assuming that it represents a JSON string, does not parse or validate. |
113 | * |
114 | * Overridden by each implementation. |
115 | * |
116 | * @param buf the json document to minify. |
117 | * @param len the length of the json document. |
118 | * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. |
119 | * @param dst_len the number of bytes written. Output only. |
120 | * @return the error code, or SUCCESS if there was no error. |
121 | */ |
122 | simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; |
123 | |
124 | |
125 | /** |
126 | * Validate the UTF-8 string. |
127 | * |
128 | * Overridden by each implementation. |
129 | * |
130 | * @param buf the string to validate. |
131 | * @param len the length of the string in bytes. |
132 | * @return true if and only if the string is valid UTF-8. |
133 | */ |
134 | simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; |
135 | |
136 | protected: |
137 | /** @private Construct an implementation with the given name and description. For subclasses. */ |
138 | simdjson_inline implementation( |
139 | std::string_view name, |
140 | std::string_view description, |
141 | uint32_t required_instruction_sets |
142 | ) : |
143 | _name(name), |
144 | _description(description), |
145 | _required_instruction_sets(required_instruction_sets) |
146 | { |
147 | } |
148 | virtual ~implementation()=default; |
149 | |
150 | private: |
151 | /** |
152 | * The name of this implementation. |
153 | */ |
154 | const std::string _name; |
155 | |
156 | /** |
157 | * The description of this implementation. |
158 | */ |
159 | const std::string _description; |
160 | |
161 | /** |
162 | * Instruction sets required for this implementation. |
163 | */ |
164 | const uint32_t _required_instruction_sets; |
165 | }; |
166 | |
167 | /** @private */ |
168 | namespace internal { |
169 | |
170 | /** |
171 | * The list of available implementations compiled into simdjson. |
172 | */ |
173 | class available_implementation_list { |
174 | public: |
175 | /** Get the list of available implementations compiled into simdjson */ |
176 | simdjson_inline available_implementation_list() {} |
177 | /** Number of implementations */ |
178 | size_t size() const noexcept; |
179 | /** STL const begin() iterator */ |
180 | const implementation * const *begin() const noexcept; |
181 | /** STL const end() iterator */ |
182 | const implementation * const *end() const noexcept; |
183 | |
184 | /** |
185 | * Get the implementation with the given name. |
186 | * |
187 | * Case sensitive. |
188 | * |
189 | * const implementation *impl = simdjson::get_available_implementations()["westmere"]; |
190 | * if (!impl) { exit(1); } |
191 | * if (!imp->supported_by_runtime_system()) { exit(1); } |
192 | * simdjson::get_active_implementation() = impl; |
193 | * |
194 | * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" |
195 | * @return the implementation, or nullptr if the parse failed. |
196 | */ |
197 | const implementation * operator[](const std::string_view &name) const noexcept { |
198 | for (const implementation * impl : *this) { |
199 | if (impl->name() == name) { return impl; } |
200 | } |
201 | return nullptr; |
202 | } |
203 | |
204 | /** |
205 | * Detect the most advanced implementation supported by the current host. |
206 | * |
207 | * This is used to initialize the implementation on startup. |
208 | * |
209 | * const implementation *impl = simdjson::available_implementation::detect_best_supported(); |
210 | * simdjson::get_active_implementation() = impl; |
211 | * |
212 | * @return the most advanced supported implementation for the current host, or an |
213 | * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported |
214 | * implementation. Will never return nullptr. |
215 | */ |
216 | const implementation *detect_best_supported() const noexcept; |
217 | }; |
218 | |
/**
 * Thin wrapper around std::atomic<T*> that can be used like a plain pointer.
 *
 * Every read goes through std::atomic::load() and assignment goes through
 * std::atomic::store(), both with their default memory ordering.
 */
template<typename T>
class atomic_ptr {
public:
  /** Wrap the given raw pointer (implicit conversion from T* is allowed). */
  atomic_ptr(T *initial) : ptr{initial} {}

  /** Read-only access: current pointer value viewed as pointer-to-const. */
  operator const T*() const {
    return ptr.load();
  }
  const T& operator*() const {
    const T *current = ptr.load();
    return *current;
  }
  const T* operator->() const {
    return ptr.load();
  }

  /** Mutable access: current pointer value. */
  operator T*() {
    return ptr.load();
  }
  T& operator*() {
    T *current = ptr.load();
    return *current;
  }
  T* operator->() {
    return ptr.load();
  }

  /** Atomically replace the stored pointer. */
  atomic_ptr& operator=(T *replacement) {
    ptr.store(replacement);
    return *this;
  }

private:
  std::atomic<T*> ptr;
};
236 | |
237 | } // namespace internal |
238 | |
/**
 * The list of available implementations compiled into simdjson.
 *
 * Declaration only. The returned reference is to a const
 * internal::available_implementation_list, which offers size(), begin()/end()
 * iteration, lookup by name via operator[], and detect_best_supported().
 */
extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
243 | |
/**
 * The active implementation.
 *
 * Automatically initialized on first use to the most advanced implementation supported by this hardware.
 *
 * Declaration only. The result is a reference to an internal::atomic_ptr, so the active
 * implementation can be read through it (it converts to a pointer and supports `->`)
 * or replaced by assigning an implementation pointer to it.
 */
extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation();
250 | |
251 | } // namespace simdjson |
252 | |
253 | #endif // SIMDJSON_IMPLEMENTATION_H |
254 | |