1#ifndef SIMDJSON_IMPLEMENTATION_H
2#define SIMDJSON_IMPLEMENTATION_H
3
4#include "simdjson/common_defs.h"
5#include "simdjson/internal/dom_parser_implementation.h"
6#include "simdjson/internal/isadetection.h"
7#include <string>
8#include <atomic>
9#include <vector>
10
11namespace simdjson {
12
13/**
14 * Validate the UTF-8 string.
15 *
16 * @param buf the string to validate.
17 * @param len the length of the string in bytes.
18 * @return true if the string is valid UTF-8.
19 */
20simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept;
21/**
22 * Validate the UTF-8 string.
23 *
24 * @param sv the string_view to validate.
25 * @return true if the string is valid UTF-8.
26 */
27simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept {
28 return validate_utf8(buf: sv.data(), len: sv.size());
29}
30
31/**
32 * Validate the UTF-8 string.
33 *
34 * @param p the string to validate.
35 * @return true if the string is valid UTF-8.
36 */
37simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept {
38 return validate_utf8(buf: s.data(), len: s.size());
39}
40
// Forward declaration only: dom::document is named here without being defined.
namespace dom { class document; } // namespace dom
44
45/**
46 * An implementation of simdjson for a particular CPU architecture.
47 *
48 * Also used to maintain the currently active implementation. The active implementation is
49 * automatically initialized on first use to the most advanced implementation supported by the host.
50 */
51class implementation {
52public:
53
54 /**
55 * The name of this implementation.
56 *
57 * const implementation *impl = simdjson::get_active_implementation();
58 * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
59 *
60 * @return the name of the implementation, e.g. "haswell", "westmere", "arm64".
61 */
62 virtual const std::string &name() const { return _name; }
63
64 /**
65 * The description of this implementation.
66 *
67 * const implementation *impl = simdjson::get_active_implementation();
68 * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
69 *
70 * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON".
71 */
72 virtual const std::string &description() const { return _description; }
73
74 /**
75 * The instruction sets this implementation is compiled against
76 * and the current CPU match. This function may poll the current CPU/system
77 * and should therefore not be called too often if performance is a concern.
78 *
79 * @return true if the implementation can be safely used on the current system (determined at runtime).
80 */
81 bool supported_by_runtime_system() const;
82
83 /**
84 * @private For internal implementation use
85 *
86 * The instruction sets this implementation is compiled against.
87 *
88 * @return a mask of all required `internal::instruction_set::` values.
89 */
90 virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }
91
92 /**
93 * @private For internal implementation use
94 *
95 * const implementation *impl = simdjson::get_active_implementation();
96 * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl;
97 *
98 * @param capacity The largest document that will be passed to the parser.
99 * @param max_depth The maximum JSON object/array nesting this parser is expected to handle.
100 * @param dst The place to put the resulting parser implementation.
101 * @return the error code, or SUCCESS if there was no error.
102 */
103 virtual error_code create_dom_parser_implementation(
104 size_t capacity,
105 size_t max_depth,
106 std::unique_ptr<internal::dom_parser_implementation> &dst
107 ) const noexcept = 0;
108
109 /**
110 * @private For internal implementation use
111 *
112 * Minify the input string assuming that it represents a JSON string, does not parse or validate.
113 *
114 * Overridden by each implementation.
115 *
116 * @param buf the json document to minify.
117 * @param len the length of the json document.
118 * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
119 * @param dst_len the number of bytes written. Output only.
120 * @return the error code, or SUCCESS if there was no error.
121 */
122 simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0;
123
124
125 /**
126 * Validate the UTF-8 string.
127 *
128 * Overridden by each implementation.
129 *
130 * @param buf the string to validate.
131 * @param len the length of the string in bytes.
132 * @return true if and only if the string is valid UTF-8.
133 */
134 simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0;
135
136protected:
137 /** @private Construct an implementation with the given name and description. For subclasses. */
138 simdjson_inline implementation(
139 std::string_view name,
140 std::string_view description,
141 uint32_t required_instruction_sets
142 ) :
143 _name(name),
144 _description(description),
145 _required_instruction_sets(required_instruction_sets)
146 {
147 }
148 virtual ~implementation()=default;
149
150private:
151 /**
152 * The name of this implementation.
153 */
154 const std::string _name;
155
156 /**
157 * The description of this implementation.
158 */
159 const std::string _description;
160
161 /**
162 * Instruction sets required for this implementation.
163 */
164 const uint32_t _required_instruction_sets;
165};
166
167/** @private */
168namespace internal {
169
170/**
171 * The list of available implementations compiled into simdjson.
172 */
173class available_implementation_list {
174public:
175 /** Get the list of available implementations compiled into simdjson */
176 simdjson_inline available_implementation_list() {}
177 /** Number of implementations */
178 size_t size() const noexcept;
179 /** STL const begin() iterator */
180 const implementation * const *begin() const noexcept;
181 /** STL const end() iterator */
182 const implementation * const *end() const noexcept;
183
184 /**
185 * Get the implementation with the given name.
186 *
187 * Case sensitive.
188 *
189 * const implementation *impl = simdjson::get_available_implementations()["westmere"];
190 * if (!impl) { exit(1); }
191 * if (!imp->supported_by_runtime_system()) { exit(1); }
192 * simdjson::get_active_implementation() = impl;
193 *
194 * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
195 * @return the implementation, or nullptr if the parse failed.
196 */
197 const implementation * operator[](const std::string_view &name) const noexcept {
198 for (const implementation * impl : *this) {
199 if (impl->name() == name) { return impl; }
200 }
201 return nullptr;
202 }
203
204 /**
205 * Detect the most advanced implementation supported by the current host.
206 *
207 * This is used to initialize the implementation on startup.
208 *
209 * const implementation *impl = simdjson::available_implementation::detect_best_supported();
210 * simdjson::get_active_implementation() = impl;
211 *
212 * @return the most advanced supported implementation for the current host, or an
213 * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported
214 * implementation. Will never return nullptr.
215 */
216 const implementation *detect_best_supported() const noexcept;
217};
218
/**
 * Thin wrapper around std::atomic<T*> that can be used much like a plain pointer.
 *
 * Reads go through std::atomic::load() and assignment through std::atomic::store(),
 * both with the default memory order. The wrapper never deletes the pointee.
 *
 * @tparam T the pointee type.
 */
template<typename T>
class atomic_ptr {
public:
  /** Wrap a raw pointer. Not explicit, so a T* converts to atomic_ptr<T> implicitly. */
  atomic_ptr(T *_ptr) noexcept : ptr{_ptr} {}

  /** Read the stored pointer (const access). */
  operator const T*() const noexcept { return ptr.load(); }
  /** Dereference the stored pointer (const access). The pointer must not be null. */
  const T& operator*() const noexcept { return *ptr.load(); }
  /** Member access through the stored pointer (const access). */
  const T* operator->() const noexcept { return ptr.load(); }

  /** Read the stored pointer. */
  operator T*() noexcept { return ptr.load(); }
  /** Dereference the stored pointer. The pointer must not be null. */
  T& operator*() noexcept { return *ptr.load(); }
  /** Member access through the stored pointer. */
  T* operator->() noexcept { return ptr.load(); }
  /** Atomically replace the stored pointer; returns *this to allow chaining. */
  atomic_ptr& operator=(T *_ptr) noexcept { ptr.store(_ptr); return *this; }

private:
  /** The wrapped pointer. */
  std::atomic<T*> ptr;
};
236
237} // namespace internal
238
239/**
240 * The list of available implementations compiled into simdjson.
241 */
242extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
243
244/**
245 * The active implementation.
246 *
247 * Automatically initialized on first use to the most advanced implementation supported by this hardware.
248 */
249extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation();
250
251} // namespace simdjson
252
253#endif // SIMDJSON_IMPLEMENTATION_H
254