1 | #include "simdjson/error.h" |
2 | |
3 | namespace simdjson { |
4 | namespace SIMDJSON_IMPLEMENTATION { |
5 | namespace ondemand { |
6 | |
// Forward declarations of On Demand types that this header refers to only by
// name (friend declarations and reference parameters); no definition is needed here.
class json_iterator;
class object;
class parser;
10 | |
11 | /** |
12 | * A string escaped per JSON rules, terminated with quote ("). They are used to represent |
13 | * unescaped keys inside JSON documents. |
14 | * |
15 | * (In other words, a pointer to the beginning of a string, just after the start quote, inside a |
16 | * JSON file.) |
17 | * |
18 | * This class is deliberately simplistic and has little functionality. You can |
19 | * compare a raw_json_string instance with an unescaped C string, but |
20 | * that is nearly all you can do. |
21 | * |
22 | * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own |
23 | * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser |
24 | * instance. Doing so requires you to have a sufficiently large buffer. |
25 | * |
26 | * The raw_json_string instances originate typically from field instance which in turn represent |
27 | * key-value pairs from object instances. From a field instance, you get the raw_json_string |
28 | * instance by calling key(). You can, if you want a more usable string_view instance, call |
29 | * the unescaped_key() method on the field instance. You may also create a raw_json_string from |
30 | * any other string value, with the value.get_raw_json_string() method. Again, you can get |
31 | * a more usable string_view instance by calling get_string(). |
32 | * |
33 | */ |
34 | class raw_json_string { |
35 | public: |
36 | /** |
37 | * Create a new invalid raw_json_string. |
38 | * |
39 | * Exists so you can declare a variable and later assign to it before use. |
40 | */ |
41 | simdjson_inline raw_json_string() noexcept = default; |
42 | |
43 | /** |
44 | * Create a new invalid raw_json_string pointed at the given location in the JSON. |
45 | * |
46 | * The given location must be just *after* the beginning quote (") in the JSON file. |
47 | * |
48 | * It *must* be terminated by a ", and be a valid JSON string. |
49 | */ |
50 | simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; |
51 | /** |
52 | * Get the raw pointer to the beginning of the string in the JSON (just after the "). |
53 | * |
54 | * It is possible for this function to return a null pointer if the instance |
55 | * has outlived its existence. |
56 | */ |
57 | simdjson_inline const char * raw() const noexcept; |
58 | |
59 | /** |
60 | * This compares the current instance to the std::string_view target: returns true if |
61 | * they are byte-by-byte equal (no escaping is done) on target.size() characters, |
62 | * and if the raw_json_string instance has a quote character at byte index target.size(). |
63 | * We never read more than length + 1 bytes in the raw_json_string instance. |
64 | * If length is smaller than target.size(), this will return false. |
65 | * |
66 | * The std::string_view instance may contain any characters. However, the caller |
67 | * is responsible for setting length so that length bytes may be read in the |
68 | * raw_json_string. |
69 | * |
70 | * Performance: the comparison may be done using memcmp which may be efficient |
71 | * for long strings. |
72 | */ |
73 | simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; |
74 | |
75 | /** |
76 | * This compares the current instance to the std::string_view target: returns true if |
77 | * they are byte-by-byte equal (no escaping is done). |
78 | * The std::string_view instance should not contain unescaped quote characters: |
79 | * the caller is responsible for this check. See is_free_from_unescaped_quote. |
80 | * |
81 | * Performance: the comparison is done byte-by-byte which might be inefficient for |
82 | * long strings. |
83 | * |
84 | * If target is a compile-time constant, and your compiler likes you, |
85 | * you should be able to do the following without performance penalty... |
86 | * |
87 | * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); |
88 | * s.unsafe_is_equal(target); |
89 | */ |
90 | simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; |
91 | |
92 | /** |
93 | * This compares the current instance to the C string target: returns true if |
94 | * they are byte-by-byte equal (no escaping is done). |
95 | * The provided C string should not contain an unescaped quote character: |
96 | * the caller is responsible for this check. See is_free_from_unescaped_quote. |
97 | * |
98 | * If target is a compile-time constant, and your compiler likes you, |
99 | * you should be able to do the following without performance penalty... |
100 | * |
101 | * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); |
102 | * s.unsafe_is_equal(target); |
103 | */ |
104 | simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; |
105 | |
106 | /** |
107 | * This compares the current instance to the std::string_view target: returns true if |
108 | * they are byte-by-byte equal (no escaping is done). |
109 | */ |
110 | simdjson_inline bool is_equal(std::string_view target) const noexcept; |
111 | |
112 | /** |
113 | * This compares the current instance to the C string target: returns true if |
114 | * they are byte-by-byte equal (no escaping is done). |
115 | */ |
116 | simdjson_inline bool is_equal(const char* target) const noexcept; |
117 | |
118 | /** |
119 | * Returns true if target is free from unescaped quote. If target is known at |
120 | * compile-time, we might expect the computation to happen at compile time with |
121 | * many compilers (not all!). |
122 | */ |
123 | static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; |
124 | static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; |
125 | |
126 | private: |
127 | |
128 | |
129 | /** |
130 | * This will set the inner pointer to zero, effectively making |
131 | * this instance unusable. |
132 | */ |
133 | simdjson_inline void consume() noexcept { buf = nullptr; } |
134 | |
135 | /** |
136 | * Checks whether the inner pointer is non-null and thus usable. |
137 | */ |
138 | simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } |
139 | |
140 | /** |
141 | * Unescape this JSON string, replacing \\ with \, \n with newline, etc. |
142 | * The result will be a valid UTF-8. |
143 | * |
144 | * ## IMPORTANT: string_view lifetime |
145 | * |
146 | * The string_view is only valid until the next parse() call on the parser. |
147 | * |
148 | * @param iter A json_iterator, which contains a buffer where the string will be written. |
149 | * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. |
150 | */ |
151 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(json_iterator &iter, bool allow_replacement) const noexcept; |
152 | |
153 | /** |
154 | * Unescape this JSON string, replacing \\ with \, \n with newline, etc. |
155 | * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ |
156 | * |
157 | * ## IMPORTANT: string_view lifetime |
158 | * |
159 | * The string_view is only valid until the next parse() call on the parser. |
160 | * |
161 | * @param iter A json_iterator, which contains a buffer where the string will be written. |
162 | */ |
163 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape_wobbly(json_iterator &iter) const noexcept; |
164 | const uint8_t * buf{}; |
165 | friend class object; |
166 | friend class field; |
167 | friend class parser; |
168 | friend struct simdjson_result<raw_json_string>; |
169 | }; |
170 | |
171 | simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; |
172 | |
173 | /** |
174 | * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible |
175 | * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. |
176 | */ |
177 | simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; |
178 | simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; |
179 | simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; |
180 | simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; |
181 | |
182 | |
183 | } // namespace ondemand |
184 | } // namespace SIMDJSON_IMPLEMENTATION |
185 | } // namespace simdjson |
186 | |
187 | namespace simdjson { |
188 | |
189 | template<> |
190 | struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> { |
191 | public: |
192 | simdjson_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private |
193 | simdjson_inline simdjson_result(error_code error) noexcept; ///< @private |
194 | simdjson_inline simdjson_result() noexcept = default; |
195 | simdjson_inline ~simdjson_result() noexcept = default; ///< @private |
196 | |
197 | simdjson_inline simdjson_result<const char *> raw() const noexcept; |
198 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; |
199 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape_wobbly(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; |
200 | }; |
201 | |
202 | } // namespace simdjson |
203 | |