1#include "simdjson/error.h"
2
3namespace simdjson {
4namespace SIMDJSON_IMPLEMENTATION {
5namespace ondemand {
6
// Forward declarations of On-Demand types that raw_json_string refers to
// (as friends or as parameter types) before their full definitions are seen.
class object;
class parser;
class json_iterator;
10
11/**
12 * A string escaped per JSON rules, terminated with quote ("). They are used to represent
13 * unescaped keys inside JSON documents.
14 *
15 * (In other words, a pointer to the beginning of a string, just after the start quote, inside a
16 * JSON file.)
17 *
18 * This class is deliberately simplistic and has little functionality. You can
19 * compare a raw_json_string instance with an unescaped C string, but
20 * that is nearly all you can do.
21 *
22 * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own
23 * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser
24 * instance. Doing so requires you to have a sufficiently large buffer.
25 *
26 * The raw_json_string instances originate typically from field instance which in turn represent
27 * key-value pairs from object instances. From a field instance, you get the raw_json_string
28 * instance by calling key(). You can, if you want a more usable string_view instance, call
29 * the unescaped_key() method on the field instance. You may also create a raw_json_string from
30 * any other string value, with the value.get_raw_json_string() method. Again, you can get
31 * a more usable string_view instance by calling get_string().
32 *
33 */
34class raw_json_string {
35public:
36 /**
37 * Create a new invalid raw_json_string.
38 *
39 * Exists so you can declare a variable and later assign to it before use.
40 */
41 simdjson_inline raw_json_string() noexcept = default;
42
43 /**
44 * Create a new invalid raw_json_string pointed at the given location in the JSON.
45 *
46 * The given location must be just *after* the beginning quote (") in the JSON file.
47 *
48 * It *must* be terminated by a ", and be a valid JSON string.
49 */
50 simdjson_inline raw_json_string(const uint8_t * _buf) noexcept;
51 /**
52 * Get the raw pointer to the beginning of the string in the JSON (just after the ").
53 *
54 * It is possible for this function to return a null pointer if the instance
55 * has outlived its existence.
56 */
57 simdjson_inline const char * raw() const noexcept;
58
59 /**
60 * This compares the current instance to the std::string_view target: returns true if
61 * they are byte-by-byte equal (no escaping is done) on target.size() characters,
62 * and if the raw_json_string instance has a quote character at byte index target.size().
63 * We never read more than length + 1 bytes in the raw_json_string instance.
64 * If length is smaller than target.size(), this will return false.
65 *
66 * The std::string_view instance may contain any characters. However, the caller
67 * is responsible for setting length so that length bytes may be read in the
68 * raw_json_string.
69 *
70 * Performance: the comparison may be done using memcmp which may be efficient
71 * for long strings.
72 */
73 simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept;
74
75 /**
76 * This compares the current instance to the std::string_view target: returns true if
77 * they are byte-by-byte equal (no escaping is done).
78 * The std::string_view instance should not contain unescaped quote characters:
79 * the caller is responsible for this check. See is_free_from_unescaped_quote.
80 *
81 * Performance: the comparison is done byte-by-byte which might be inefficient for
82 * long strings.
83 *
84 * If target is a compile-time constant, and your compiler likes you,
85 * you should be able to do the following without performance penalty...
86 *
87 * static_assert(raw_json_string::is_free_from_unescaped_quote(target), "");
88 * s.unsafe_is_equal(target);
89 */
90 simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept;
91
92 /**
93 * This compares the current instance to the C string target: returns true if
94 * they are byte-by-byte equal (no escaping is done).
95 * The provided C string should not contain an unescaped quote character:
96 * the caller is responsible for this check. See is_free_from_unescaped_quote.
97 *
98 * If target is a compile-time constant, and your compiler likes you,
99 * you should be able to do the following without performance penalty...
100 *
101 * static_assert(raw_json_string::is_free_from_unescaped_quote(target), "");
102 * s.unsafe_is_equal(target);
103 */
104 simdjson_inline bool unsafe_is_equal(const char* target) const noexcept;
105
106 /**
107 * This compares the current instance to the std::string_view target: returns true if
108 * they are byte-by-byte equal (no escaping is done).
109 */
110 simdjson_inline bool is_equal(std::string_view target) const noexcept;
111
112 /**
113 * This compares the current instance to the C string target: returns true if
114 * they are byte-by-byte equal (no escaping is done).
115 */
116 simdjson_inline bool is_equal(const char* target) const noexcept;
117
118 /**
119 * Returns true if target is free from unescaped quote. If target is known at
120 * compile-time, we might expect the computation to happen at compile time with
121 * many compilers (not all!).
122 */
123 static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept;
124 static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept;
125
126private:
127
128
129 /**
130 * This will set the inner pointer to zero, effectively making
131 * this instance unusable.
132 */
133 simdjson_inline void consume() noexcept { buf = nullptr; }
134
135 /**
136 * Checks whether the inner pointer is non-null and thus usable.
137 */
138 simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; }
139
140 /**
141 * Unescape this JSON string, replacing \\ with \, \n with newline, etc.
142 * The result will be a valid UTF-8.
143 *
144 * ## IMPORTANT: string_view lifetime
145 *
146 * The string_view is only valid until the next parse() call on the parser.
147 *
148 * @param iter A json_iterator, which contains a buffer where the string will be written.
149 * @param allow_replacement Whether we allow replacement of invalid surrogate pairs.
150 */
151 simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(json_iterator &iter, bool allow_replacement) const noexcept;
152
153 /**
154 * Unescape this JSON string, replacing \\ with \, \n with newline, etc.
155 * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/
156 *
157 * ## IMPORTANT: string_view lifetime
158 *
159 * The string_view is only valid until the next parse() call on the parser.
160 *
161 * @param iter A json_iterator, which contains a buffer where the string will be written.
162 */
163 simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape_wobbly(json_iterator &iter) const noexcept;
164 const uint8_t * buf{};
165 friend class object;
166 friend class field;
167 friend class parser;
168 friend struct simdjson_result<raw_json_string>;
169};
170
171simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept;
172
173/**
174 * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible
175 * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings.
176 */
177simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept;
178simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept;
179simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept;
180simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept;
181
182
183} // namespace ondemand
184} // namespace SIMDJSON_IMPLEMENTATION
185} // namespace simdjson
186
187namespace simdjson {
188
189template<>
190struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> {
191public:
192 simdjson_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private
193 simdjson_inline simdjson_result(error_code error) noexcept; ///< @private
194 simdjson_inline simdjson_result() noexcept = default;
195 simdjson_inline ~simdjson_result() noexcept = default; ///< @private
196
197 simdjson_inline simdjson_result<const char *> raw() const noexcept;
198 simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter, bool allow_replacement) const noexcept;
199 simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape_wobbly(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept;
200};
201
202} // namespace simdjson
203