| 1 | #include "simdjson/error.h" |
| 2 | |
| 3 | namespace simdjson { |
| 4 | namespace SIMDJSON_IMPLEMENTATION { |
| 5 | namespace ondemand { |
| 6 | |
| 7 | class object; |
| 8 | class parser; |
| 9 | class json_iterator; |
| 10 | |
| 11 | /** |
| 12 | * A string escaped per JSON rules, terminated with a quote ("). Instances are used to represent |
| 13 | * keys inside JSON documents in their raw form, before any unescaping is applied. |
| 14 | * |
| 15 | * (In other words, a pointer to the beginning of a string, just after the start quote, inside a |
| 16 | * JSON file.) |
| 17 | * |
| 18 | * This class is deliberately simplistic and has little functionality. You can |
| 19 | * compare a raw_json_string instance with an unescaped C string, but |
| 20 | * that is nearly all you can do. |
| 21 | * |
| 22 | * The raw_json_string has not been unescaped: it still holds the string as escaped in the JSON. If you wish to write an unescaped version of it to your own |
| 23 | * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser |
| 24 | * instance. Doing so requires you to have a sufficiently large buffer. |
| 25 | * |
| 26 | * The raw_json_string instances typically originate from field instances, which in turn represent |
| 27 | * key-value pairs from object instances. From a field instance, you get the raw_json_string |
| 28 | * instance by calling key(). You can, if you want a more usable string_view instance, call |
| 29 | * the unescaped_key() method on the field instance. You may also create a raw_json_string from |
| 30 | * any other string value, with the value.get_raw_json_string() method. Again, you can get |
| 31 | * a more usable string_view instance by calling get_string(). |
| 32 | * |
| 33 | */ |
| 34 | class raw_json_string { |
| 35 | public: |
| 36 | /** |
| 37 | * Create a new invalid raw_json_string. |
| 38 | * |
| 39 | * Exists so you can declare a variable and later assign to it before use. |
| 40 | */ |
| 41 | simdjson_inline raw_json_string() noexcept = default; |
| 42 | |
| 43 | /** |
| 44 | * Create a new raw_json_string pointing at the given location in the JSON. |
| 45 | * |
| 46 | * The given location must be just *after* the beginning quote (") in the JSON file. |
| 47 | * |
| 48 | * It *must* be terminated by a ", and be a valid JSON string. |
| 49 | */ |
| 50 | simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; |
| 51 | /** |
| 52 | * Get the raw pointer to the beginning of the string in the JSON (just after the "). |
| 53 | * |
| 54 | * It is possible for this function to return a null pointer if the instance |
| 55 | * is not usable: the inner pointer is null when default-constructed and after consume(). |
| 56 | */ |
| 57 | simdjson_inline const char * raw() const noexcept; |
| 58 | |
| 59 | /** |
| 60 | * This compares the current instance to the std::string_view target: returns true if |
| 61 | * they are byte-by-byte equal (no escaping is done) on target.size() characters, |
| 62 | * and if the raw_json_string instance has a quote character at byte index target.size(). |
| 63 | * We never read more than length + 1 bytes in the raw_json_string instance. |
| 64 | * If length is smaller than target.size(), this will return false. |
| 65 | * |
| 66 | * The std::string_view instance may contain any characters. However, the caller |
| 67 | * is responsible for setting length so that length bytes may be read in the |
| 68 | * raw_json_string. |
| 69 | * |
| 70 | * Performance: the comparison may be done using memcmp which may be efficient |
| 71 | * for long strings. |
| 72 | */ |
| 73 | simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; |
| 74 | |
| 75 | /** |
| 76 | * This compares the current instance to the std::string_view target: returns true if |
| 77 | * they are byte-by-byte equal (no escaping is done). |
| 78 | * The std::string_view instance should not contain unescaped quote characters: |
| 79 | * the caller is responsible for this check. See is_free_from_unescaped_quote. |
| 80 | * |
| 81 | * Performance: the comparison is done byte-by-byte which might be inefficient for |
| 82 | * long strings. |
| 83 | * |
| 84 | * If target is a compile-time constant, and your compiler likes you, |
| 85 | * you should be able to do the following without performance penalty... |
| 86 | * |
| 87 | * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); |
| 88 | * s.unsafe_is_equal(target); |
| 89 | */ |
| 90 | simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; |
| 91 | |
| 92 | /** |
| 93 | * This compares the current instance to the C string target: returns true if |
| 94 | * they are byte-by-byte equal (no escaping is done). |
| 95 | * The provided C string should not contain an unescaped quote character: |
| 96 | * the caller is responsible for this check. See is_free_from_unescaped_quote. |
| 97 | * |
| 98 | * If target is a compile-time constant, and your compiler likes you, |
| 99 | * you should be able to do the following without performance penalty... |
| 100 | * |
| 101 | * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); |
| 102 | * s.unsafe_is_equal(target); |
| 103 | */ |
| 104 | simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; |
| 105 | |
| 106 | /** |
| 107 | * This compares the current instance to the std::string_view target: returns true if |
| 108 | * they are byte-by-byte equal (no escaping is done). |
| 109 | */ |
| 110 | simdjson_inline bool is_equal(std::string_view target) const noexcept; |
| 111 | |
| 112 | /** |
| 113 | * This compares the current instance to the C string target: returns true if |
| 114 | * they are byte-by-byte equal (no escaping is done). |
| 115 | */ |
| 116 | simdjson_inline bool is_equal(const char* target) const noexcept; |
| 117 | |
| 118 | /** |
| 119 | * Returns true if target is free from unescaped quotes (std::string_view and C string overloads). If target is known at |
| 120 | * compile-time, we might expect the computation to happen at compile time with |
| 121 | * many compilers (not all!). |
| 122 | */ |
| 123 | static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; |
| 124 | static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; |
| 125 | |
| 126 | private: |
| 127 | |
| 128 | |
| 129 | /** |
| 130 | * This will set the inner pointer to null, effectively making |
| 131 | * this instance unusable. |
| 132 | */ |
| 133 | simdjson_inline void consume() noexcept { buf = nullptr; } |
| 134 | |
| 135 | /** |
| 136 | * Checks whether the inner pointer is non-null and thus usable. |
| 137 | */ |
| 138 | simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } |
| 139 | |
| 140 | /** |
| 141 | * Unescape this JSON string, replacing \\ with \, \n with newline, etc. |
| 142 | * The result will be valid UTF-8. |
| 143 | * |
| 144 | * ## IMPORTANT: string_view lifetime |
| 145 | * |
| 146 | * The string_view is only valid until the next parse() call on the parser. |
| 147 | * |
| 148 | * @param iter A json_iterator, which contains a buffer where the string will be written. |
| 149 | * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. |
| 150 | */ |
| 151 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(json_iterator &iter, bool allow_replacement) const noexcept; |
| 152 | |
| 153 | /** |
| 154 | * Unescape this JSON string, replacing \\ with \, \n with newline, etc. |
| 155 | * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ |
| 156 | * |
| 157 | * ## IMPORTANT: string_view lifetime |
| 158 | * |
| 159 | * The string_view is only valid until the next parse() call on the parser. |
| 160 | * |
| 161 | * @param iter A json_iterator, which contains a buffer where the string will be written. |
| 162 | */ |
| 163 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape_wobbly(json_iterator &iter) const noexcept; |
| 164 | const uint8_t * buf{}; |
| 165 | friend class object; |
| 166 | friend class field; |
| 167 | friend class parser; |
| 168 | friend struct simdjson_result<raw_json_string>; |
| 169 | }; |
| 170 | |
| 171 | simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; |
| 172 | |
| 173 | /** |
| 174 | * Equality (==) and inequality (!=) comparisons between raw_json_string and std::string_view instances, declared for both argument orders. They are potentially unsafe: the user is responsible |
| 175 | * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. |
| 176 | */ |
| 177 | simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; |
| 178 | simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; |
| 179 | simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; |
| 180 | simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; |
| 181 | |
| 182 | |
| 183 | } // namespace ondemand |
| 184 | } // namespace SIMDJSON_IMPLEMENTATION |
| 185 | } // namespace simdjson |
| 186 | |
| 187 | namespace simdjson { |
| 188 | |
| 189 | template<> |
| 190 | struct simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> : public SIMDJSON_IMPLEMENTATION::implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string> { |
| 191 | public: |
| 192 | simdjson_inline simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private Constructs a result from a raw_json_string value. |
| 193 | simdjson_inline simdjson_result(error_code error) noexcept; ///< @private Constructs a result from an error code. |
| 194 | simdjson_inline simdjson_result() noexcept = default; |
| 195 | simdjson_inline ~simdjson_result() noexcept = default; ///< @private |
| 196 | |
| 197 | simdjson_inline simdjson_result<const char *> raw() const noexcept; |
| 198 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; |
| 199 | simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> unescape_wobbly(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; |
| 200 | }; |
| 201 | |
| 202 | } // namespace simdjson |
| 203 | |