| 1 | #pragma once |
| 2 | |
| 3 | #include <array> // array |
| 4 | #include <cassert> // assert |
| 5 | #include <cstddef> // size_t |
| 6 | #include <cstdio> //FILE * |
| 7 | #include <cstring> // strlen |
| 8 | #include <istream> // istream |
| 9 | #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next |
| 10 | #include <memory> // shared_ptr, make_shared, addressof |
| 11 | #include <numeric> // accumulate |
| 12 | #include <string> // string, char_traits |
| 13 | #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer |
| 14 | #include <utility> // pair, declval |
| 15 | |
| 16 | #include <nlohmann/detail/iterators/iterator_traits.hpp> |
| 17 | #include <nlohmann/detail/macro_scope.hpp> |
| 18 | |
| 19 | namespace nlohmann |
| 20 | { |
| 21 | namespace detail |
| 22 | { |
/// the supported input formats
enum class input_format_t
{
    json = 0,    ///< JSON text
    cbor = 1,    ///< CBOR
    msgpack = 2, ///< MessagePack
    ubjson = 3,  ///< UBJSON
    bson = 4     ///< BSON
};
| 25 | |
| 26 | //////////////////// |
| 27 | // input adapters // |
| 28 | //////////////////// |
| 29 | |
/*!
@brief abstract input adapter interface

An input adapter turns some source (a std::istream, a memory buffer, a wide
string, ...) into a sequence of std::char_traits<char>::int_type values, one
per call to get_character().

Each returned value is either a valid char value represented as a
non-negative number (typically the range of unsigned char) or the end-of-input
marker std::char_traits<char>::eof(). The marker lies outside the range of
valid char values; it is typically -1, but may be any value that is not a
valid char value.
*/
struct input_adapter_protocol
{
    /// adapters are destroyed through pointers to this interface
    virtual ~input_adapter_protocol() = default;

    /// get a character [0,255] or std::char_traits<char>::eof().
    virtual std::char_traits<char>::int_type get_character() = 0;
};

/// shared handle to any input adapter; a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
| 50 | |
| 51 | /*! |
| 52 | Input adapter for stdio file access. This adapter read only 1 byte and do not use any |
| 53 | buffer. This adapter is a very low level adapter. |
| 54 | */ |
| 55 | class file_input_adapter : public input_adapter_protocol |
| 56 | { |
| 57 | public: |
| 58 | JSON_HEDLEY_NON_NULL(2) |
| 59 | explicit file_input_adapter(std::FILE* f) noexcept |
| 60 | : m_file(f) |
| 61 | {} |
| 62 | |
| 63 | // make class move-only |
| 64 | file_input_adapter(const file_input_adapter&) = delete; |
| 65 | file_input_adapter(file_input_adapter&&) = default; |
| 66 | file_input_adapter& operator=(const file_input_adapter&) = delete; |
| 67 | file_input_adapter& operator=(file_input_adapter&&) = default; |
| 68 | ~file_input_adapter() override = default; |
| 69 | |
| 70 | std::char_traits<char>::int_type get_character() noexcept override |
| 71 | { |
| 72 | return std::fgetc(m_file); |
| 73 | } |
| 74 | |
| 75 | private: |
| 76 | /// the file pointer to read from |
| 77 | std::FILE* m_file; |
| 78 | }; |
| 79 | |
| 80 | |
| 81 | /*! |
| 82 | Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at |
| 83 | beginning of input. Does not support changing the underlying std::streambuf |
| 84 | in mid-input. Maintains underlying std::istream and std::streambuf to support |
| 85 | subsequent use of standard std::istream operations to process any input |
| 86 | characters following those used in parsing the JSON input. Clears the |
| 87 | std::istream flags; any input errors (e.g., EOF) will be detected by the first |
| 88 | subsequent call for input from the std::istream. |
| 89 | */ |
| 90 | class input_stream_adapter : public input_adapter_protocol |
| 91 | { |
| 92 | public: |
| 93 | ~input_stream_adapter() override |
| 94 | { |
| 95 | // clear stream flags; we use underlying streambuf I/O, do not |
| 96 | // maintain ifstream flags, except eof |
| 97 | is.clear(is.rdstate() & std::ios::eofbit); |
| 98 | } |
| 99 | |
| 100 | explicit input_stream_adapter(std::istream& i) |
| 101 | : is(i), sb(*i.rdbuf()) |
| 102 | {} |
| 103 | |
| 104 | // delete because of pointer members |
| 105 | input_stream_adapter(const input_stream_adapter&) = delete; |
| 106 | input_stream_adapter& operator=(input_stream_adapter&) = delete; |
| 107 | input_stream_adapter(input_stream_adapter&&) = delete; |
| 108 | input_stream_adapter& operator=(input_stream_adapter&&) = delete; |
| 109 | |
| 110 | // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to |
| 111 | // ensure that std::char_traits<char>::eof() and the character 0xFF do not |
| 112 | // end up as the same value, eg. 0xFFFFFFFF. |
| 113 | std::char_traits<char>::int_type get_character() override |
| 114 | { |
| 115 | auto res = sb.sbumpc(); |
| 116 | // set eof manually, as we don't use the istream interface. |
| 117 | if (res == EOF) |
| 118 | { |
| 119 | is.clear(is.rdstate() | std::ios::eofbit); |
| 120 | } |
| 121 | return res; |
| 122 | } |
| 123 | |
| 124 | private: |
| 125 | /// the associated input stream |
| 126 | std::istream& is; |
| 127 | std::streambuf& sb; |
| 128 | }; |
| 129 | |
| 130 | /// input adapter for buffer input |
| 131 | class input_buffer_adapter : public input_adapter_protocol |
| 132 | { |
| 133 | public: |
| 134 | input_buffer_adapter(const char* b, const std::size_t l) noexcept |
| 135 | : cursor(b), limit(b == nullptr ? nullptr : (b + l)) |
| 136 | {} |
| 137 | |
| 138 | // delete because of pointer members |
| 139 | input_buffer_adapter(const input_buffer_adapter&) = delete; |
| 140 | input_buffer_adapter& operator=(input_buffer_adapter&) = delete; |
| 141 | input_buffer_adapter(input_buffer_adapter&&) = delete; |
| 142 | input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; |
| 143 | ~input_buffer_adapter() override = default; |
| 144 | |
| 145 | std::char_traits<char>::int_type get_character() noexcept override |
| 146 | { |
| 147 | if (JSON_HEDLEY_LIKELY(cursor < limit)) |
| 148 | { |
| 149 | assert(cursor != nullptr and limit != nullptr); |
| 150 | return std::char_traits<char>::to_int_type(*(cursor++)); |
| 151 | } |
| 152 | |
| 153 | return std::char_traits<char>::eof(); |
| 154 | } |
| 155 | |
| 156 | private: |
| 157 | /// pointer to the current character |
| 158 | const char* cursor; |
| 159 | /// pointer past the last character |
| 160 | const char* const limit; |
| 161 | }; |
| 162 | |
/*!
@brief transcodes the next character of a wide string to UTF-8 (UTF-32 input)

The primary template treats every element of @a str as one UTF-32 code unit.

fill_buffer() always resets @a utf8_bytes_index to 0 and then stores between
one and four values in @a utf8_bytes, setting @a utf8_bytes_filled to their
number:
- at the end of the string: the single value std::char_traits<char>::eof()
- for a code point up to U+10FFFF: its UTF-8 encoding
- for any larger value: the single byte 0xFF. This byte never occurs in
  well-formed UTF-8, so a consumer that validates UTF-8 can reject it. The raw
  value is deliberately not forwarded: it would lie outside [0,255] and could
  even equal eof(), silently cutting the input short.

@tparam WideStringType  string type providing size() and operator[]
@tparam T               size of a code unit in bytes

@param[in]     str                the wide string to read from
@param[in,out] current_wchar      index of the next unread element of @a str
@param[out]    utf8_bytes         receives the produced values
@param[out]    utf8_bytes_index   reset to 0
@param[out]    utf8_bytes_filled  number of values written to @a utf8_bytes
*/
template<typename WideStringType, std::size_t T>
struct wide_string_input_helper
{
    // UTF-32
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using int_type = std::char_traits<char>::int_type;

        // a byte that is not part of any well-formed UTF-8 sequence
        const int_type invalid_utf8_byte = 0xFF;

        utf8_bytes_index = 0;

        if (current_wchar >= str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-32 to UTF-8 encoding
        if (wc < 0x80u)
        {
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FFu)
        {
            utf8_bytes[0] = static_cast<int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (wc <= 0xFFFFu)
        {
            utf8_bytes[0] = static_cast<int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (wc <= 0x10FFFFu)
        {
            utf8_bytes[0] = static_cast<int_type>(0xF0u | ((wc >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // not a Unicode code point
            utf8_bytes[0] = invalid_utf8_byte;
            utf8_bytes_filled = 1;
        }
    }
};

/*!
@brief transcodes the next character of a wide string to UTF-8 (UTF-16 input)

Specialization for two-byte code units. The parameters have the same meaning
as in the primary template. A character outside the Basic Multilingual Plane
is read as a surrogate pair, i.e. two elements of @a str are consumed.

An unpaired surrogate (a high surrogate that is not followed by a low
surrogate, including one at the very end of the string, or a low surrogate
without a preceding high surrogate) produces the single byte 0xFF, which
never occurs in well-formed UTF-8. Only the offending element is consumed, so
@a current_wchar never moves past the end of @a str and the element after a
broken pair is not swallowed.
*/
template<typename WideStringType>
struct wide_string_input_helper<WideStringType, 2>
{
    // UTF-16
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using int_type = std::char_traits<char>::int_type;

        // a byte that is not part of any well-formed UTF-8 sequence
        const int_type invalid_utf8_byte = 0xFF;

        utf8_bytes_index = 0;

        if (current_wchar >= str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-16 to UTF-8 encoding
        if (wc < 0x80u)
        {
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
            return;
        }

        if (wc <= 0x7FFu)
        {
            utf8_bytes[0] = static_cast<int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
            return;
        }

        if (wc < 0xD800u or wc >= 0xE000u)
        {
            // BMP character outside the surrogate range
            utf8_bytes[0] = static_cast<int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
            return;
        }

        // wc is a surrogate: it is only valid as the high half (D800..DBFF)
        // of a pair whose low half (DC00..DFFF) follows immediately
        if (wc <= 0xDBFFu and current_wchar < str.size())
        {
            // peek first; the unit is consumed only if it completes the pair
            const auto wc2 = static_cast<unsigned int>(str[current_wchar]);
            if (wc2 >= 0xDC00u and wc2 <= 0xDFFFu)
            {
                ++current_wchar;
                const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
                utf8_bytes[0] = static_cast<int_type>(0xF0u | (charcode >> 18u));
                utf8_bytes[1] = static_cast<int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
                utf8_bytes[2] = static_cast<int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
                utf8_bytes[3] = static_cast<int_type>(0x80u | (charcode & 0x3Fu));
                utf8_bytes_filled = 4;
                return;
            }
        }

        // unpaired surrogate
        utf8_bytes[0] = invalid_utf8_byte;
        utf8_bytes_filled = 1;
    }
};
| 286 | |
| 287 | template<typename WideStringType> |
| 288 | class wide_string_input_adapter : public input_adapter_protocol |
| 289 | { |
| 290 | public: |
| 291 | explicit wide_string_input_adapter(const WideStringType& w) noexcept |
| 292 | : str(w) |
| 293 | {} |
| 294 | |
| 295 | std::char_traits<char>::int_type get_character() noexcept override |
| 296 | { |
| 297 | // check if buffer needs to be filled |
| 298 | if (utf8_bytes_index == utf8_bytes_filled) |
| 299 | { |
| 300 | fill_buffer<sizeof(typename WideStringType::value_type)>(); |
| 301 | |
| 302 | assert(utf8_bytes_filled > 0); |
| 303 | assert(utf8_bytes_index == 0); |
| 304 | } |
| 305 | |
| 306 | // use buffer |
| 307 | assert(utf8_bytes_filled > 0); |
| 308 | assert(utf8_bytes_index < utf8_bytes_filled); |
| 309 | return utf8_bytes[utf8_bytes_index++]; |
| 310 | } |
| 311 | |
| 312 | private: |
| 313 | template<size_t T> |
| 314 | void fill_buffer() |
| 315 | { |
| 316 | wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); |
| 317 | } |
| 318 | |
| 319 | /// the wstring to process |
| 320 | const WideStringType& str; |
| 321 | |
| 322 | /// index of the current wchar in str |
| 323 | std::size_t current_wchar = 0; |
| 324 | |
| 325 | /// a buffer for UTF-8 bytes |
| 326 | std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; |
| 327 | |
| 328 | /// index to the utf8_codes array for the next valid byte |
| 329 | std::size_t utf8_bytes_index = 0; |
| 330 | /// number of valid bytes in the utf8_codes array |
| 331 | std::size_t utf8_bytes_filled = 0; |
| 332 | }; |
| 333 | |
| 334 | class input_adapter |
| 335 | { |
| 336 | public: |
| 337 | // native support |
| 338 | JSON_HEDLEY_NON_NULL(2) |
| 339 | input_adapter(std::FILE* file) |
| 340 | : ia(std::make_shared<file_input_adapter>(file)) {} |
| 341 | /// input adapter for input stream |
| 342 | input_adapter(std::istream& i) |
| 343 | : ia(std::make_shared<input_stream_adapter>(i)) {} |
| 344 | |
| 345 | /// input adapter for input stream |
| 346 | input_adapter(std::istream&& i) |
| 347 | : ia(std::make_shared<input_stream_adapter>(i)) {} |
| 348 | |
| 349 | input_adapter(const std::wstring& ws) |
| 350 | : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {} |
| 351 | |
| 352 | input_adapter(const std::u16string& ws) |
| 353 | : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {} |
| 354 | |
| 355 | input_adapter(const std::u32string& ws) |
| 356 | : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {} |
| 357 | |
| 358 | /// input adapter for buffer |
| 359 | template<typename CharT, |
| 360 | typename std::enable_if< |
| 361 | std::is_pointer<CharT>::value and |
| 362 | std::is_integral<typename std::remove_pointer<CharT>::type>::value and |
| 363 | sizeof(typename std::remove_pointer<CharT>::type) == 1, |
| 364 | int>::type = 0> |
| 365 | input_adapter(CharT b, std::size_t l) |
| 366 | : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {} |
| 367 | |
| 368 | // derived support |
| 369 | |
| 370 | /// input adapter for string literal |
| 371 | template<typename CharT, |
| 372 | typename std::enable_if< |
| 373 | std::is_pointer<CharT>::value and |
| 374 | std::is_integral<typename std::remove_pointer<CharT>::type>::value and |
| 375 | sizeof(typename std::remove_pointer<CharT>::type) == 1, |
| 376 | int>::type = 0> |
| 377 | input_adapter(CharT b) |
| 378 | : input_adapter(reinterpret_cast<const char*>(b), |
| 379 | std::strlen(reinterpret_cast<const char*>(b))) {} |
| 380 | |
| 381 | /// input adapter for iterator range with contiguous storage |
| 382 | template<class IteratorType, |
| 383 | typename std::enable_if< |
| 384 | std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value, |
| 385 | int>::type = 0> |
| 386 | input_adapter(IteratorType first, IteratorType last) |
| 387 | { |
| 388 | #ifndef NDEBUG |
| 389 | // assertion to check that the iterator range is indeed contiguous, |
| 390 | // see http://stackoverflow.com/a/35008842/266378 for more discussion |
| 391 | const auto is_contiguous = std::accumulate( |
| 392 | first, last, std::pair<bool, int>(true, 0), |
| 393 | [&first](std::pair<bool, int> res, decltype(*first) val) |
| 394 | { |
| 395 | res.first &= (val == *(std::next(std::addressof(*first), res.second++))); |
| 396 | return res; |
| 397 | }).first; |
| 398 | assert(is_contiguous); |
| 399 | #endif |
| 400 | |
| 401 | // assertion to check that each element is 1 byte long |
| 402 | static_assert( |
| 403 | sizeof(typename iterator_traits<IteratorType>::value_type) == 1, |
| 404 | "each element in the iterator range must have the size of 1 byte" ); |
| 405 | |
| 406 | const auto len = static_cast<size_t>(std::distance(first, last)); |
| 407 | if (JSON_HEDLEY_LIKELY(len > 0)) |
| 408 | { |
| 409 | // there is at least one element: use the address of first |
| 410 | ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len); |
| 411 | } |
| 412 | else |
| 413 | { |
| 414 | // the address of first cannot be used: use nullptr |
| 415 | ia = std::make_shared<input_buffer_adapter>(nullptr, len); |
| 416 | } |
| 417 | } |
| 418 | |
| 419 | /// input adapter for array |
| 420 | template<class T, std::size_t N> |
| 421 | input_adapter(T (&array)[N]) |
| 422 | : input_adapter(std::begin(array), std::end(array)) {} |
| 423 | |
| 424 | /// input adapter for contiguous container |
| 425 | template<class ContiguousContainer, typename |
| 426 | std::enable_if<not std::is_pointer<ContiguousContainer>::value and |
| 427 | std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value, |
| 428 | int>::type = 0> |
| 429 | input_adapter(const ContiguousContainer& c) |
| 430 | : input_adapter(std::begin(c), std::end(c)) {} |
| 431 | |
| 432 | operator input_adapter_t() |
| 433 | { |
| 434 | return ia; |
| 435 | } |
| 436 | |
| 437 | private: |
| 438 | /// the actual adapter |
| 439 | input_adapter_t ia = nullptr; |
| 440 | }; |
| 441 | } // namespace detail |
| 442 | } // namespace nlohmann |
| 443 | |