1 | #pragma once |
2 | |
3 | #include <array> // array |
4 | #include <cassert> // assert |
5 | #include <cstddef> // size_t |
6 | #include <cstdio> //FILE * |
7 | #include <cstring> // strlen |
8 | #include <istream> // istream |
9 | #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next |
10 | #include <memory> // shared_ptr, make_shared, addressof |
11 | #include <numeric> // accumulate |
12 | #include <string> // string, char_traits |
13 | #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer |
14 | #include <utility> // pair, declval |
15 | |
16 | #include <nlohmann/detail/iterators/iterator_traits.hpp> |
17 | #include <nlohmann/detail/macro_scope.hpp> |
18 | |
19 | namespace nlohmann |
20 | { |
21 | namespace detail |
22 | { |
/// the input formats that can be selected for reading; the enumerators keep
/// their declaration order, so their underlying values run from 0 (json) to
/// 4 (bson)
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
25 | |
26 | //////////////////// |
27 | // input adapters // |
28 | //////////////////// |
29 | |
/*!
@brief abstract input adapter interface

Common base class of the concrete adapters. An adapter hands out its input
one character at a time through get_character(). Each call returns a
std::char_traits<char>::int_type that is either a character value in the
range [0,255] or the end-of-input marker std::char_traits<char>::eof(). The
marker is typically -1, but the only guarantee is that it differs from every
valid character value.
*/
struct input_adapter_protocol
{
    /// virtual so that adapters can be destroyed through a base pointer
    virtual ~input_adapter_protocol() = default;

    /// get the next character [0,255] or std::char_traits<char>::eof()
    virtual std::char_traits<char>::int_type get_character() = 0;
};

/// shared-ownership handle to any adapter; used to keep interfaces simple
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
50 | |
51 | /*! |
52 | Input adapter for stdio file access. This adapter read only 1 byte and do not use any |
53 | buffer. This adapter is a very low level adapter. |
54 | */ |
55 | class file_input_adapter : public input_adapter_protocol |
56 | { |
57 | public: |
58 | JSON_HEDLEY_NON_NULL(2) |
59 | explicit file_input_adapter(std::FILE* f) noexcept |
60 | : m_file(f) |
61 | {} |
62 | |
63 | // make class move-only |
64 | file_input_adapter(const file_input_adapter&) = delete; |
65 | file_input_adapter(file_input_adapter&&) = default; |
66 | file_input_adapter& operator=(const file_input_adapter&) = delete; |
67 | file_input_adapter& operator=(file_input_adapter&&) = default; |
68 | ~file_input_adapter() override = default; |
69 | |
70 | std::char_traits<char>::int_type get_character() noexcept override |
71 | { |
72 | return std::fgetc(m_file); |
73 | } |
74 | |
75 | private: |
76 | /// the file pointer to read from |
77 | std::FILE* m_file; |
78 | }; |
79 | |
80 | |
81 | /*! |
82 | Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at |
83 | beginning of input. Does not support changing the underlying std::streambuf |
84 | in mid-input. Maintains underlying std::istream and std::streambuf to support |
85 | subsequent use of standard std::istream operations to process any input |
86 | characters following those used in parsing the JSON input. Clears the |
87 | std::istream flags; any input errors (e.g., EOF) will be detected by the first |
88 | subsequent call for input from the std::istream. |
89 | */ |
90 | class input_stream_adapter : public input_adapter_protocol |
91 | { |
92 | public: |
93 | ~input_stream_adapter() override |
94 | { |
95 | // clear stream flags; we use underlying streambuf I/O, do not |
96 | // maintain ifstream flags, except eof |
97 | is.clear(is.rdstate() & std::ios::eofbit); |
98 | } |
99 | |
100 | explicit input_stream_adapter(std::istream& i) |
101 | : is(i), sb(*i.rdbuf()) |
102 | {} |
103 | |
104 | // delete because of pointer members |
105 | input_stream_adapter(const input_stream_adapter&) = delete; |
106 | input_stream_adapter& operator=(input_stream_adapter&) = delete; |
107 | input_stream_adapter(input_stream_adapter&&) = delete; |
108 | input_stream_adapter& operator=(input_stream_adapter&&) = delete; |
109 | |
110 | // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to |
111 | // ensure that std::char_traits<char>::eof() and the character 0xFF do not |
112 | // end up as the same value, eg. 0xFFFFFFFF. |
113 | std::char_traits<char>::int_type get_character() override |
114 | { |
115 | auto res = sb.sbumpc(); |
116 | // set eof manually, as we don't use the istream interface. |
117 | if (res == EOF) |
118 | { |
119 | is.clear(is.rdstate() | std::ios::eofbit); |
120 | } |
121 | return res; |
122 | } |
123 | |
124 | private: |
125 | /// the associated input stream |
126 | std::istream& is; |
127 | std::streambuf& sb; |
128 | }; |
129 | |
130 | /// input adapter for buffer input |
131 | class input_buffer_adapter : public input_adapter_protocol |
132 | { |
133 | public: |
134 | input_buffer_adapter(const char* b, const std::size_t l) noexcept |
135 | : cursor(b), limit(b == nullptr ? nullptr : (b + l)) |
136 | {} |
137 | |
138 | // delete because of pointer members |
139 | input_buffer_adapter(const input_buffer_adapter&) = delete; |
140 | input_buffer_adapter& operator=(input_buffer_adapter&) = delete; |
141 | input_buffer_adapter(input_buffer_adapter&&) = delete; |
142 | input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; |
143 | ~input_buffer_adapter() override = default; |
144 | |
145 | std::char_traits<char>::int_type get_character() noexcept override |
146 | { |
147 | if (JSON_HEDLEY_LIKELY(cursor < limit)) |
148 | { |
149 | assert(cursor != nullptr and limit != nullptr); |
150 | return std::char_traits<char>::to_int_type(*(cursor++)); |
151 | } |
152 | |
153 | return std::char_traits<char>::eof(); |
154 | } |
155 | |
156 | private: |
157 | /// pointer to the current character |
158 | const char* cursor; |
159 | /// pointer past the last character |
160 | const char* const limit; |
161 | }; |
162 | |
/*!
@brief converts the next character of a wide string into UTF-8 bytes

Primary template: every element of @a str is taken as one complete code point
(UTF-32). The specialization for T == 2 below handles UTF-16.

@tparam WideStringType  string type providing size() and operator[]
@tparam T               size in bytes of one element of WideStringType

@param[in]     str                the string to read from
@param[in,out] current_wchar      index of the next unread element of @a str;
                                  advanced past the element consumed
@param[out]    utf8_bytes         receives the produced values
@param[out]    utf8_bytes_index   always reset to 0
@param[out]    utf8_bytes_filled  number of valid entries in @a utf8_bytes
                                  (always at least 1)

At the end of @a str a single std::char_traits<char>::eof() is produced. A
value above 0x10FFFF cannot be encoded; it is stored unchanged as a single
entry, which is then not a valid byte value.
*/
template<typename WideStringType, std::size_t T>
struct wide_string_input_helper
{
    // UTF-32
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        utf8_bytes_index = 0;

        if (current_wchar == str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
        }
        else
        {
            // get the current character
            const auto wc = static_cast<unsigned int>(str[current_wchar++]);

            // UTF-32 to UTF-8 encoding
            if (wc < 0x80)
            {
                // 1 byte: 0xxxxxxx
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                utf8_bytes_filled = 1;
            }
            else if (wc <= 0x7FF)
            {
                // 2 bytes: 110xxxxx 10xxxxxx
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
                utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                utf8_bytes_filled = 2;
            }
            else if (wc <= 0xFFFF)
            {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
                utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
                utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                utf8_bytes_filled = 3;
            }
            else if (wc <= 0x10FFFF)
            {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((wc >> 18u) & 0x07u));
                utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 12u) & 0x3Fu));
                utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
                utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                utf8_bytes_filled = 4;
            }
            else
            {
                // unknown character: pass the raw value through unchanged
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                utf8_bytes_filled = 1;
            }
        }
    }
};

/*!
@brief UTF-16 variant of wide_string_input_helper (2-byte elements)

Same contract as the primary template, except that a code unit in the range
[0xD800, 0xDFFF] is treated as the first half of a surrogate pair: the
following code unit is consumed as well and both are combined into one code
point that is encoded as 4 bytes. Neither half is checked for being a proper
high/low surrogate.

If such a code unit is the last element of @a str, there is nothing to pair
it with; its raw value is stored unchanged as a single entry (not a valid
byte value) and the next call reports std::char_traits<char>::eof().
*/
template<typename WideStringType>
struct wide_string_input_helper<WideStringType, 2>
{
    // UTF-16
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        utf8_bytes_index = 0;

        if (current_wchar == str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
        }
        else
        {
            // get the current character
            const auto wc = static_cast<unsigned int>(str[current_wchar++]);

            // UTF-16 to UTF-8 encoding
            if (wc < 0x80)
            {
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                utf8_bytes_filled = 1;
            }
            else if (wc <= 0x7FF)
            {
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u)));
                utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                utf8_bytes_filled = 2;
            }
            else if (0xD800 > wc or wc >= 0xE000)
            {
                // code unit outside the surrogate range: 3-byte sequence
                utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u)));
                utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
                utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
                utf8_bytes_filled = 3;
            }
            else
            {
                if (current_wchar < str.size())
                {
                    // combine both halves of the surrogate pair into one code point
                    const auto wc2 = static_cast<unsigned int>(str[current_wchar++]);
                    const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
                    utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
                    utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
                    utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
                    utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
                    utf8_bytes_filled = 4;
                }
                else
                {
                    // unknown character: surrogate without a partner at the
                    // end of the input. current_wchar was already advanced
                    // when wc was read and now equals str.size(); advancing
                    // it again would step past the end, the equality test
                    // above would never match again, and the next call would
                    // read outside the string.
                    utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
                    utf8_bytes_filled = 1;
                }
            }
        }
    }
};
286 | |
287 | template<typename WideStringType> |
288 | class wide_string_input_adapter : public input_adapter_protocol |
289 | { |
290 | public: |
291 | explicit wide_string_input_adapter(const WideStringType& w) noexcept |
292 | : str(w) |
293 | {} |
294 | |
295 | std::char_traits<char>::int_type get_character() noexcept override |
296 | { |
297 | // check if buffer needs to be filled |
298 | if (utf8_bytes_index == utf8_bytes_filled) |
299 | { |
300 | fill_buffer<sizeof(typename WideStringType::value_type)>(); |
301 | |
302 | assert(utf8_bytes_filled > 0); |
303 | assert(utf8_bytes_index == 0); |
304 | } |
305 | |
306 | // use buffer |
307 | assert(utf8_bytes_filled > 0); |
308 | assert(utf8_bytes_index < utf8_bytes_filled); |
309 | return utf8_bytes[utf8_bytes_index++]; |
310 | } |
311 | |
312 | private: |
313 | template<size_t T> |
314 | void fill_buffer() |
315 | { |
316 | wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); |
317 | } |
318 | |
319 | /// the wstring to process |
320 | const WideStringType& str; |
321 | |
322 | /// index of the current wchar in str |
323 | std::size_t current_wchar = 0; |
324 | |
325 | /// a buffer for UTF-8 bytes |
326 | std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; |
327 | |
328 | /// index to the utf8_codes array for the next valid byte |
329 | std::size_t utf8_bytes_index = 0; |
330 | /// number of valid bytes in the utf8_codes array |
331 | std::size_t utf8_bytes_filled = 0; |
332 | }; |
333 | |
334 | class input_adapter |
335 | { |
336 | public: |
337 | // native support |
338 | JSON_HEDLEY_NON_NULL(2) |
339 | input_adapter(std::FILE* file) |
340 | : ia(std::make_shared<file_input_adapter>(file)) {} |
341 | /// input adapter for input stream |
342 | input_adapter(std::istream& i) |
343 | : ia(std::make_shared<input_stream_adapter>(i)) {} |
344 | |
345 | /// input adapter for input stream |
346 | input_adapter(std::istream&& i) |
347 | : ia(std::make_shared<input_stream_adapter>(i)) {} |
348 | |
349 | input_adapter(const std::wstring& ws) |
350 | : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {} |
351 | |
352 | input_adapter(const std::u16string& ws) |
353 | : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {} |
354 | |
355 | input_adapter(const std::u32string& ws) |
356 | : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {} |
357 | |
358 | /// input adapter for buffer |
359 | template<typename CharT, |
360 | typename std::enable_if< |
361 | std::is_pointer<CharT>::value and |
362 | std::is_integral<typename std::remove_pointer<CharT>::type>::value and |
363 | sizeof(typename std::remove_pointer<CharT>::type) == 1, |
364 | int>::type = 0> |
365 | input_adapter(CharT b, std::size_t l) |
366 | : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {} |
367 | |
368 | // derived support |
369 | |
370 | /// input adapter for string literal |
371 | template<typename CharT, |
372 | typename std::enable_if< |
373 | std::is_pointer<CharT>::value and |
374 | std::is_integral<typename std::remove_pointer<CharT>::type>::value and |
375 | sizeof(typename std::remove_pointer<CharT>::type) == 1, |
376 | int>::type = 0> |
377 | input_adapter(CharT b) |
378 | : input_adapter(reinterpret_cast<const char*>(b), |
379 | std::strlen(reinterpret_cast<const char*>(b))) {} |
380 | |
381 | /// input adapter for iterator range with contiguous storage |
382 | template<class IteratorType, |
383 | typename std::enable_if< |
384 | std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value, |
385 | int>::type = 0> |
386 | input_adapter(IteratorType first, IteratorType last) |
387 | { |
388 | #ifndef NDEBUG |
389 | // assertion to check that the iterator range is indeed contiguous, |
390 | // see http://stackoverflow.com/a/35008842/266378 for more discussion |
391 | const auto is_contiguous = std::accumulate( |
392 | first, last, std::pair<bool, int>(true, 0), |
393 | [&first](std::pair<bool, int> res, decltype(*first) val) |
394 | { |
395 | res.first &= (val == *(std::next(std::addressof(*first), res.second++))); |
396 | return res; |
397 | }).first; |
398 | assert(is_contiguous); |
399 | #endif |
400 | |
401 | // assertion to check that each element is 1 byte long |
402 | static_assert( |
403 | sizeof(typename iterator_traits<IteratorType>::value_type) == 1, |
404 | "each element in the iterator range must have the size of 1 byte" ); |
405 | |
406 | const auto len = static_cast<size_t>(std::distance(first, last)); |
407 | if (JSON_HEDLEY_LIKELY(len > 0)) |
408 | { |
409 | // there is at least one element: use the address of first |
410 | ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len); |
411 | } |
412 | else |
413 | { |
414 | // the address of first cannot be used: use nullptr |
415 | ia = std::make_shared<input_buffer_adapter>(nullptr, len); |
416 | } |
417 | } |
418 | |
419 | /// input adapter for array |
420 | template<class T, std::size_t N> |
421 | input_adapter(T (&array)[N]) |
422 | : input_adapter(std::begin(array), std::end(array)) {} |
423 | |
424 | /// input adapter for contiguous container |
425 | template<class ContiguousContainer, typename |
426 | std::enable_if<not std::is_pointer<ContiguousContainer>::value and |
427 | std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value, |
428 | int>::type = 0> |
429 | input_adapter(const ContiguousContainer& c) |
430 | : input_adapter(std::begin(c), std::end(c)) {} |
431 | |
432 | operator input_adapter_t() |
433 | { |
434 | return ia; |
435 | } |
436 | |
437 | private: |
438 | /// the actual adapter |
439 | input_adapter_t ia = nullptr; |
440 | }; |
441 | } // namespace detail |
442 | } // namespace nlohmann |
443 | |