1#pragma once
2
3#include <array> // array
4#include <cassert> // assert
5#include <cstddef> // size_t
6#include <cstdio> //FILE *
7#include <cstring> // strlen
8#include <istream> // istream
9#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
10#include <memory> // shared_ptr, make_shared, addressof
11#include <numeric> // accumulate
12#include <string> // string, char_traits
13#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
14#include <utility> // pair, declval
15
16#include <nlohmann/detail/iterators/iterator_traits.hpp>
17#include <nlohmann/detail/macro_scope.hpp>
18
19namespace nlohmann
20{
21namespace detail
22{
/// the input formats that can be selected for parsing
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
25
26////////////////////
27// input adapters //
28////////////////////
29
/*!
@brief abstract base of all input adapters

An input adapter hands out its input one character at a time as values of
type std::char_traits<char>::int_type. Every valid char value is reported as
a non-negative number (typically the value of the corresponding unsigned
char). The end of the input is reported as std::char_traits<char>::eof(), a
value outside that range. That value is typically -1, but may be any value
which is not a valid char value.
*/
struct input_adapter_protocol
{
    /// virtual, so that concrete adapters can be destroyed through this interface
    virtual ~input_adapter_protocol() = default;

    /// @return the next character [0,255] or std::char_traits<char>::eof()
    virtual std::char_traits<char>::int_type get_character() = 0;
};

/// shared handle to an input adapter; a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
50
51/*!
52Input adapter for stdio file access. This adapter read only 1 byte and do not use any
53 buffer. This adapter is a very low level adapter.
54*/
55class file_input_adapter : public input_adapter_protocol
56{
57 public:
58 JSON_HEDLEY_NON_NULL(2)
59 explicit file_input_adapter(std::FILE* f) noexcept
60 : m_file(f)
61 {}
62
63 // make class move-only
64 file_input_adapter(const file_input_adapter&) = delete;
65 file_input_adapter(file_input_adapter&&) = default;
66 file_input_adapter& operator=(const file_input_adapter&) = delete;
67 file_input_adapter& operator=(file_input_adapter&&) = default;
68 ~file_input_adapter() override = default;
69
70 std::char_traits<char>::int_type get_character() noexcept override
71 {
72 return std::fgetc(m_file);
73 }
74
75 private:
76 /// the file pointer to read from
77 std::FILE* m_file;
78};
79
80
81/*!
82Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
83beginning of input. Does not support changing the underlying std::streambuf
84in mid-input. Maintains underlying std::istream and std::streambuf to support
85subsequent use of standard std::istream operations to process any input
86characters following those used in parsing the JSON input. Clears the
87std::istream flags; any input errors (e.g., EOF) will be detected by the first
88subsequent call for input from the std::istream.
89*/
90class input_stream_adapter : public input_adapter_protocol
91{
92 public:
93 ~input_stream_adapter() override
94 {
95 // clear stream flags; we use underlying streambuf I/O, do not
96 // maintain ifstream flags, except eof
97 is.clear(is.rdstate() & std::ios::eofbit);
98 }
99
100 explicit input_stream_adapter(std::istream& i)
101 : is(i), sb(*i.rdbuf())
102 {}
103
104 // delete because of pointer members
105 input_stream_adapter(const input_stream_adapter&) = delete;
106 input_stream_adapter& operator=(input_stream_adapter&) = delete;
107 input_stream_adapter(input_stream_adapter&&) = delete;
108 input_stream_adapter& operator=(input_stream_adapter&&) = delete;
109
110 // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
111 // ensure that std::char_traits<char>::eof() and the character 0xFF do not
112 // end up as the same value, eg. 0xFFFFFFFF.
113 std::char_traits<char>::int_type get_character() override
114 {
115 auto res = sb.sbumpc();
116 // set eof manually, as we don't use the istream interface.
117 if (res == EOF)
118 {
119 is.clear(is.rdstate() | std::ios::eofbit);
120 }
121 return res;
122 }
123
124 private:
125 /// the associated input stream
126 std::istream& is;
127 std::streambuf& sb;
128};
129
130/// input adapter for buffer input
131class input_buffer_adapter : public input_adapter_protocol
132{
133 public:
134 input_buffer_adapter(const char* b, const std::size_t l) noexcept
135 : cursor(b), limit(b == nullptr ? nullptr : (b + l))
136 {}
137
138 // delete because of pointer members
139 input_buffer_adapter(const input_buffer_adapter&) = delete;
140 input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
141 input_buffer_adapter(input_buffer_adapter&&) = delete;
142 input_buffer_adapter& operator=(input_buffer_adapter&&) = delete;
143 ~input_buffer_adapter() override = default;
144
145 std::char_traits<char>::int_type get_character() noexcept override
146 {
147 if (JSON_HEDLEY_LIKELY(cursor < limit))
148 {
149 assert(cursor != nullptr and limit != nullptr);
150 return std::char_traits<char>::to_int_type(*(cursor++));
151 }
152
153 return std::char_traits<char>::eof();
154 }
155
156 private:
157 /// pointer to the current character
158 const char* cursor;
159 /// pointer past the last character
160 const char* const limit;
161};
162
/*!
@brief converts the characters of a wide string to UTF-8, one at a time

The primary template treats every character of the string as one UTF-32 code
unit.

@tparam WideStringType  string type; only size() and operator[] are used
@tparam T               size in bytes of one character of WideStringType
*/
template<typename WideStringType, std::size_t T>
struct wide_string_input_helper
{
    /*!
    @brief UTF-32: encode the next character of @a str as UTF-8

    @param[in]     str                the string to read from
    @param[in,out] current_wchar      index of the next unread character of
                                      @a str; advanced past the character
                                      that was consumed
    @param[out]    utf8_bytes         receives the encoded bytes; at the end
                                      of @a str its first entry is set to
                                      std::char_traits<char>::eof()
    @param[out]    utf8_bytes_index   reset to 0
    @param[out]    utf8_bytes_filled  number of valid entries of
                                      @a utf8_bytes (1 to 4)
    */
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using int_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        if (current_wchar >= str.size())
        {
            // end of input
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-32 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (wc <= 0xFFFF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (wc <= 0x10FFFF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xF0u | ((wc >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: pass the value on unchanged
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};

/*!
@brief specialization for strings with 2-byte characters (UTF-16)

@tparam WideStringType  string type; only size() and operator[] are used
*/
template<typename WideStringType>
struct wide_string_input_helper<WideStringType, 2>
{
    /*!
    @brief UTF-16: encode the next code point of @a str as UTF-8

    A code unit in the surrogate range [0xD800, 0xDFFF] is combined with the
    code unit that follows it. If no code unit follows, the surrogate value
    itself is passed on unchanged as a single entry.

    @param[in]     str                the string to read from
    @param[in,out] current_wchar      index of the next unread code unit of
                                      @a str; advanced past every code unit
                                      that was consumed and never beyond
                                      str.size()
    @param[out]    utf8_bytes         receives the encoded bytes; at the end
                                      of @a str its first entry is set to
                                      std::char_traits<char>::eof()
    @param[out]    utf8_bytes_index   reset to 0
    @param[out]    utf8_bytes_filled  number of valid entries of
                                      @a utf8_bytes (1 to 4)
    */
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using int_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        if (current_wchar >= str.size())
        {
            // end of input
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-16 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<int_type>(0xC0u | ((wc >> 6u)));
            utf8_bytes[1] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (0xD800 > wc or wc >= 0xE000)
        {
            // code unit outside the surrogate range: three bytes
            utf8_bytes[0] = static_cast<int_type>(0xE0u | ((wc >> 12u)));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (current_wchar < str.size())
        {
            // surrogate followed by another code unit: combine both
            const auto wc2 = static_cast<unsigned int>(str[current_wchar++]);
            const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
            utf8_bytes[0] = static_cast<int_type>(0xF0u | (charcode >> 18u));
            utf8_bytes[1] = static_cast<int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<int_type>(0x80u | (charcode & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: the surrogate is the last code unit of the
            // string. current_wchar already equals str.size() here and must
            // not be advanced again, otherwise the end-of-input check above
            // would never succeed and later calls would read past the string.
            utf8_bytes[0] = static_cast<int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};
286
287template<typename WideStringType>
288class wide_string_input_adapter : public input_adapter_protocol
289{
290 public:
291 explicit wide_string_input_adapter(const WideStringType& w) noexcept
292 : str(w)
293 {}
294
295 std::char_traits<char>::int_type get_character() noexcept override
296 {
297 // check if buffer needs to be filled
298 if (utf8_bytes_index == utf8_bytes_filled)
299 {
300 fill_buffer<sizeof(typename WideStringType::value_type)>();
301
302 assert(utf8_bytes_filled > 0);
303 assert(utf8_bytes_index == 0);
304 }
305
306 // use buffer
307 assert(utf8_bytes_filled > 0);
308 assert(utf8_bytes_index < utf8_bytes_filled);
309 return utf8_bytes[utf8_bytes_index++];
310 }
311
312 private:
313 template<size_t T>
314 void fill_buffer()
315 {
316 wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
317 }
318
319 /// the wstring to process
320 const WideStringType& str;
321
322 /// index of the current wchar in str
323 std::size_t current_wchar = 0;
324
325 /// a buffer for UTF-8 bytes
326 std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
327
328 /// index to the utf8_codes array for the next valid byte
329 std::size_t utf8_bytes_index = 0;
330 /// number of valid bytes in the utf8_codes array
331 std::size_t utf8_bytes_filled = 0;
332};
333
334class input_adapter
335{
336 public:
337 // native support
338 JSON_HEDLEY_NON_NULL(2)
339 input_adapter(std::FILE* file)
340 : ia(std::make_shared<file_input_adapter>(file)) {}
341 /// input adapter for input stream
342 input_adapter(std::istream& i)
343 : ia(std::make_shared<input_stream_adapter>(i)) {}
344
345 /// input adapter for input stream
346 input_adapter(std::istream&& i)
347 : ia(std::make_shared<input_stream_adapter>(i)) {}
348
349 input_adapter(const std::wstring& ws)
350 : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {}
351
352 input_adapter(const std::u16string& ws)
353 : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {}
354
355 input_adapter(const std::u32string& ws)
356 : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {}
357
358 /// input adapter for buffer
359 template<typename CharT,
360 typename std::enable_if<
361 std::is_pointer<CharT>::value and
362 std::is_integral<typename std::remove_pointer<CharT>::type>::value and
363 sizeof(typename std::remove_pointer<CharT>::type) == 1,
364 int>::type = 0>
365 input_adapter(CharT b, std::size_t l)
366 : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}
367
368 // derived support
369
370 /// input adapter for string literal
371 template<typename CharT,
372 typename std::enable_if<
373 std::is_pointer<CharT>::value and
374 std::is_integral<typename std::remove_pointer<CharT>::type>::value and
375 sizeof(typename std::remove_pointer<CharT>::type) == 1,
376 int>::type = 0>
377 input_adapter(CharT b)
378 : input_adapter(reinterpret_cast<const char*>(b),
379 std::strlen(reinterpret_cast<const char*>(b))) {}
380
381 /// input adapter for iterator range with contiguous storage
382 template<class IteratorType,
383 typename std::enable_if<
384 std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
385 int>::type = 0>
386 input_adapter(IteratorType first, IteratorType last)
387 {
388#ifndef NDEBUG
389 // assertion to check that the iterator range is indeed contiguous,
390 // see http://stackoverflow.com/a/35008842/266378 for more discussion
391 const auto is_contiguous = std::accumulate(
392 first, last, std::pair<bool, int>(true, 0),
393 [&first](std::pair<bool, int> res, decltype(*first) val)
394 {
395 res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
396 return res;
397 }).first;
398 assert(is_contiguous);
399#endif
400
401 // assertion to check that each element is 1 byte long
402 static_assert(
403 sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
404 "each element in the iterator range must have the size of 1 byte");
405
406 const auto len = static_cast<size_t>(std::distance(first, last));
407 if (JSON_HEDLEY_LIKELY(len > 0))
408 {
409 // there is at least one element: use the address of first
410 ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len);
411 }
412 else
413 {
414 // the address of first cannot be used: use nullptr
415 ia = std::make_shared<input_buffer_adapter>(nullptr, len);
416 }
417 }
418
419 /// input adapter for array
420 template<class T, std::size_t N>
421 input_adapter(T (&array)[N])
422 : input_adapter(std::begin(array), std::end(array)) {}
423
424 /// input adapter for contiguous container
425 template<class ContiguousContainer, typename
426 std::enable_if<not std::is_pointer<ContiguousContainer>::value and
427 std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
428 int>::type = 0>
429 input_adapter(const ContiguousContainer& c)
430 : input_adapter(std::begin(c), std::end(c)) {}
431
432 operator input_adapter_t()
433 {
434 return ia;
435 }
436
437 private:
438 /// the actual adapter
439 input_adapter_t ia = nullptr;
440};
441} // namespace detail
442} // namespace nlohmann
443