1 | // wstring_convert implementation -*- C++ -*- |
2 | |
3 | // Copyright (C) 2015-2018 Free Software Foundation, Inc. |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free |
6 | // software; you can redistribute it and/or modify it under the |
7 | // terms of the GNU General Public License as published by the |
8 | // Free Software Foundation; either version 3, or (at your option) |
9 | // any later version. |
10 | |
11 | // This library is distributed in the hope that it will be useful, |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | // GNU General Public License for more details. |
15 | |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version |
18 | // 3.1, as published by the Free Software Foundation. |
19 | |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23 | // <http://www.gnu.org/licenses/>. |
24 | |
25 | /** @file bits/locale_conv.h |
26 | * This is an internal header file, included by other library headers. |
27 | * Do not attempt to use it directly. @headername{locale} |
28 | */ |
29 | |
30 | #ifndef _LOCALE_CONV_H |
31 | #define _LOCALE_CONV_H 1 |
32 | |
33 | #if __cplusplus < 201103L |
34 | # include <bits/c++0x_warning.h> |
35 | #else |
36 | |
37 | #include <streambuf> |
38 | #include "stringfwd.h" |
39 | #include "allocator.h" |
40 | #include "codecvt.h" |
41 | #include "unique_ptr.h" |
42 | |
43 | namespace std _GLIBCXX_VISIBILITY(default) |
44 | { |
45 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
46 | |
47 | /** |
48 | * @addtogroup locales |
49 | * @{ |
50 | */ |
51 | |
/**
 *  Shared implementation of the string conversion helpers.
 *
 *  Converts the input range [__first, __last) by repeatedly invoking the
 *  member function @p __fn on the facet @p __cvt, storing the converted
 *  characters in @p __outstr (its previous contents are overwritten).
 *
 *  @param __first   Start of the input range.
 *  @param __last    End of the input range.
 *  @param __outstr  String that receives the converted characters.
 *  @param __cvt     Facet performing the conversion.
 *  @param __state   Conversion state, updated by the facet.
 *  @param __count   Set to the number of input elements consumed.
 *  @param __fn      Pointer to the facet member function to invoke.
 *  @return  False if the facet reported codecvt_base::error, else true.
 */
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    // An empty input range trivially converts to an empty string.
    if (__first == __last)
      {
        __outstr.clear();
        __count = 0;
        return true;
      }

    // Output elements reserved per remaining input element.
    const auto __per_elem = __cvt.max_length() + 1;
    auto __in_pos = __first;
    size_t __written = 0;
    codecvt_base::result __res;

    for (;;)
      {
        // Grow the output, then convert into the space after what has
        // already been written.
        __outstr.resize(__outstr.size() + (__last - __in_pos) * __per_elem);
        auto* const __base = &__outstr.front();
        auto* __out_pos = __base + __written;
        auto* const __out_end = &__outstr.back() + 1;
        __res = (__cvt.*__fn)(__state, __in_pos, __last, __in_pos,
                              __out_pos, __out_end, __out_pos);
        __written = __out_pos - __base;

        // Go round again only if the facet stopped early with input left
        // over and fewer than __per_elem free output slots remaining.
        const bool __need_more_room
          = __res == codecvt_base::partial && __in_pos != __last
            && (__outstr.size() - __written) < __per_elem;
        if (!__need_more_room)
          break;
      }

    if (__res == codecvt_base::error)
      {
        __count = __in_pos - __first;
        return false;
      }

    if (__res == codecvt_base::noconv)
      {
        // No conversion was needed, so the input is copied verbatim.
        __outstr.assign(__first, __last);
        __count = __last - __first;
        return true;
      }

    // Trim the over-allocated tail.
    __outstr.resize(__written);
    __count = __in_pos - __first;
    return true;
  }
102 | |
103 | // Convert narrow character string to wide. |
104 | template<typename _CharT, typename _Traits, typename _Alloc, typename _State> |
105 | inline bool |
106 | __str_codecvt_in(const char* __first, const char* __last, |
107 | basic_string<_CharT, _Traits, _Alloc>& __outstr, |
108 | const codecvt<_CharT, char, _State>& __cvt, |
109 | _State& __state, size_t& __count) |
110 | { |
111 | using _Codecvt = codecvt<_CharT, char, _State>; |
112 | using _ConvFn |
113 | = codecvt_base::result |
114 | (_Codecvt::*)(_State&, const char*, const char*, const char*&, |
115 | _CharT*, _CharT*, _CharT*&) const; |
116 | _ConvFn __fn = &codecvt<_CharT, char, _State>::in; |
117 | return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, |
118 | __count, __fn); |
119 | } |
120 | |
121 | template<typename _CharT, typename _Traits, typename _Alloc, typename _State> |
122 | inline bool |
123 | __str_codecvt_in(const char* __first, const char* __last, |
124 | basic_string<_CharT, _Traits, _Alloc>& __outstr, |
125 | const codecvt<_CharT, char, _State>& __cvt) |
126 | { |
127 | _State __state = {}; |
128 | size_t __n; |
129 | return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); |
130 | } |
131 | |
132 | // Convert wide character string to narrow. |
133 | template<typename _CharT, typename _Traits, typename _Alloc, typename _State> |
134 | inline bool |
135 | __str_codecvt_out(const _CharT* __first, const _CharT* __last, |
136 | basic_string<char, _Traits, _Alloc>& __outstr, |
137 | const codecvt<_CharT, char, _State>& __cvt, |
138 | _State& __state, size_t& __count) |
139 | { |
140 | using _Codecvt = codecvt<_CharT, char, _State>; |
141 | using _ConvFn |
142 | = codecvt_base::result |
143 | (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, |
144 | char*, char*, char*&) const; |
145 | _ConvFn __fn = &codecvt<_CharT, char, _State>::out; |
146 | return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, |
147 | __count, __fn); |
148 | } |
149 | |
150 | template<typename _CharT, typename _Traits, typename _Alloc, typename _State> |
151 | inline bool |
152 | __str_codecvt_out(const _CharT* __first, const _CharT* __last, |
153 | basic_string<char, _Traits, _Alloc>& __outstr, |
154 | const codecvt<_CharT, char, _State>& __cvt) |
155 | { |
156 | _State __state = {}; |
157 | size_t __n; |
158 | return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); |
159 | } |
160 | |
161 | #ifdef _GLIBCXX_USE_WCHAR_T |
162 | |
163 | _GLIBCXX_BEGIN_NAMESPACE_CXX11 |
164 | |
165 | /// String conversions |
166 | template<typename _Codecvt, typename _Elem = wchar_t, |
167 | typename _Wide_alloc = allocator<_Elem>, |
168 | typename _Byte_alloc = allocator<char>> |
169 | class wstring_convert |
170 | { |
171 | public: |
172 | typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; |
173 | typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; |
174 | typedef typename _Codecvt::state_type state_type; |
175 | typedef typename wide_string::traits_type::int_type int_type; |
176 | |
177 | /** Default constructor. |
178 | * |
179 | * @param __pcvt The facet to use for conversions. |
180 | * |
181 | * Takes ownership of @p __pcvt and will delete it in the destructor. |
182 | */ |
183 | explicit |
184 | wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt) |
185 | { |
186 | if (!_M_cvt) |
187 | __throw_logic_error("wstring_convert" ); |
188 | } |
189 | |
190 | /** Construct with an initial converstion state. |
191 | * |
192 | * @param __pcvt The facet to use for conversions. |
193 | * @param __state Initial conversion state. |
194 | * |
195 | * Takes ownership of @p __pcvt and will delete it in the destructor. |
196 | * The object's conversion state will persist between conversions. |
197 | */ |
198 | wstring_convert(_Codecvt* __pcvt, state_type __state) |
199 | : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) |
200 | { |
201 | if (!_M_cvt) |
202 | __throw_logic_error("wstring_convert" ); |
203 | } |
204 | |
205 | /** Construct with error strings. |
206 | * |
207 | * @param __byte_err A string to return on failed conversions. |
208 | * @param __wide_err A wide string to return on failed conversions. |
209 | */ |
210 | explicit |
211 | wstring_convert(const byte_string& __byte_err, |
212 | const wide_string& __wide_err = wide_string()) |
213 | : _M_cvt(new _Codecvt), |
214 | _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), |
215 | _M_with_strings(true) |
216 | { |
217 | if (!_M_cvt) |
218 | __throw_logic_error("wstring_convert" ); |
219 | } |
220 | |
221 | ~wstring_convert() = default; |
222 | |
223 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
224 | // 2176. Special members for wstring_convert and wbuffer_convert |
225 | wstring_convert(const wstring_convert&) = delete; |
226 | wstring_convert& operator=(const wstring_convert&) = delete; |
227 | |
228 | /// @{ Convert from bytes. |
229 | wide_string |
230 | from_bytes(char __byte) |
231 | { |
232 | char __bytes[2] = { __byte }; |
233 | return from_bytes(__bytes, __bytes+1); |
234 | } |
235 | |
236 | wide_string |
237 | from_bytes(const char* __ptr) |
238 | { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } |
239 | |
240 | wide_string |
241 | from_bytes(const byte_string& __str) |
242 | { |
243 | auto __ptr = __str.data(); |
244 | return from_bytes(__ptr, __ptr + __str.size()); |
245 | } |
246 | |
247 | wide_string |
248 | from_bytes(const char* __first, const char* __last) |
249 | { |
250 | if (!_M_with_cvtstate) |
251 | _M_state = state_type(); |
252 | wide_string __out{ _M_wide_err_string.get_allocator() }; |
253 | if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, |
254 | _M_count)) |
255 | return __out; |
256 | if (_M_with_strings) |
257 | return _M_wide_err_string; |
258 | __throw_range_error("wstring_convert::from_bytes" ); |
259 | } |
260 | /// @} |
261 | |
262 | /// @{ Convert to bytes. |
263 | byte_string |
264 | to_bytes(_Elem __wchar) |
265 | { |
266 | _Elem __wchars[2] = { __wchar }; |
267 | return to_bytes(__wchars, __wchars+1); |
268 | } |
269 | |
270 | byte_string |
271 | to_bytes(const _Elem* __ptr) |
272 | { |
273 | return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); |
274 | } |
275 | |
276 | byte_string |
277 | to_bytes(const wide_string& __wstr) |
278 | { |
279 | auto __ptr = __wstr.data(); |
280 | return to_bytes(__ptr, __ptr + __wstr.size()); |
281 | } |
282 | |
283 | byte_string |
284 | to_bytes(const _Elem* __first, const _Elem* __last) |
285 | { |
286 | if (!_M_with_cvtstate) |
287 | _M_state = state_type(); |
288 | byte_string __out{ _M_byte_err_string.get_allocator() }; |
289 | if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, |
290 | _M_count)) |
291 | return __out; |
292 | if (_M_with_strings) |
293 | return _M_byte_err_string; |
294 | __throw_range_error("wstring_convert::to_bytes" ); |
295 | } |
296 | /// @} |
297 | |
298 | // _GLIBCXX_RESOLVE_LIB_DEFECTS |
299 | // 2174. wstring_convert::converted() should be noexcept |
300 | /// The number of elements successfully converted in the last conversion. |
301 | size_t converted() const noexcept { return _M_count; } |
302 | |
303 | /// The final conversion state of the last conversion. |
304 | state_type state() const { return _M_state; } |
305 | |
306 | private: |
307 | unique_ptr<_Codecvt> _M_cvt; |
308 | byte_string _M_byte_err_string; |
309 | wide_string _M_wide_err_string; |
310 | state_type _M_state = state_type(); |
311 | size_t _M_count = 0; |
312 | bool _M_with_cvtstate = false; |
313 | bool _M_with_strings = false; |
314 | }; |
315 | |
316 | _GLIBCXX_END_NAMESPACE_CXX11 |
317 | |
/// Buffer conversions
///
/// A stream buffer of @c _Elem characters layered on top of a byte stream
/// buffer.  Characters written are converted with the facet's @c out
/// function and forwarded to the byte buffer; bytes read from the byte
/// buffer are converted with the facet's @c in function.
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = char_traits<_Elem>>
  class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  {
    typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /** Default constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer.
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws a logic error if @p __pcvt is null.
     */
    explicit
    wbuffer_convert(streambuf* __bytebuf = nullptr,
                    _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        __throw_logic_error("wbuffer_convert");

      _M_always_noconv = _M_cvt->always_noconv();

      if (_M_buf)
        _M_init_areas();
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer (may be null).
    streambuf* rdbuf() const noexcept { return _M_buf; }

    /// Replace the underlying byte stream buffer, returning the old one.
    streambuf*
    rdbuf(streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      // An object constructed without a byte buffer never had its put and
      // get areas set up; do it now so that it becomes usable.
      if (_M_buf && !this->pbase())
        _M_init_areas();
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    // Convert and forward anything pending, then sync the byte buffer.
    // Returns 0 on success and -1 on failure.
    int
    sync()
    {
      if (!_M_buf)
        return -1;
      // Having nothing buffered is not a failure: only convert when
      // there is something to convert.
      if (this->pptr() != this->pbase() && !_M_conv_put())
        return -1;
      return _M_buf->pubsync() == 0 ? 0 : -1;
    }

    // Make room in the put area by converting its contents, then store
    // __out unless it is eof.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    // Return the next available character without consuming it,
    // refilling the get area from the byte buffer when it is empty.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      // A refill can succeed without producing any character (for
      // instance when only the first bytes of a multibyte sequence were
      // available), so keep refilling until there is something to read
      // or the refill fails.  Every successful refill consumes at least
      // one byte from the byte buffer.
      while (this->gptr() >= this->egptr())
        if (!_M_conv_get())
          return _Tr::eof();

      return _Tr::to_int_type(*this->gptr());
    }

    // Copy __n characters into the put area, converting and forwarding
    // its contents whenever it fills up.  Returns the number of
    // characters accepted.
    streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      streamsize __done = 0;
      do
        {
          auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
                                           __n - __done);
          _Tr::copy(this->pptr(), __s + __done, __nn);
          this->pbump(__nn);
          __done += __nn;
        } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // Point the put area at _M_put_area and set up an empty get area
    // that starts after the space reserved for putback characters.
    void
    _M_init_areas() noexcept
    {
      this->setp(_M_put_area, _M_put_area + _S_buffer_length);
      _Elem* const __gstart = _M_get_area + _S_putback_length;
      this->setg(__gstart, __gstart, __gstart);
    }

    // fill the get area from converted contents of the byte stream buffer
    //
    // Returns false if no bytes could be read or the facet reported an
    // error.  A true result does not guarantee that any character was
    // produced.
    bool
    _M_conv_get()
    {
      // Copy up to _S_putback_length already-read characters to just
      // before the position where the new characters will be stored.
      const streamsize __pb1 = this->gptr() - this->eback();
      const streamsize __pb2 = _S_putback_length;
      const streamsize __npb = std::min(__pb1, __pb2);

      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      // Unconverted bytes left over from the previous call occupy the
      // front of _M_get_buf.  If they already fill it, nothing more can
      // be read without writing past the end of the array.
      const streamsize __room = sizeof(_M_get_buf) - _M_unconv;
      if (__room < 1)
        return false;

      streamsize __nbytes = std::min(__room, _M_buf->in_avail());
      if (__nbytes < 1)
        __nbytes = 1;	// always attempt to read at least one byte
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      codecvt_base::result __result;
      if (_M_always_noconv)
        __result = codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          // Publish the copied characters as the readable sequence.
          this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
          return true;
        }

      // Keep any unconverted trailing bytes at the front of _M_get_buf
      // for the next call.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != codecvt_base::error;
    }

    // Fallback selected when the put area cannot be passed as
    // const char*; always fails.
    bool
    _M_put(...)
    { return false; }

    // Write __n bytes to the byte stream buffer; true if all were accepted.
    bool
    _M_put(const char* __p, streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // convert the put area and write to the byte stream buffer
    //
    // Returns true if the contents were written unconverted, or if at
    // least one character was converted and written.  Characters that
    // were written are removed from the put area.
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const streamsize __pending = __last - __first;

      if (_M_always_noconv)
        {
          if (!_M_put(__first, __pending))
            return false;
          // Everything was written, so the put area is empty again.
          this->pbump(-static_cast<int>(__pending));
          return true;
        }

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == codecvt_base::error)
            return false;
          else if (__result == codecvt_base::noconv)
            {
              // Write only what has not been consumed yet, then empty
              // the put area.
              if (!_M_put(__next, __last - __next))
                return false;
              this->pbump(-static_cast<int>(__pending));
              return true;
            }

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      while (__next != __last && __next != __start);

      // Slide any characters that could not be converted to the front.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(static_cast<int>(__first - __next));
      return __next != __first;
    }

    streambuf*            _M_buf;	// underlying byte buffer, not owned
    unique_ptr<_Codecvt>  _M_cvt;	// owned conversion facet
    state_type            _M_state;

    static const streamsize _S_buffer_length = 32;
    static const streamsize _S_putback_length = 3;
    _Elem                 _M_put_area[_S_buffer_length];
    _Elem                 _M_get_area[_S_buffer_length];
    streamsize            _M_unconv = 0;	// bytes pending in _M_get_buf
    char                  _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                  _M_always_noconv;
  };
539 | |
540 | #endif // _GLIBCXX_USE_WCHAR_T |
541 | |
542 | /// @} group locales |
543 | |
544 | _GLIBCXX_END_NAMESPACE_VERSION |
545 | } // namespace |
546 | |
547 | #endif // __cplusplus |
548 | |
549 | #endif /* _LOCALE_CONV_H */ |
550 | |