1 | // wstring_convert implementation -*- C++ -*- |
2 | |
3 | // Copyright (C) 2015-2019 Free Software Foundation, Inc. |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free |
6 | // software; you can redistribute it and/or modify it under the |
7 | // terms of the GNU General Public License as published by the |
8 | // Free Software Foundation; either version 3, or (at your option) |
9 | // any later version. |
10 | |
11 | // This library is distributed in the hope that it will be useful, |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | // GNU General Public License for more details. |
15 | |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version |
18 | // 3.1, as published by the Free Software Foundation. |
19 | |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23 | // <http://www.gnu.org/licenses/>. |
24 | |
25 | /** @file bits/locale_conv.h |
26 | * This is an internal header file, included by other library headers. |
27 | * Do not attempt to use it directly. @headername{locale} |
28 | */ |
29 | |
30 | #ifndef _LOCALE_CONV_H |
31 | #define _LOCALE_CONV_H 1 |
32 | |
33 | #if __cplusplus < 201103L |
34 | # include <bits/c++0x_warning.h> |
35 | #else |
36 | |
37 | #include <streambuf> |
38 | #include <bits/stringfwd.h> |
39 | #include <bits/allocator.h> |
40 | #include <bits/codecvt.h> |
41 | #include <bits/unique_ptr.h> |
42 | |
43 | namespace std _GLIBCXX_VISIBILITY(default) |
44 | { |
45 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
46 | |
47 | /** |
48 | * @addtogroup locales |
49 | * @{ |
50 | */ |
51 | |
/**
 * Convert the character range [__first, __last) by repeatedly invoking
 * the facet member function @p __fn (for example @c in or @c out) on
 * @p __cvt, storing the converted characters in @p __outstr.
 *
 * @param __first   Start of the input sequence.
 * @param __last    End of the input sequence.
 * @param __outstr  Receives the result.  Output is written from the front
 *                  of the string and the string is trimmed to the number
 *                  of characters produced.
 * @param __cvt     The facet that performs the conversion.
 * @param __state   Conversion state, passed through to the facet.
 * @param __count   Set to the number of input characters consumed.
 * @param __fn      Pointer to the member function of @p __cvt to call.
 * @return  @c false if the facet reported @c codecvt_base::error,
 *          otherwise @c true.
 */
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    // Empty input yields an empty result without consulting the facet.
    if (__first == __last)
      {
        __outstr.clear();
        __count = 0;
        return true;
      }

    // Output slots reserved for every input character still to convert.
    const auto __per_char = __cvt.max_length() + 1;
    auto __from = __first;
    size_t __written = 0;
    codecvt_base::result __status;

    for (;;)
      {
        // Grow the output to make room for the remaining input.
        __outstr.resize(__outstr.size() + (__last - __from) * __per_char);
        auto __to = &__outstr.front() + __written;
        auto const __to_end = &__outstr.back() + 1;
        __status = (__cvt.*__fn)(__state, __from, __last, __from,
                                 __to, __to_end, __to);
        __written = __to - &__outstr.front();

        // Go round again only for a partial conversion that still has
        // input left and has less than __per_char output slots free.
        if (__status != codecvt_base::partial)
          break;
        if (__from == __last)
          break;
        if ((__outstr.size() - __written) >= __per_char)
          break;
      }

    if (__status == codecvt_base::error)
      {
        __count = __from - __first;
        return false;
      }

    if (__status == codecvt_base::noconv)
      {
        // No conversion was needed: the output is a copy of the input.
        __outstr.assign(__first, __last);
        __count = __last - __first;
        return true;
      }

    // Drop the unused tail of the over-allocated output.
    __outstr.resize(__written);
    __count = __from - __first;
    return true;
  }
102 | |
// Convert narrow character string to wide.
//
// Forwards to __do_str_codecvt using the facet's in() member.  __state is
// passed through to the facet and __count receives the number of input
// characters consumed.  Returns false if the facet reports an error.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt,
                   _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> __facet_t;
    // Pointer to in(), typed as a member of this codecvt specialization.
    typedef codecvt_base::result
      (__facet_t::*__in_fn_t)(_State&, const char*, const char*,
                              const char*&, _CharT*, _CharT*, _CharT*&) const;
    __in_fn_t __convert = &__facet_t::in;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
120 | |
// Convenience overload: converts with a freshly initialized conversion
// state and discards the count of consumed input characters.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;
    _State __initial = {};
    return __str_codecvt_in(__first, __last, __outstr, __cvt,
                            __initial, __consumed);
  }
131 | |
// Convert wide character string to narrow.
//
// Forwards to __do_str_codecvt using the facet's out() member.  __state is
// passed through to the facet and __count receives the number of input
// characters consumed.  Returns false if the facet reports an error.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt,
                    _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> __facet_t;
    // Pointer to out(), typed as a member of this codecvt specialization.
    typedef codecvt_base::result
      (__facet_t::*__out_fn_t)(_State&, const _CharT*, const _CharT*,
                               const _CharT*&, char*, char*, char*&) const;
    __out_fn_t __convert = &__facet_t::out;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
149 | |
// Convenience overload: converts with a freshly initialized conversion
// state and discards the count of consumed input characters.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;
    _State __initial = {};
    return __str_codecvt_out(__first, __last, __outstr, __cvt,
                             __initial, __consumed);
  }
160 | |
161 | #ifdef _GLIBCXX_USE_CHAR8_T |
162 | |
163 | // Convert wide character string to narrow. |
164 | template<typename _CharT, typename _Traits, typename _Alloc, typename _State> |
165 | inline bool |
166 | __str_codecvt_out(const _CharT* __first, const _CharT* __last, |
167 | basic_string<char8_t, _Traits, _Alloc>& __outstr, |
168 | const codecvt<_CharT, char8_t, _State>& __cvt, |
169 | _State& __state, size_t& __count) |
170 | { |
171 | using _Codecvt = codecvt<_CharT, char8_t, _State>; |
172 | using _ConvFn |
173 | = codecvt_base::result |
174 | (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, |
175 | char8_t*, char8_t*, char8_t*&) const; |
176 | _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out; |
177 | return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, |
178 | __count, __fn); |
179 | } |
180 | |
181 | template<typename _CharT, typename _Traits, typename _Alloc, typename _State> |
182 | inline bool |
183 | __str_codecvt_out(const _CharT* __first, const _CharT* __last, |
184 | basic_string<char8_t, _Traits, _Alloc>& __outstr, |
185 | const codecvt<_CharT, char8_t, _State>& __cvt) |
186 | { |
187 | _State __state = {}; |
188 | size_t __n; |
189 | return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); |
190 | } |
191 | |
192 | #endif // _GLIBCXX_USE_CHAR8_T |
193 | |
194 | #ifdef _GLIBCXX_USE_WCHAR_T |
195 | |
196 | _GLIBCXX_BEGIN_NAMESPACE_CXX11 |
197 | |
/// String conversions
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Wide_alloc = allocator<_Elem>,
         typename _Byte_alloc = allocator<char>>
  class wstring_convert
  {
  public:
    using byte_string = basic_string<char, char_traits<char>, _Byte_alloc>;
    using wide_string = basic_string<_Elem, char_traits<_Elem>, _Wide_alloc>;
    using state_type = typename _Codecvt::state_type;
    using int_type = typename wide_string::traits_type::int_type;

    /// Default constructor.  Creates and owns a new facet.
    wstring_convert() : _M_cvt(new _Codecvt()) { }

    /** Constructor.
     *
     * @param  __pcvt The facet to use for conversions.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws a logic error if @p __pcvt is null.
     */
    explicit
    wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
    {
      if (_M_cvt.get() == nullptr)
        __throw_logic_error("wstring_convert");
    }

    /** Construct with an initial conversion state.
     *
     * @param  __pcvt The facet to use for conversions.
     * @param  __state Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * The object's conversion state will persist between conversions.
     * Throws a logic error if @p __pcvt is null.
     */
    wstring_convert(_Codecvt* __pcvt, state_type __state)
    : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
    {
      if (_M_cvt.get() == nullptr)
        __throw_logic_error("wstring_convert");
    }

    /** Construct with error strings.
     *
     * @param  __byte_err A string to return on failed conversions.
     * @param  __wide_err A wide string to return on failed conversions.
     *
     * Creates and owns a new facet.
     */
    explicit
    wstring_convert(const byte_string& __byte_err,
                    const wide_string& __wide_err = wide_string())
    : _M_cvt(new _Codecvt),
      _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
      _M_with_strings(true)
    {
      if (_M_cvt.get() == nullptr)
        __throw_logic_error("wstring_convert");
    }

    ~wstring_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wstring_convert(const wstring_convert&) = delete;
    wstring_convert& operator=(const wstring_convert&) = delete;

    /// @{ Convert from bytes.

    // Convert a single byte.
    wide_string
    from_bytes(char __byte)
    {
      const char __one[2] = { __byte };
      return from_bytes(__one, __one + 1);
    }

    // Convert a null-terminated byte string.
    wide_string
    from_bytes(const char* __ptr)
    {
      const char* const __end = __ptr + char_traits<char>::length(__ptr);
      return from_bytes(__ptr, __end);
    }

    // Convert the contents of a byte string.
    wide_string
    from_bytes(const byte_string& __str)
    {
      const char* const __data = __str.data();
      return from_bytes(__data, __data + __str.size());
    }

    // Convert the byte range [__first, __last).  On failure returns the
    // wide error string if one was supplied, otherwise throws range_error.
    wide_string
    from_bytes(const char* __first, const char* __last)
    {
      // Without a user-supplied state every conversion starts afresh.
      if (!_M_with_cvtstate)
        _M_state = state_type();

      wide_string __result{ _M_wide_err_string.get_allocator() };
      const bool __ok = __str_codecvt_in(__first, __last, __result, *_M_cvt,
                                         _M_state, _M_count);
      if (__ok)
        return __result;
      if (!_M_with_strings)
        __throw_range_error("wstring_convert::from_bytes");
      return _M_wide_err_string;
    }
    /// @}

    /// @{ Convert to bytes.

    // Convert a single wide character.
    byte_string
    to_bytes(_Elem __wchar)
    {
      const _Elem __one[2] = { __wchar };
      return to_bytes(__one, __one + 1);
    }

    // Convert a null-terminated wide string.
    byte_string
    to_bytes(const _Elem* __ptr)
    {
      const _Elem* const __end
        = __ptr + wide_string::traits_type::length(__ptr);
      return to_bytes(__ptr, __end);
    }

    // Convert the contents of a wide string.
    byte_string
    to_bytes(const wide_string& __wstr)
    {
      const _Elem* const __data = __wstr.data();
      return to_bytes(__data, __data + __wstr.size());
    }

    // Convert the wide range [__first, __last).  On failure returns the
    // byte error string if one was supplied, otherwise throws range_error.
    byte_string
    to_bytes(const _Elem* __first, const _Elem* __last)
    {
      // Without a user-supplied state every conversion starts afresh.
      if (!_M_with_cvtstate)
        _M_state = state_type();

      byte_string __result{ _M_byte_err_string.get_allocator() };
      const bool __ok = __str_codecvt_out(__first, __last, __result, *_M_cvt,
                                          _M_state, _M_count);
      if (__ok)
        return __result;
      if (!_M_with_strings)
        __throw_range_error("wstring_convert::to_bytes");
      return _M_byte_err_string;
    }
    /// @}

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2174. wstring_convert::converted() should be noexcept
    /// The number of elements successfully converted in the last conversion.
    size_t converted() const noexcept { return _M_count; }

    /// The final conversion state of the last conversion.
    state_type state() const { return _M_state; }

  private:
    unique_ptr<_Codecvt>  _M_cvt;                 // owned facet
    byte_string           _M_byte_err_string;     // result of failed to_bytes
    wide_string           _M_wide_err_string;     // result of failed from_bytes
    state_type            _M_state = state_type();
    size_t                _M_count = 0;
    bool                  _M_with_cvtstate = false; // state given at construction
    bool                  _M_with_strings = false;  // error strings given
  };
351 | |
352 | _GLIBCXX_END_NAMESPACE_CXX11 |
353 | |
/// Buffer conversions
///
/// A stream buffer of @c _Elem characters layered on top of a byte stream
/// buffer.  Characters written are converted with the facet's out() and
/// forwarded to the byte buffer; bytes read from the byte buffer are
/// converted with the facet's in() and served from an internal get area.
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = char_traits<_Elem>>
  class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  {
    typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /// Default constructor.  No byte stream buffer is attached.
    wbuffer_convert() : wbuffer_convert(nullptr) { }

    /** Constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer.
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws a logic error if @p __pcvt is null.  The put and get areas
     * are only set up when @p __bytebuf is non-null.
     */
    explicit
    wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        __throw_logic_error("wbuffer_convert" );

      _M_always_noconv = _M_cvt->always_noconv();

      if (_M_buf)
        {
          this->setp(_M_put_area, _M_put_area + _S_buffer_length);
          // Empty get area positioned after the putback slots.
          this->setg(_M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length);
        }
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer.
    streambuf* rdbuf() const noexcept { return _M_buf; }

    /// Replace the underlying byte stream buffer and return the previous
    /// one.  The get and put areas are not modified.
    streambuf*
    rdbuf(streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    // Convert and write the put area, then sync the byte buffer.
    // Returns 0 only if every step succeeds, otherwise -1.
    int
    sync()
    { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }

    // Make room in the put area by converting and writing its contents,
    // then store __out unless it is eof.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    // Return the next available character, refilling the get area from
    // the byte buffer when it is empty.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
        return _Tr::to_int_type(*this->gptr());
      else
        return _Tr::eof();
    }

    // Copy characters into the put area chunk by chunk, flushing it
    // through _M_conv_put() whenever more remain to be written.
    // Returns the number of characters accepted.
    streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      streamsize __done = 0;
      do
        {
          auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
                                           __n - __done);
          _Tr::copy(this->pptr(), __s + __done, __nn);
          this->pbump(__nn);
          __done += __nn;
        } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // fill the get area from converted contents of the byte stream buffer
    bool
    _M_conv_get()
    {
      // Preserve up to _S_putback_length already-read characters in
      // front of the new data.
      const streamsize __pb1 = this->gptr() - this->eback();
      const streamsize __pb2 = _S_putback_length;
      const streamsize __npb = std::min(__pb1, __pb2);

      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      // Read what is available (at least one byte) after any bytes left
      // unconverted by the previous call.
      streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
      __nbytes = std::min(__nbytes, _M_buf->in_avail());
      if (__nbytes < 1)
        __nbytes = 1;
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      codecvt_base::result __result;
      if (_M_always_noconv)
        __result = codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          // Publish the copied characters; without this the get area
          // stays empty and the data just read is never delivered.
          this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
          return true;
        }

      // Keep any unconverted trailing bytes for the next call.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != codecvt_base::error;
    }

    // unused
    // Selected when the argument is not a char pointer; always fails.
    bool
    _M_put(...)
    { return false; }

    // Write __n bytes to the byte stream buffer; false on a short write.
    bool
    _M_put(const char* __p, streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // convert the put area and write to the byte stream buffer
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const streamsize __pending = __last - __first;

      if (_M_always_noconv)
        {
          if (!_M_put(__first, __pending))
            return false;
          // Everything was written: rewind the put pointer so the area
          // can be reused instead of being flushed again.
          this->pbump(__first - __last);
          return true;
        }

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == codecvt_base::error)
            return false;
          else if (__result == codecvt_base::noconv)
            {
              // Write the not yet consumed characters unchanged, then
              // rewind the put pointer as in the always-noconv case.
              if (!_M_put(__next, __last - __next))
                return false;
              this->pbump(__first - __last);
              return true;
            }

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      while (__next != __last && __next != __start);

      // Shift any characters that could not be converted to the front.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(__first - __next);
      return __next != __first;
    }

    streambuf*            _M_buf;     // underlying byte stream buffer
    unique_ptr<_Codecvt>  _M_cvt;     // owned conversion facet
    state_type            _M_state;   // current conversion state

    static const streamsize _S_buffer_length = 32;
    static const streamsize _S_putback_length = 3;
    _Elem                 _M_put_area[_S_buffer_length];
    _Elem                 _M_get_area[_S_buffer_length];
    streamsize            _M_unconv = 0;  // unconverted bytes in _M_get_buf
    char                  _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                  _M_always_noconv;
  };
578 | |
579 | #endif // _GLIBCXX_USE_WCHAR_T |
580 | |
581 | /// @} group locales |
582 | |
583 | _GLIBCXX_END_NAMESPACE_VERSION |
584 | } // namespace |
585 | |
586 | #endif // __cplusplus |
587 | |
588 | #endif /* _LOCALE_CONV_H */ |
589 | |