1// wstring_convert implementation -*- C++ -*-
2
3// Copyright (C) 2015-2021 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file bits/locale_conv.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{locale}
28 */
29
30#ifndef _LOCALE_CONV_H
31#define _LOCALE_CONV_H 1
32
33#if __cplusplus < 201103L
34# include <bits/c++0x_warning.h>
35#else
36
37#include <streambuf>
38#include <bits/stringfwd.h>
39#include <bits/allocator.h>
40#include <bits/codecvt.h>
41#include <bits/unique_ptr.h>
42
43namespace std _GLIBCXX_VISIBILITY(default)
44{
45_GLIBCXX_BEGIN_NAMESPACE_VERSION
46
47 /**
48 * @addtogroup locales
49 * @{
50 */
51
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    // Shared driver for the string conversion helpers.  Applies the facet
    // member function __fn to [__first, __last), storing the converted
    // elements in __outstr.  On return __count is the number of input
    // elements consumed.  Returns false only if the facet reports an error.

    // Nothing to convert: produce an empty string.
    if (__first == __last)
      {
        __count = 0;
        __outstr.clear();
        return true;
      }

    // Output elements reserved for each remaining input element every time
    // the output string is grown.
    const auto __grow = __cvt.max_length() + 1;
    auto __src = __first;    // next input element still to be converted
    size_t __written = 0;    // output elements produced so far
    std::codecvt_base::result __status;

    for (;;)
      {
        __outstr.resize(__outstr.size() + (__last - __src) * __grow);
        auto __dst = &__outstr.front() + __written;
        auto const __dst_end = &__outstr.back() + 1;
        __status = (__cvt.*__fn)(__state, __src, __last, __src,
                                 __dst, __dst_end, __dst);
        __written = __dst - &__outstr.front();

        // Go round again only if the facet stopped early with input still
        // pending and fewer than __grow unused output elements remain.
        const bool __retry
          = __status == std::codecvt_base::partial
            && __src != __last
            && ptrdiff_t(__outstr.size() - __written) < __grow;
        if (!__retry)
          break;
      }

    if (__status == std::codecvt_base::error)
      {
        __count = __src - __first;
        return false;
      }

    // The codecvt facet will only return noconv when the types are
    // the same, so avoid instantiating basic_string::assign otherwise
    if _GLIBCXX17_CONSTEXPR (std::is_same<typename _Codecvt::intern_type,
                                          typename _Codecvt::extern_type>::value)
      if (__status == std::codecvt_base::noconv)
        {
          __outstr.assign(__first, __last);
          __count = __last - __first;
          return true;
        }

    // Drop the unused tail of the output buffer.
    __outstr.resize(__written);
    __count = __src - __first;
    return true;
  }
104
// Convert a narrow character sequence to a wide string by applying the
// facet's in() member through __do_str_codecvt.  The caller supplies the
// conversion state and receives the consumed-element count in __count.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   std::basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const std::codecvt<_CharT, char, _State>& __cvt,
                   _State& __state, size_t& __count)
  {
    typedef std::codecvt<_CharT, char, _State> _Facet;
    // Spell out the member function type so the address of in() is
    // converted to a pointer to member of the facet type itself.
    typedef std::codecvt_base::result
      (_Facet::*_InFn)(_State&, const char*, const char*, const char*&,
                       _CharT*, _CharT*, _CharT*&) const;
    const _InFn __convert = &_Facet::in;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
122
// Narrow-to-wide conversion for callers that need neither the conversion
// state nor the consumed-element count: both are local and discarded.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   std::basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const std::codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __ignored;
    _State __initial = {};
    return __str_codecvt_in(__first, __last, __outstr, __cvt,
                            __initial, __ignored);
  }
134
// Narrow-to-wide conversion that succeeds only if the whole input range
// was consumed; a conversion that stops early is reported as failure.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in_all(const char* __first, const char* __last,
                       std::basic_string<_CharT, _Traits, _Alloc>& __outstr,
                       const std::codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;
    _State __initial = {};
    if (!__str_codecvt_in(__first, __last, __outstr, __cvt,
                          __initial, __consumed))
      return false;
    return __consumed == size_t(__last - __first);
  }
147
// Convert a wide character sequence to a narrow string by applying the
// facet's out() member through __do_str_codecvt.  The caller supplies the
// conversion state and receives the consumed-element count in __count.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    std::basic_string<char, _Traits, _Alloc>& __outstr,
                    const std::codecvt<_CharT, char, _State>& __cvt,
                    _State& __state, size_t& __count)
  {
    typedef std::codecvt<_CharT, char, _State> _Facet;
    // Spell out the member function type so the address of out() is
    // converted to a pointer to member of the facet type itself.
    typedef std::codecvt_base::result
      (_Facet::*_OutFn)(_State&, const _CharT*, const _CharT*,
                        const _CharT*&, char*, char*, char*&) const;
    const _OutFn __convert = &_Facet::out;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
165
// Wide-to-narrow conversion for callers that need neither the conversion
// state nor the consumed-element count: both are local and discarded.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    std::basic_string<char, _Traits, _Alloc>& __outstr,
                    const std::codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __ignored;
    _State __initial = {};
    return __str_codecvt_out(__first, __last, __outstr, __cvt,
                             __initial, __ignored);
  }
177
// Wide-to-narrow conversion that succeeds only if the whole input range
// was consumed; a conversion that stops early is reported as failure.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
                        std::basic_string<char, _Traits, _Alloc>& __outstr,
                        const std::codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;
    _State __initial = {};
    if (!__str_codecvt_out(__first, __last, __outstr, __cvt,
                           __initial, __consumed))
      return false;
    return __consumed == size_t(__last - __first);
  }
190
191#ifdef _GLIBCXX_USE_CHAR8_T
192
193 // Convert wide character string to narrow.
194 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
195 inline bool
196 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
197 basic_string<char8_t, _Traits, _Alloc>& __outstr,
198 const codecvt<_CharT, char8_t, _State>& __cvt,
199 _State& __state, size_t& __count)
200 {
201 using _Codecvt = codecvt<_CharT, char8_t, _State>;
202 using _ConvFn
203 = codecvt_base::result
204 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
205 char8_t*, char8_t*, char8_t*&) const;
206 _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
207 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
208 __count, __fn);
209 }
210
211 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
212 inline bool
213 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
214 basic_string<char8_t, _Traits, _Alloc>& __outstr,
215 const codecvt<_CharT, char8_t, _State>& __cvt)
216 {
217 _State __state = {};
218 size_t __n;
219 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
220 }
221
222#endif // _GLIBCXX_USE_CHAR8_T
223
224#ifdef _GLIBCXX_USE_WCHAR_T
225
226_GLIBCXX_BEGIN_NAMESPACE_CXX11
227
/// String conversions
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Wide_alloc = std::allocator<_Elem>,
         typename _Byte_alloc = std::allocator<char>>
  class wstring_convert
  {
  public:
    using byte_string
      = std::basic_string<char, std::char_traits<char>, _Byte_alloc>;
    using wide_string
      = std::basic_string<_Elem, std::char_traits<_Elem>, _Wide_alloc>;
    using state_type = typename _Codecvt::state_type;
    using int_type = typename wide_string::traits_type::int_type;

    /// Default constructor.
    wstring_convert() : _M_cvt(new _Codecvt()) { }

    /** Constructor.
     *
     * @param  __pcvt The facet to use for conversions.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws a logic error if @p __pcvt is null.
     */
    explicit
    wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
    {
      if (!_M_cvt)
        std::__throw_logic_error("wstring_convert");
    }

    /** Construct with an initial conversion state.
     *
     * @param  __pcvt The facet to use for conversions.
     * @param  __state Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * The object's conversion state will persist between conversions.
     * Throws a logic error if @p __pcvt is null.
     */
    wstring_convert(_Codecvt* __pcvt, state_type __state)
    : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
    {
      if (!_M_cvt)
        std::__throw_logic_error("wstring_convert");
    }

    /** Construct with error strings.
     *
     * @param  __byte_err A string to return on failed conversions.
     * @param  __wide_err A wide string to return on failed conversions.
     *
     * With error strings set, a failed conversion returns the matching
     * error string instead of throwing.
     */
    explicit
    wstring_convert(const byte_string& __byte_err,
                    const wide_string& __wide_err = wide_string())
    : _M_cvt(new _Codecvt),
      _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
      _M_with_strings(true)
    {
      if (!_M_cvt)
        std::__throw_logic_error("wstring_convert");
    }

    ~wstring_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wstring_convert(const wstring_convert&) = delete;
    wstring_convert& operator=(const wstring_convert&) = delete;

    /// @{ Convert from bytes.

    // Single byte: converted as a one-element range.
    wide_string
    from_bytes(char __byte)
    {
      char __bytes[2] = { __byte };
      return from_bytes(__bytes, __bytes+1);
    }

    // Null-terminated byte string.
    wide_string
    from_bytes(const char* __ptr)
    {
      const char* const __end = __ptr + std::char_traits<char>::length(__ptr);
      return from_bytes(__ptr, __end);
    }

    // Whole byte_string, including any embedded nulls.
    wide_string
    from_bytes(const byte_string& __str)
    {
      const char* const __begin = __str.data();
      return from_bytes(__begin, __begin + __str.size());
    }

    // Range [__first, __last).  All other overloads forward here.
    wide_string
    from_bytes(const char* __first, const char* __last)
    {
      // Start from a fresh state unless one was given at construction.
      if (!_M_with_cvtstate)
        _M_state = state_type();
      wide_string __result{ _M_wide_err_string.get_allocator() };
      const bool __ok = __str_codecvt_in(__first, __last, __result, *_M_cvt,
                                         _M_state, _M_count);
      if (__ok)
        return __result;
      // Conversion failed: throw unless error strings were supplied.
      if (!_M_with_strings)
        std::__throw_range_error("wstring_convert::from_bytes");
      return _M_wide_err_string;
    }
    /// @}

    /// @{ Convert to bytes.

    // Single wide character: converted as a one-element range.
    byte_string
    to_bytes(_Elem __wchar)
    {
      _Elem __wchars[2] = { __wchar };
      return to_bytes(__wchars, __wchars+1);
    }

    // Null-terminated wide string.
    byte_string
    to_bytes(const _Elem* __ptr)
    {
      const _Elem* const __end
        = __ptr + wide_string::traits_type::length(__ptr);
      return to_bytes(__ptr, __end);
    }

    // Whole wide_string, including any embedded nulls.
    byte_string
    to_bytes(const wide_string& __wstr)
    {
      const _Elem* const __begin = __wstr.data();
      return to_bytes(__begin, __begin + __wstr.size());
    }

    // Range [__first, __last).  All other overloads forward here.
    byte_string
    to_bytes(const _Elem* __first, const _Elem* __last)
    {
      // Start from a fresh state unless one was given at construction.
      if (!_M_with_cvtstate)
        _M_state = state_type();
      byte_string __result{ _M_byte_err_string.get_allocator() };
      const bool __ok = __str_codecvt_out(__first, __last, __result, *_M_cvt,
                                          _M_state, _M_count);
      if (__ok)
        return __result;
      // Conversion failed: throw unless error strings were supplied.
      if (!_M_with_strings)
        std::__throw_range_error("wstring_convert::to_bytes");
      return _M_byte_err_string;
    }
    /// @}

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2174. wstring_convert::converted() should be noexcept
    /// The number of elements successfully converted in the last conversion.
    size_t converted() const noexcept { return _M_count; }

    /// The final conversion state of the last conversion.
    state_type state() const { return _M_state; }

  private:
    std::unique_ptr<_Codecvt> _M_cvt;              // owned conversion facet
    byte_string               _M_byte_err_string;  // result of failed to_bytes
    wide_string               _M_wide_err_string;  // result of failed from_bytes
    state_type                _M_state = state_type();
    size_t                    _M_count = 0;        // input elements consumed
    bool                      _M_with_cvtstate = false; // state persists
    bool                      _M_with_strings = false;  // error strings set
  };
381
382_GLIBCXX_END_NAMESPACE_CXX11
383
/// Buffer conversions
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = std::char_traits<_Elem>>
  class wbuffer_convert : public std::basic_streambuf<_Elem, _Tr>
  {
    typedef std::basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /// Default constructor.
    wbuffer_convert() : wbuffer_convert(nullptr) { }

    /** Constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer.
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws a logic error if @p __pcvt is null.  The get and put areas
     * are only set up when @p __bytebuf is non-null.
     */
    explicit
    wbuffer_convert(std::streambuf* __bytebuf,
                    _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        std::__throw_logic_error("wbuffer_convert");

      _M_always_noconv = _M_cvt->always_noconv();

      if (_M_buf)
        {
          this->setp(_M_put_area, _M_put_area + _S_buffer_length);
          // Empty get area, positioned after the putback slots.
          this->setg(_M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length);
        }
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer.
    std::streambuf* rdbuf() const noexcept { return _M_buf; }

    /// Replace the underlying byte stream buffer, returning the old one.
    std::streambuf*
    rdbuf(std::streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    // Convert and write the put area, then sync the byte stream buffer.
    // Returns 0 on success, -1 if there is no byte buffer or a step fails.
    int
    sync()
    {
      if (_M_buf && _M_conv_put() && !_M_buf->pubsync())
        return 0;
      return -1;
    }

    // Called when the put area is full: flush it, then store __out.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    // Called when the get area is exhausted: refill it and return the
    // next character without consuming it.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      if (this->gptr() < this->egptr() || _M_conv_get())
        return _Tr::to_int_type(*this->gptr());
      return _Tr::eof();
    }

    // Copy __n characters through the put area, flushing each time it
    // fills.  Returns the number of characters accepted.
    std::streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s,
           std::streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      std::streamsize __done = 0;
      do
        {
          auto __nn
            = std::min<std::streamsize>(this->epptr() - this->pptr(),
                                        __n - __done);
          _Tr::copy(this->pptr(), __s + __done, __nn);
          this->pbump(__nn);
          __done += __nn;
        } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // fill the get area from converted contents of the byte stream buffer
    bool
    _M_conv_get()
    {
      // Keep up to _S_putback_length already-read characters in front of
      // the position where new characters will be stored.
      const std::streamsize __pb1 = this->gptr() - this->eback();
      // Local copy so that std::min does not bind a reference to the
      // static data member.
      const std::streamsize __pb2 = _S_putback_length;
      const std::streamsize __npb = std::min(__pb1, __pb2);

      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      // Read after any bytes left unconverted by the previous call; ask
      // for at least one byte so that end of input is detected.
      std::streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
      __nbytes = std::min(__nbytes, _M_buf->in_avail());
      if (__nbytes < 1)
        __nbytes = 1;
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      std::codecvt_base::result __result;
      if (_M_always_noconv)
        __result = std::codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == std::codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          // Publish the copied characters.  Without this the get area
          // stays empty and only the first character of each refill can
          // be read; the rest are lost.
          this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
          return true;
        }

      // Move any unconverted trailing bytes to the front for next time.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        std::char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != std::codecvt_base::error;
    }

    // unused
    bool
    _M_put(...)
    { return false; }

    // Write __n bytes to the byte stream buffer; false on a short write.
    bool
    _M_put(const char* __p, std::streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // Write [__p, __p + __n) unconverted and, if that succeeds, empty the
    // put area.  Without the rewind the same characters are written again
    // by the next flush and a full put area never gains free space.
    bool
    _M_put_noconv(const _Elem* __p, std::streamsize __n)
    {
      if (!_M_put(__p, __n))
        return false;
      this->pbump(this->pbase() - this->pptr());
      return true;
    }

    // convert the put area and write to the byte stream buffer
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const std::streamsize __pending = __last - __first;

      if (_M_always_noconv)
        return _M_put_noconv(__first, __pending);

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == std::codecvt_base::error)
            return false;
          else if (__result == std::codecvt_base::noconv)
            // Write what has not been consumed yet, unconverted.
            return _M_put_noconv(__next, __last - __next);

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      // Stop once everything is converted or no progress was made.
      while (__next != __last && __next != __start);

      // Keep any unconverted characters at the start of the put area.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(__first - __next);
      return __next != __first;
    }

    std::streambuf*            _M_buf;    // underlying byte buffer, not owned
    std::unique_ptr<_Codecvt>  _M_cvt;    // owned conversion facet
    state_type                 _M_state;  // current conversion state

    static const std::streamsize _S_buffer_length = 32;
    static const std::streamsize _S_putback_length = 3;
    _Elem                      _M_put_area[_S_buffer_length];
    _Elem                      _M_get_area[_S_buffer_length];
    std::streamsize            _M_unconv = 0;  // bytes held in _M_get_buf
    char                       _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                       _M_always_noconv;
  };
608
609#endif // _GLIBCXX_USE_WCHAR_T
610
611 /// @} group locales
612
613_GLIBCXX_END_NAMESPACE_VERSION
614} // namespace
615
616#endif // __cplusplus
617
618#endif /* _LOCALE_CONV_H */
619