1// wstring_convert implementation -*- C++ -*-
2
3// Copyright (C) 2015-2019 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file bits/locale_conv.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{locale}
28 */
29
30#ifndef _LOCALE_CONV_H
31#define _LOCALE_CONV_H 1
32
33#if __cplusplus < 201103L
34# include <bits/c++0x_warning.h>
35#else
36
37#include <streambuf>
38#include <bits/stringfwd.h>
39#include <bits/allocator.h>
40#include <bits/codecvt.h>
41#include <bits/unique_ptr.h>
42
43namespace std _GLIBCXX_VISIBILITY(default)
44{
45_GLIBCXX_BEGIN_NAMESPACE_VERSION
46
47 /**
48 * @addtogroup locales
49 * @{
50 */
51
/**
 * Shared engine for the string conversion helpers.
 *
 * Converts the input range [__first, __last) with the facet member
 * function @p __fn and stores the converted characters in @p __outstr.
 *
 * @param __first   Start of the input sequence.
 * @param __last    End of the input sequence.
 * @param __outstr  Receives the result; previous contents are discarded.
 * @param __cvt     The facet on which @p __fn is invoked.
 * @param __state   Conversion state handed to the facet.
 * @param __count   Set to the number of input elements consumed.
 * @param __fn      Pointer to the facet member performing the conversion.
 * @return false if the facet reported codecvt_base::error, true otherwise.
 */
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    // An empty input range yields an empty result without calling the facet.
    if (__first == __last)
      {
        __outstr.clear();
        __count = 0;
        return true;
      }

    // Output characters reserved per remaining input element on each pass.
    const auto __unit = __cvt.max_length() + 1;

    const _InChar* __in = __first;
    size_t __written = 0;
    codecvt_base::result __res;

    for (;;)
      {
        // Grow the string, then let the facet append after what has
        // already been written.
        __outstr.resize(__outstr.size() + (__last - __in) * __unit);
        auto* const __base = &__outstr.front();
        auto* __to = __base + __written;
        auto* const __to_end = &__outstr.back() + 1;

        __res = (__cvt.*__fn)(__state, __in, __last, __in,
                              __to, __to_end, __to);
        __written = __to - __base;

        // Go round again only for a partial result that left input
        // unconsumed while fewer than __unit output slots remain free.
        const bool __again = __res == codecvt_base::partial
          && __in != __last
          && (__outstr.size() - __written) < __unit;
        if (!__again)
          break;
      }

    if (__res == codecvt_base::error)
      {
        __count = __in - __first;
        return false;
      }

    if (__res == codecvt_base::noconv)
      {
        // No conversion was performed: the output is a copy of the input.
        __outstr.assign(__first, __last);
        __count = __last - __first;
        return true;
      }

    // Trim the unused tail of the over-allocated string.
    __outstr.resize(__written);
    __count = __in - __first;
    return true;
  }
102
// Convert a narrow character sequence to a wide string using the facet's
// in() member.  On return __count holds the number of input characters
// consumed and __state the resulting conversion state.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt,
                   _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> _Facet;
    typedef codecvt_base::result
      (_Facet::*_InPtr)(_State&, const char*, const char*, const char*&,
                        _CharT*, _CharT*, _CharT*&) const;

    // Give the member pointer the explicit type _InPtr before forwarding.
    const _InPtr __convert = &_Facet::in;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
120
// Convenience overload: converts with a value-initialized conversion state
// and discards the count of consumed input characters.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __ignored_count;
    _State __initial = {};
    return __str_codecvt_in(__first, __last, __outstr, __cvt,
                            __initial, __ignored_count);
  }
131
// Convert a wide character sequence to a narrow string using the facet's
// out() member.  On return __count holds the number of input characters
// consumed and __state the resulting conversion state.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt,
                    _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> _Facet;
    typedef codecvt_base::result
      (_Facet::*_OutPtr)(_State&, const _CharT*, const _CharT*,
                         const _CharT*&, char*, char*, char*&) const;

    // Give the member pointer the explicit type _OutPtr before forwarding.
    const _OutPtr __convert = &_Facet::out;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
149
// Convenience overload: converts with a value-initialized conversion state
// and discards the count of consumed input characters.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __ignored_count;
    _State __initial = {};
    return __str_codecvt_out(__first, __last, __outstr, __cvt,
                             __initial, __ignored_count);
  }
160
161#ifdef _GLIBCXX_USE_CHAR8_T
162
163 // Convert wide character string to narrow.
164 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
165 inline bool
166 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
167 basic_string<char8_t, _Traits, _Alloc>& __outstr,
168 const codecvt<_CharT, char8_t, _State>& __cvt,
169 _State& __state, size_t& __count)
170 {
171 using _Codecvt = codecvt<_CharT, char8_t, _State>;
172 using _ConvFn
173 = codecvt_base::result
174 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
175 char8_t*, char8_t*, char8_t*&) const;
176 _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
177 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
178 __count, __fn);
179 }
180
181 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
182 inline bool
183 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
184 basic_string<char8_t, _Traits, _Alloc>& __outstr,
185 const codecvt<_CharT, char8_t, _State>& __cvt)
186 {
187 _State __state = {};
188 size_t __n;
189 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
190 }
191
192#endif // _GLIBCXX_USE_CHAR8_T
193
194#ifdef _GLIBCXX_USE_WCHAR_T
195
196_GLIBCXX_BEGIN_NAMESPACE_CXX11
197
/// String conversions
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Wide_alloc = allocator<_Elem>,
         typename _Byte_alloc = allocator<char>>
  class wstring_convert
  {
  public:
    using byte_string = basic_string<char, char_traits<char>, _Byte_alloc>;
    using wide_string = basic_string<_Elem, char_traits<_Elem>, _Wide_alloc>;
    using state_type = typename _Codecvt::state_type;
    using int_type = typename wide_string::traits_type::int_type;

    /// Default constructor: allocates and owns a new facet.
    wstring_convert() : _M_cvt(new _Codecvt()) { }

    /** Construct from a facet.
     *
     * @param  __pcvt The facet to use for conversions.
     * @throw  logic_error if @p __pcvt is null.
     *
     * The object takes ownership of @p __pcvt and deletes it on destruction.
     */
    explicit
    wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
    {
      if (_M_cvt == nullptr)
        __throw_logic_error("wstring_convert");
    }

    /** Construct from a facet and an initial conversion state.
     *
     * @param  __pcvt  The facet to use for conversions.
     * @param  __state Initial conversion state.
     * @throw  logic_error if @p __pcvt is null.
     *
     * The object takes ownership of @p __pcvt and deletes it on destruction.
     * The conversion state is not reset between conversions.
     */
    wstring_convert(_Codecvt* __pcvt, state_type __state)
    : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
    {
      if (_M_cvt == nullptr)
        __throw_logic_error("wstring_convert");
    }

    /** Construct with the strings to return when a conversion fails.
     *
     * @param  __byte_err Returned by to_bytes on a failed conversion.
     * @param  __wide_err Returned by from_bytes on a failed conversion.
     */
    explicit
    wstring_convert(const byte_string& __byte_err,
                    const wide_string& __wide_err = wide_string())
    : _M_cvt(new _Codecvt),
      _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
      _M_with_strings(true)
    {
      if (_M_cvt == nullptr)
        __throw_logic_error("wstring_convert");
    }

    ~wstring_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wstring_convert(const wstring_convert&) = delete;
    wstring_convert& operator=(const wstring_convert&) = delete;

    /// @{ Convert from bytes.

    /// Convert a single byte.
    wide_string
    from_bytes(char __byte)
    {
      char __one[2] = { __byte, '\0' };
      return from_bytes(__one, __one + 1);
    }

    /// Convert a null-terminated byte string.
    wide_string
    from_bytes(const char* __ptr)
    {
      const char* const __end = __ptr + char_traits<char>::length(__ptr);
      return from_bytes(__ptr, __end);
    }

    /// Convert the contents of a byte string.
    wide_string
    from_bytes(const byte_string& __str)
    {
      const char* const __begin = __str.data();
      return from_bytes(__begin, __begin + __str.size());
    }

    /** Convert the byte range [__first, __last).
     *
     * Unless an initial state was supplied at construction, the conversion
     * state is reset first.  If the conversion fails, the wide error string
     * is returned when error strings were supplied; otherwise range_error
     * is thrown.
     */
    wide_string
    from_bytes(const char* __first, const char* __last)
    {
      if (!_M_with_cvtstate)
        _M_state = state_type();

      wide_string __result{ _M_wide_err_string.get_allocator() };
      const bool __ok = __str_codecvt_in(__first, __last, __result, *_M_cvt,
                                         _M_state, _M_count);
      if (!__ok)
        {
          if (!_M_with_strings)
            __throw_range_error("wstring_convert::from_bytes");
          return _M_wide_err_string;
        }
      return __result;
    }
    /// @}

    /// @{ Convert to bytes.

    /// Convert a single wide character.
    byte_string
    to_bytes(_Elem __wchar)
    {
      _Elem __one[2] = { __wchar, _Elem() };
      return to_bytes(__one, __one + 1);
    }

    /// Convert a null-terminated wide string.
    byte_string
    to_bytes(const _Elem* __ptr)
    {
      const _Elem* const __end
        = __ptr + wide_string::traits_type::length(__ptr);
      return to_bytes(__ptr, __end);
    }

    /// Convert the contents of a wide string.
    byte_string
    to_bytes(const wide_string& __wstr)
    {
      const _Elem* const __begin = __wstr.data();
      return to_bytes(__begin, __begin + __wstr.size());
    }

    /** Convert the wide character range [__first, __last).
     *
     * Unless an initial state was supplied at construction, the conversion
     * state is reset first.  If the conversion fails, the byte error string
     * is returned when error strings were supplied; otherwise range_error
     * is thrown.
     */
    byte_string
    to_bytes(const _Elem* __first, const _Elem* __last)
    {
      if (!_M_with_cvtstate)
        _M_state = state_type();

      byte_string __result{ _M_byte_err_string.get_allocator() };
      const bool __ok = __str_codecvt_out(__first, __last, __result, *_M_cvt,
                                          _M_state, _M_count);
      if (!__ok)
        {
          if (!_M_with_strings)
            __throw_range_error("wstring_convert::to_bytes");
          return _M_byte_err_string;
        }
      return __result;
    }
    /// @}

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2174. wstring_convert::converted() should be noexcept
    /// The number of input elements consumed by the last conversion.
    size_t converted() const noexcept { return _M_count; }

    /// The conversion state left by the last conversion.
    state_type state() const { return _M_state; }

  private:
    unique_ptr<_Codecvt>  _M_cvt;                    // owned facet
    byte_string           _M_byte_err_string;        // to_bytes fallback
    wide_string           _M_wide_err_string;        // from_bytes fallback
    state_type            _M_state = state_type();
    size_t                _M_count = 0;
    bool                  _M_with_cvtstate = false;  // keep state across calls
    bool                  _M_with_strings = false;   // error strings supplied
  };
351
352_GLIBCXX_END_NAMESPACE_CXX11
353
/**
 * Buffer conversions
 *
 * A stream buffer of @c _Elem characters layered on top of a byte stream
 * buffer.  Characters written are converted with the facet's out() and
 * sent to the byte buffer; bytes read from it are converted with in().
 */
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = char_traits<_Elem>>
  class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  {
    typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /// Default constructor: no byte stream buffer is attached.
    wbuffer_convert() : wbuffer_convert(nullptr) { }

    /** Constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer (may be null).
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     * @throw  logic_error if @p __pcvt is null.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     */
    explicit
    wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        __throw_logic_error("wbuffer_convert");

      _M_always_noconv = _M_cvt->always_noconv();

      if (_M_buf)
        _M_init_areas();
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer.
    streambuf* rdbuf() const noexcept { return _M_buf; }

    /** Replace the underlying byte stream buffer.
     *
     * @param  __bytebuf The new byte stream buffer (may be null).
     * @return The previous byte stream buffer.
     */
    streambuf*
    rdbuf(streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      // The constructor skips buffer setup when it is given a null byte
      // buffer.  Without doing it here, an object that was default
      // constructed and attached later has no put area and rejects
      // every write.
      if (_M_buf && !this->pbase())
        _M_init_areas();
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    /// Convert and write pending output, then sync the byte buffer.
    /// Returns 0 on success and -1 on failure.
    int
    sync()
    {
      if (!_M_buf)
        return -1;
      // _M_conv_put() reports "nothing consumed" as false, so it must
      // only be asked to work when output is actually pending; an empty
      // put area is not a failure.
      if (this->pptr() != this->pbase() && !_M_conv_put())
        return -1;
      return _M_buf->pubsync() ? -1 : 0;
    }

    /// Make room in the put area by converting its contents, then store
    /// @p __out unless it is the end-of-file value.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    /// Return the next available character, refilling the get area from
    /// the byte buffer if it is exhausted.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      if (this->gptr() < this->egptr() || _M_conv_get())
        return _Tr::to_int_type(*this->gptr());
      else
        return _Tr::eof();
    }

    /// Write @p __n characters from @p __s, converting the put area each
    /// time it fills.  Returns the number of characters accepted.
    streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      streamsize __done = 0;
      do
        {
          auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
                                           __n - __done);
          _Tr::copy(this->pptr(), __s + __done, __nn);
          this->pbump(__nn);
          __done += __nn;
        } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // point the put and get areas at the internal arrays; the get area
    // starts empty, positioned after the put-back slots
    void
    _M_init_areas()
    {
      this->setp(_M_put_area, _M_put_area + _S_buffer_length);
      this->setg(_M_get_area + _S_putback_length,
                 _M_get_area + _S_putback_length,
                 _M_get_area + _S_putback_length);
    }

    // fill the get area from converted contents of the byte stream buffer
    bool
    _M_conv_get()
    {
      const streamsize __pb1 = this->gptr() - this->eback();
      const streamsize __pb2 = _S_putback_length;
      const streamsize __npb = std::min(__pb1, __pb2);

      // Save the most recently consumed characters in the put-back slots.
      // NOTE(review): eback() is always set to the start of the converted
      // data below, so these saved characters are not currently reachable
      // through the get area -- confirm whether that is intended.
      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      // Read at least one byte, appending after any bytes left
      // unconverted by the previous call.
      streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
      __nbytes = std::min(__nbytes, _M_buf->in_avail());
      if (__nbytes < 1)
        __nbytes = 1;
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      codecvt_base::result __result;
      if (_M_always_noconv)
        __result = codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          // Publish the copied characters; without this the get area
          // stays empty and the data just read is lost.
          this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
          return true;
        }

      // Keep any unconverted tail at the front of the byte buffer for the
      // next call.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != codecvt_base::error;
    }

    // fallback selected when _Elem is not char; always reports failure
    bool
    _M_put(...)
    { return false; }

    // write __n bytes to the byte stream buffer; true if all were accepted
    bool
    _M_put(const char* __p, streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // convert the put area and write to the byte stream buffer
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const streamsize __pending = __last - __first;

      if (_M_always_noconv)
        {
          if (!_M_put(__first, __pending))
            return false;
          // Everything was written: rewind the put pointer so the same
          // characters are not sent again and the area can be refilled.
          this->pbump(__first - __last);
          return true;
        }

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == codecvt_base::error)
            return false;
          else if (__result == codecvt_base::noconv)
            {
              // Write only what has not been handled yet, then rewind
              // the put pointer as in the always-noconv case.
              const streamsize __rest = __last - __next;
              if (!_M_put(__next, __rest))
                return false;
              this->pbump(__first - __last);
              return true;
            }

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      // Repeat while input remains and the last pass made progress.
      while (__next != __last && __next != __start);

      // Slide any unconverted characters to the front of the put area.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(__first - __next);
      return __next != __first;
    }

    streambuf*            _M_buf;
    unique_ptr<_Codecvt>  _M_cvt;
    state_type            _M_state;

    static const streamsize _S_buffer_length = 32;
    static const streamsize _S_putback_length = 3;
    _Elem                 _M_put_area[_S_buffer_length];
    _Elem                 _M_get_area[_S_buffer_length];
    streamsize            _M_unconv = 0;
    char                  _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                  _M_always_noconv;
  };
578
579#endif // _GLIBCXX_USE_WCHAR_T
580
581 /// @} group locales
582
583_GLIBCXX_END_NAMESPACE_VERSION
584} // namespace
585
586#endif // __cplusplus
587
588#endif /* _LOCALE_CONV_H */
589