1// Locale support (codecvt) -*- C++ -*-
2
3// Copyright (C) 2000-2018 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file bits/codecvt.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{locale}
28 */
29
30//
31// ISO C++ 14882: 22.2.1.5 Template class codecvt
32//
33
34// Written by Benjamin Kosnik <bkoz@redhat.com>
35
36#ifndef _CODECVT_H
37#define _CODECVT_H 1
38
39#pragma GCC system_header
40
41namespace std _GLIBCXX_VISIBILITY(default)
42{
43_GLIBCXX_BEGIN_NAMESPACE_VERSION
44
/**
 *  @brief  Non-template base class for the codecvt facets [22.2.1.5].
 *
 *  Carries no data and no functions; its only content is the @c result
 *  enumeration used as the status code of the conversion functions.
 */
class codecvt_base
{
public:
  /// Conversion status; enumerators are numbered consecutively from zero.
  enum result
  {
    ok,       ///< All of the input was converted.
    partial,  ///< Input ended early or the output had insufficient space.
    error,    ///< The conversion failed.
    noconv    ///< No conversion was necessary.
  };
};
57
58 /**
59 * @brief Common base for codecvt functions.
60 *
61 * This template class provides implementations of the public functions
62 * that forward to the protected virtual functions.
63 *
64 * This template also provides abstract stubs for the protected virtual
65 * functions.
66 */
67 template<typename _InternT, typename _ExternT, typename _StateT>
68 class __codecvt_abstract_base
69 : public locale::facet, public codecvt_base
70 {
71 public:
72 // Types:
73 typedef codecvt_base::result result;
74 typedef _InternT intern_type;
75 typedef _ExternT extern_type;
76 typedef _StateT state_type;
77
78 // 22.2.1.5.1 codecvt members
79 /**
80 * @brief Convert from internal to external character set.
81 *
82 * Converts input string of intern_type to output string of
83 * extern_type. This is analogous to wcsrtombs. It does this by
84 * calling codecvt::do_out.
85 *
86 * The source and destination character sets are determined by the
87 * facet's locale, internal and external types.
88 *
89 * The characters in [from,from_end) are converted and written to
90 * [to,to_end). from_next and to_next are set to point to the
91 * character following the last successfully converted character,
92 * respectively. If the result needed no conversion, from_next and
93 * to_next are not affected.
94 *
95 * The @a state argument should be initialized if the input is at the
96 * beginning and carried from a previous call if continuing
97 * conversion. There are no guarantees about how @a state is used.
98 *
99 * The result returned is a member of codecvt_base::result. If
100 * all the input is converted, returns codecvt_base::ok. If no
101 * conversion is necessary, returns codecvt_base::noconv. If
102 * the input ends early or there is insufficient space in the
103 * output, returns codecvt_base::partial. Otherwise the
104 * conversion failed and codecvt_base::error is returned.
105 *
106 * @param __state Persistent conversion state data.
107 * @param __from Start of input.
108 * @param __from_end End of input.
109 * @param __from_next Returns start of unconverted data.
110 * @param __to Start of output buffer.
111 * @param __to_end End of output buffer.
112 * @param __to_next Returns start of unused output area.
113 * @return codecvt_base::result.
114 */
115 result
116 out(state_type& __state, const intern_type* __from,
117 const intern_type* __from_end, const intern_type*& __from_next,
118 extern_type* __to, extern_type* __to_end,
119 extern_type*& __to_next) const
120 {
121 return this->do_out(__state, __from, __from_end, __from_next,
122 __to, __to_end, __to_next);
123 }
124
125 /**
126 * @brief Reset conversion state.
127 *
128 * Writes characters to output that would restore @a state to initial
129 * conditions. The idea is that if a partial conversion occurs, then
130 * the converting the characters written by this function would leave
131 * the state in initial conditions, rather than partial conversion
132 * state. It does this by calling codecvt::do_unshift().
133 *
134 * For example, if 4 external characters always converted to 1 internal
135 * character, and input to in() had 6 external characters with state
136 * saved, this function would write two characters to the output and
137 * set the state to initialized conditions.
138 *
139 * The source and destination character sets are determined by the
140 * facet's locale, internal and external types.
141 *
142 * The result returned is a member of codecvt_base::result. If the
143 * state could be reset and data written, returns codecvt_base::ok. If
144 * no conversion is necessary, returns codecvt_base::noconv. If the
145 * output has insufficient space, returns codecvt_base::partial.
146 * Otherwise the reset failed and codecvt_base::error is returned.
147 *
148 * @param __state Persistent conversion state data.
149 * @param __to Start of output buffer.
150 * @param __to_end End of output buffer.
151 * @param __to_next Returns start of unused output area.
152 * @return codecvt_base::result.
153 */
154 result
155 unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
156 extern_type*& __to_next) const
157 { return this->do_unshift(__state, __to,__to_end,__to_next); }
158
159 /**
160 * @brief Convert from external to internal character set.
161 *
162 * Converts input string of extern_type to output string of
163 * intern_type. This is analogous to mbsrtowcs. It does this by
164 * calling codecvt::do_in.
165 *
166 * The source and destination character sets are determined by the
167 * facet's locale, internal and external types.
168 *
169 * The characters in [from,from_end) are converted and written to
170 * [to,to_end). from_next and to_next are set to point to the
171 * character following the last successfully converted character,
172 * respectively. If the result needed no conversion, from_next and
173 * to_next are not affected.
174 *
175 * The @a state argument should be initialized if the input is at the
176 * beginning and carried from a previous call if continuing
177 * conversion. There are no guarantees about how @a state is used.
178 *
179 * The result returned is a member of codecvt_base::result. If
180 * all the input is converted, returns codecvt_base::ok. If no
181 * conversion is necessary, returns codecvt_base::noconv. If
182 * the input ends early or there is insufficient space in the
183 * output, returns codecvt_base::partial. Otherwise the
184 * conversion failed and codecvt_base::error is returned.
185 *
186 * @param __state Persistent conversion state data.
187 * @param __from Start of input.
188 * @param __from_end End of input.
189 * @param __from_next Returns start of unconverted data.
190 * @param __to Start of output buffer.
191 * @param __to_end End of output buffer.
192 * @param __to_next Returns start of unused output area.
193 * @return codecvt_base::result.
194 */
195 result
196 in(state_type& __state, const extern_type* __from,
197 const extern_type* __from_end, const extern_type*& __from_next,
198 intern_type* __to, intern_type* __to_end,
199 intern_type*& __to_next) const
200 {
201 return this->do_in(__state, __from, __from_end, __from_next,
202 __to, __to_end, __to_next);
203 }
204
205 int
206 encoding() const throw()
207 { return this->do_encoding(); }
208
209 bool
210 always_noconv() const throw()
211 { return this->do_always_noconv(); }
212
213 int
214 length(state_type& __state, const extern_type* __from,
215 const extern_type* __end, size_t __max) const
216 { return this->do_length(__state, __from, __end, __max); }
217
218 int
219 max_length() const throw()
220 { return this->do_max_length(); }
221
222 protected:
223 explicit
224 __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
225
226 virtual
227 ~__codecvt_abstract_base() { }
228
229 /**
230 * @brief Convert from internal to external character set.
231 *
232 * Converts input string of intern_type to output string of
233 * extern_type. This function is a hook for derived classes to change
234 * the value returned. @see out for more information.
235 */
236 virtual result
237 do_out(state_type& __state, const intern_type* __from,
238 const intern_type* __from_end, const intern_type*& __from_next,
239 extern_type* __to, extern_type* __to_end,
240 extern_type*& __to_next) const = 0;
241
242 virtual result
243 do_unshift(state_type& __state, extern_type* __to,
244 extern_type* __to_end, extern_type*& __to_next) const = 0;
245
246 virtual result
247 do_in(state_type& __state, const extern_type* __from,
248 const extern_type* __from_end, const extern_type*& __from_next,
249 intern_type* __to, intern_type* __to_end,
250 intern_type*& __to_next) const = 0;
251
252 virtual int
253 do_encoding() const throw() = 0;
254
255 virtual bool
256 do_always_noconv() const throw() = 0;
257
258 virtual int
259 do_length(state_type&, const extern_type* __from,
260 const extern_type* __end, size_t __max) const = 0;
261
262 virtual int
263 do_max_length() const throw() = 0;
264 };
265
266 /**
267 * @brief Primary class template codecvt.
268 * @ingroup locales
269 *
270 * NB: Generic, mostly useless implementation.
271 *
272 */
273 template<typename _InternT, typename _ExternT, typename _StateT>
274 class codecvt
275 : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
276 {
277 public:
278 // Types:
279 typedef codecvt_base::result result;
280 typedef _InternT intern_type;
281 typedef _ExternT extern_type;
282 typedef _StateT state_type;
283
284 protected:
285 __c_locale _M_c_locale_codecvt;
286
287 public:
288 static locale::id id;
289
290 explicit
291 codecvt(size_t __refs = 0)
292 : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
293 _M_c_locale_codecvt(0)
294 { }
295
296 explicit
297 codecvt(__c_locale __cloc, size_t __refs = 0);
298
299 protected:
300 virtual
301 ~codecvt() { }
302
303 virtual result
304 do_out(state_type& __state, const intern_type* __from,
305 const intern_type* __from_end, const intern_type*& __from_next,
306 extern_type* __to, extern_type* __to_end,
307 extern_type*& __to_next) const;
308
309 virtual result
310 do_unshift(state_type& __state, extern_type* __to,
311 extern_type* __to_end, extern_type*& __to_next) const;
312
313 virtual result
314 do_in(state_type& __state, const extern_type* __from,
315 const extern_type* __from_end, const extern_type*& __from_next,
316 intern_type* __to, intern_type* __to_end,
317 intern_type*& __to_next) const;
318
319 virtual int
320 do_encoding() const throw();
321
322 virtual bool
323 do_always_noconv() const throw();
324
325 virtual int
326 do_length(state_type&, const extern_type* __from,
327 const extern_type* __end, size_t __max) const;
328
329 virtual int
330 do_max_length() const throw();
331 };
332
333 template<typename _InternT, typename _ExternT, typename _StateT>
334 locale::id codecvt<_InternT, _ExternT, _StateT>::id;
335
336 /// class codecvt<char, char, mbstate_t> specialization.
337 template<>
338 class codecvt<char, char, mbstate_t>
339 : public __codecvt_abstract_base<char, char, mbstate_t>
340 {
341 friend class messages<char>;
342
343 public:
344 // Types:
345 typedef char intern_type;
346 typedef char extern_type;
347 typedef mbstate_t state_type;
348
349 protected:
350 __c_locale _M_c_locale_codecvt;
351
352 public:
353 static locale::id id;
354
355 explicit
356 codecvt(size_t __refs = 0);
357
358 explicit
359 codecvt(__c_locale __cloc, size_t __refs = 0);
360
361 protected:
362 virtual
363 ~codecvt();
364
365 virtual result
366 do_out(state_type& __state, const intern_type* __from,
367 const intern_type* __from_end, const intern_type*& __from_next,
368 extern_type* __to, extern_type* __to_end,
369 extern_type*& __to_next) const;
370
371 virtual result
372 do_unshift(state_type& __state, extern_type* __to,
373 extern_type* __to_end, extern_type*& __to_next) const;
374
375 virtual result
376 do_in(state_type& __state, const extern_type* __from,
377 const extern_type* __from_end, const extern_type*& __from_next,
378 intern_type* __to, intern_type* __to_end,
379 intern_type*& __to_next) const;
380
381 virtual int
382 do_encoding() const throw();
383
384 virtual bool
385 do_always_noconv() const throw();
386
387 virtual int
388 do_length(state_type&, const extern_type* __from,
389 const extern_type* __end, size_t __max) const;
390
391 virtual int
392 do_max_length() const throw();
393 };
394
395#ifdef _GLIBCXX_USE_WCHAR_T
396 /** @brief Class codecvt<wchar_t, char, mbstate_t> specialization.
397 *
398 * Converts between narrow and wide characters in the native character set
399 */
400 template<>
401 class codecvt<wchar_t, char, mbstate_t>
402 : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
403 {
404 friend class messages<wchar_t>;
405
406 public:
407 // Types:
408 typedef wchar_t intern_type;
409 typedef char extern_type;
410 typedef mbstate_t state_type;
411
412 protected:
413 __c_locale _M_c_locale_codecvt;
414
415 public:
416 static locale::id id;
417
418 explicit
419 codecvt(size_t __refs = 0);
420
421 explicit
422 codecvt(__c_locale __cloc, size_t __refs = 0);
423
424 protected:
425 virtual
426 ~codecvt();
427
428 virtual result
429 do_out(state_type& __state, const intern_type* __from,
430 const intern_type* __from_end, const intern_type*& __from_next,
431 extern_type* __to, extern_type* __to_end,
432 extern_type*& __to_next) const;
433
434 virtual result
435 do_unshift(state_type& __state,
436 extern_type* __to, extern_type* __to_end,
437 extern_type*& __to_next) const;
438
439 virtual result
440 do_in(state_type& __state,
441 const extern_type* __from, const extern_type* __from_end,
442 const extern_type*& __from_next,
443 intern_type* __to, intern_type* __to_end,
444 intern_type*& __to_next) const;
445
446 virtual
447 int do_encoding() const throw();
448
449 virtual
450 bool do_always_noconv() const throw();
451
452 virtual
453 int do_length(state_type&, const extern_type* __from,
454 const extern_type* __end, size_t __max) const;
455
456 virtual int
457 do_max_length() const throw();
458 };
459#endif //_GLIBCXX_USE_WCHAR_T
460
461#if __cplusplus >= 201103L
462#ifdef _GLIBCXX_USE_C99_STDINT_TR1
463 /** @brief Class codecvt<char16_t, char, mbstate_t> specialization.
464 *
465 * Converts between UTF-16 and UTF-8.
466 */
467 template<>
468 class codecvt<char16_t, char, mbstate_t>
469 : public __codecvt_abstract_base<char16_t, char, mbstate_t>
470 {
471 public:
472 // Types:
473 typedef char16_t intern_type;
474 typedef char extern_type;
475 typedef mbstate_t state_type;
476
477 public:
478 static locale::id id;
479
480 explicit
481 codecvt(size_t __refs = 0)
482 : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { }
483
484 protected:
485 virtual
486 ~codecvt();
487
488 virtual result
489 do_out(state_type& __state, const intern_type* __from,
490 const intern_type* __from_end, const intern_type*& __from_next,
491 extern_type* __to, extern_type* __to_end,
492 extern_type*& __to_next) const;
493
494 virtual result
495 do_unshift(state_type& __state,
496 extern_type* __to, extern_type* __to_end,
497 extern_type*& __to_next) const;
498
499 virtual result
500 do_in(state_type& __state,
501 const extern_type* __from, const extern_type* __from_end,
502 const extern_type*& __from_next,
503 intern_type* __to, intern_type* __to_end,
504 intern_type*& __to_next) const;
505
506 virtual
507 int do_encoding() const throw();
508
509 virtual
510 bool do_always_noconv() const throw();
511
512 virtual
513 int do_length(state_type&, const extern_type* __from,
514 const extern_type* __end, size_t __max) const;
515
516 virtual int
517 do_max_length() const throw();
518 };
519
520 /** @brief Class codecvt<char32_t, char, mbstate_t> specialization.
521 *
522 * Converts between UTF-32 and UTF-8.
523 */
524 template<>
525 class codecvt<char32_t, char, mbstate_t>
526 : public __codecvt_abstract_base<char32_t, char, mbstate_t>
527 {
528 public:
529 // Types:
530 typedef char32_t intern_type;
531 typedef char extern_type;
532 typedef mbstate_t state_type;
533
534 public:
535 static locale::id id;
536
537 explicit
538 codecvt(size_t __refs = 0)
539 : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { }
540
541 protected:
542 virtual
543 ~codecvt();
544
545 virtual result
546 do_out(state_type& __state, const intern_type* __from,
547 const intern_type* __from_end, const intern_type*& __from_next,
548 extern_type* __to, extern_type* __to_end,
549 extern_type*& __to_next) const;
550
551 virtual result
552 do_unshift(state_type& __state,
553 extern_type* __to, extern_type* __to_end,
554 extern_type*& __to_next) const;
555
556 virtual result
557 do_in(state_type& __state,
558 const extern_type* __from, const extern_type* __from_end,
559 const extern_type*& __from_next,
560 intern_type* __to, intern_type* __to_end,
561 intern_type*& __to_next) const;
562
563 virtual
564 int do_encoding() const throw();
565
566 virtual
567 bool do_always_noconv() const throw();
568
569 virtual
570 int do_length(state_type&, const extern_type* __from,
571 const extern_type* __end, size_t __max) const;
572
573 virtual int
574 do_max_length() const throw();
575 };
576
577#endif // _GLIBCXX_USE_C99_STDINT_TR1
578#endif // C++11
579
580 /// class codecvt_byname [22.2.1.6].
581 template<typename _InternT, typename _ExternT, typename _StateT>
582 class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
583 {
584 public:
585 explicit
586 codecvt_byname(const char* __s, size_t __refs = 0)
587 : codecvt<_InternT, _ExternT, _StateT>(__refs)
588 {
589 if (__builtin_strcmp(__s, "C") != 0
590 && __builtin_strcmp(__s, "POSIX") != 0)
591 {
592 this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
593 this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
594 }
595 }
596
597#if __cplusplus >= 201103L
598 explicit
599 codecvt_byname(const string& __s, size_t __refs = 0)
600 : codecvt_byname(__s.c_str(), __refs) { }
601#endif
602
603 protected:
604 virtual
605 ~codecvt_byname() { }
606 };
607
608#if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
609 template<>
610 class codecvt_byname<char16_t, char, mbstate_t>
611 : public codecvt<char16_t, char, mbstate_t>
612 {
613 public:
614 explicit
615 codecvt_byname(const char*, size_t __refs = 0)
616 : codecvt<char16_t, char, mbstate_t>(__refs) { }
617
618 explicit
619 codecvt_byname(const string& __s, size_t __refs = 0)
620 : codecvt_byname(__s.c_str(), __refs) { }
621
622 protected:
623 virtual
624 ~codecvt_byname() { }
625 };
626
627 template<>
628 class codecvt_byname<char32_t, char, mbstate_t>
629 : public codecvt<char32_t, char, mbstate_t>
630 {
631 public:
632 explicit
633 codecvt_byname(const char*, size_t __refs = 0)
634 : codecvt<char32_t, char, mbstate_t>(__refs) { }
635
636 explicit
637 codecvt_byname(const string& __s, size_t __refs = 0)
638 : codecvt_byname(__s.c_str(), __refs) { }
639
640 protected:
641 virtual
642 ~codecvt_byname() { }
643 };
644#endif
645
646 // Inhibit implicit instantiations for required instantiations,
647 // which are defined via explicit instantiations elsewhere.
648#if _GLIBCXX_EXTERN_TEMPLATE
649 extern template class codecvt_byname<char, char, mbstate_t>;
650
651 extern template
652 const codecvt<char, char, mbstate_t>&
653 use_facet<codecvt<char, char, mbstate_t> >(const locale&);
654
655 extern template
656 bool
657 has_facet<codecvt<char, char, mbstate_t> >(const locale&);
658
659#ifdef _GLIBCXX_USE_WCHAR_T
660 extern template class codecvt_byname<wchar_t, char, mbstate_t>;
661
662 extern template
663 const codecvt<wchar_t, char, mbstate_t>&
664 use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
665
666 extern template
667 bool
668 has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
669#endif
670
671#if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
672 extern template class codecvt_byname<char16_t, char, mbstate_t>;
673 extern template class codecvt_byname<char32_t, char, mbstate_t>;
674#endif
675
676#endif
677
678_GLIBCXX_END_NAMESPACE_VERSION
679} // namespace std
680
681#endif // _CODECVT_H
682