1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21
22#if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
23#define SDL_DISABLE_ANALYZE_MACROS 1
24#endif
25
26#include "../SDL_internal.h"
27
28/* This file contains portable iconv functions for SDL */
29
30#include "SDL_stdinc.h"
31#include "SDL_endian.h"
32
33#if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
34#ifdef __FreeBSD__
35/* Define LIBICONV_PLUG to use iconv from the base instead of ports and avoid linker errors. */
36#define LIBICONV_PLUG 1
37#endif
38#include <iconv.h>
39
40/* Depending on which standard the iconv() was implemented with,
41 iconv() may or may not use const char ** for the inbuf param.
42 If we get this wrong, it's just a warning, so no big deal.
43*/
44#if defined(_XGP6) || defined(__APPLE__) || defined(__RISCOS__) || defined(__FREEBSD__) || \
45 defined(__EMSCRIPTEN__) || \
46 (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
47 (defined(_NEWLIB_VERSION)))
48#define ICONV_INBUF_NONCONST
49#endif
50
51#include <errno.h>
52
53SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
54
55SDL_iconv_t
56SDL_iconv_open(const char *tocode, const char *fromcode)
57{
58 return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
59}
60
61int
62SDL_iconv_close(SDL_iconv_t cd)
63{
64 return iconv_close((iconv_t) ((size_t) cd));
65}
66
67size_t
68SDL_iconv(SDL_iconv_t cd,
69 const char **inbuf, size_t * inbytesleft,
70 char **outbuf, size_t * outbytesleft)
71{
72 size_t retCode;
73#ifdef ICONV_INBUF_NONCONST
74 retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
75#else
76 retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
77#endif
78 if (retCode == (size_t) - 1) {
79 switch (errno) {
80 case E2BIG:
81 return SDL_ICONV_E2BIG;
82 case EILSEQ:
83 return SDL_ICONV_EILSEQ;
84 case EINVAL:
85 return SDL_ICONV_EINVAL;
86 default:
87 return SDL_ICONV_ERROR;
88 }
89 }
90 return retCode;
91}
92
93#else
94
95/* Lots of useful information on Unicode at:
96 http://www.cl.cam.ac.uk/~mgk25/unicode.html
97*/
98
/* Code point stored at the start of the output when the destination is the
   byte-order-agnostic UTF-16 or UTF-32 encoding. */
#define UNICODE_BOM 0xFEFF

/* Substitutes emitted for input that cannot be represented: '?' for the
   ASCII and Latin-1 encoders, U+FFFD wherever a Unicode value is produced. */
#define UNKNOWN_ASCII '?'
#define UNKNOWN_UNICODE 0xFFFD
103
/* Identifiers for the encodings understood by the built-in converter.
   ENCODING_UNKNOWN marks a name that was not found in the encodings table. */
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16, /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32, /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE,
};
/* Map the "native" aliases onto the big- or little-endian variant that
   matches the byte order SDL was compiled for. */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE ENCODING_UCS4BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE ENCODING_UCS4LE
#endif
132
/* Conversion descriptor of the built-in converter.  Both fields hold one of
   the ENCODING_* identifiers.  SDL_iconv() replaces ENCODING_UTF16 and
   ENCODING_UTF32 in these fields with a concrete byte order once it has
   picked one. */
struct _SDL_iconv_t
{
    int src_fmt; /* encoding of the input text */
    int dst_fmt; /* encoding of the output text */
};
138
139static struct
140{
141 const char *name;
142 int format;
143} encodings[] = {
144/* *INDENT-OFF* */
145 { "ASCII", ENCODING_ASCII },
146 { "US-ASCII", ENCODING_ASCII },
147 { "8859-1", ENCODING_LATIN1 },
148 { "ISO-8859-1", ENCODING_LATIN1 },
149 { "UTF8", ENCODING_UTF8 },
150 { "UTF-8", ENCODING_UTF8 },
151 { "UTF16", ENCODING_UTF16 },
152 { "UTF-16", ENCODING_UTF16 },
153 { "UTF16BE", ENCODING_UTF16BE },
154 { "UTF-16BE", ENCODING_UTF16BE },
155 { "UTF16LE", ENCODING_UTF16LE },
156 { "UTF-16LE", ENCODING_UTF16LE },
157 { "UTF32", ENCODING_UTF32 },
158 { "UTF-32", ENCODING_UTF32 },
159 { "UTF32BE", ENCODING_UTF32BE },
160 { "UTF-32BE", ENCODING_UTF32BE },
161 { "UTF32LE", ENCODING_UTF32LE },
162 { "UTF-32LE", ENCODING_UTF32LE },
163 { "UCS2", ENCODING_UCS2BE },
164 { "UCS-2", ENCODING_UCS2BE },
165 { "UCS-2LE", ENCODING_UCS2LE },
166 { "UCS-2BE", ENCODING_UCS2BE },
167 { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
168 { "UCS4", ENCODING_UCS4BE },
169 { "UCS-4", ENCODING_UCS4BE },
170 { "UCS-4LE", ENCODING_UCS4LE },
171 { "UCS-4BE", ENCODING_UCS4BE },
172 { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
173/* *INDENT-ON* */
174};
175
/* Work out the character set named by the environment.
 *
 * The first of LC_ALL, LC_CTYPE, LC_MESSAGES and LANG that is set (even if
 * set to an empty string) is used.  An unset, empty or "C" value yields
 * "ASCII".  Anything up to and including the first '.' is dropped and the
 * copy placed in `buffer` is cut at the first '@', so "en_US.UTF-8@blah"
 * becomes "UTF-8".
 *
 * buffer/bufsize: caller-provided storage for the result.
 * Returns `buffer`.
 */
static const char *
getlocale(char *buffer, size_t bufsize)
{
    const char *const candidates[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *charset = NULL;
    char *mark;
    size_t k;

    /* Stop at the first variable that exists in the environment. */
    for (k = 0; charset == NULL && k < SDL_arraysize(candidates); ++k) {
        charset = SDL_getenv(candidates[k]);
    }
    if (charset == NULL || *charset == '\0' || SDL_strcmp(charset, "C") == 0) {
        charset = "ASCII";
    }

    /* Skip the "language_TERRITORY." prefix when there is one. */
    mark = SDL_strchr(charset, '.');
    if (mark != NULL) {
        charset = mark + 1;
    }

    SDL_strlcpy(buffer, charset, bufsize);

    /* Drop a trailing "@modifier" from the copy. */
    mark = SDL_strchr(buffer, '@');
    if (mark != NULL) {
        *mark = '\0';
    }

    return buffer;
}
210
211SDL_iconv_t
212SDL_iconv_open(const char *tocode, const char *fromcode)
213{
214 int src_fmt = ENCODING_UNKNOWN;
215 int dst_fmt = ENCODING_UNKNOWN;
216 int i;
217 char fromcode_buffer[64];
218 char tocode_buffer[64];
219
220 if (!fromcode || !*fromcode) {
221 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
222 }
223 if (!tocode || !*tocode) {
224 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
225 }
226 for (i = 0; i < SDL_arraysize(encodings); ++i) {
227 if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
228 src_fmt = encodings[i].format;
229 if (dst_fmt != ENCODING_UNKNOWN) {
230 break;
231 }
232 }
233 if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
234 dst_fmt = encodings[i].format;
235 if (src_fmt != ENCODING_UNKNOWN) {
236 break;
237 }
238 }
239 }
240 if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
241 SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
242 if (cd) {
243 cd->src_fmt = src_fmt;
244 cd->dst_fmt = dst_fmt;
245 return cd;
246 }
247 }
248 return (SDL_iconv_t) - 1;
249}
250
251size_t
252SDL_iconv(SDL_iconv_t cd,
253 const char **inbuf, size_t * inbytesleft,
254 char **outbuf, size_t * outbytesleft)
255{
256 /* For simplicity, we'll convert everything to and from UCS-4 */
257 const char *src;
258 char *dst;
259 size_t srclen, dstlen;
260 Uint32 ch = 0;
261 size_t total;
262
263 if (!inbuf || !*inbuf) {
264 /* Reset the context */
265 return 0;
266 }
267 if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
268 return SDL_ICONV_E2BIG;
269 }
270 src = *inbuf;
271 srclen = (inbytesleft ? *inbytesleft : 0);
272 dst = *outbuf;
273 dstlen = *outbytesleft;
274
275 switch (cd->src_fmt) {
276 case ENCODING_UTF16:
277 /* Scan for a byte order marker */
278 {
279 Uint8 *p = (Uint8 *) src;
280 size_t n = srclen / 2;
281 while (n) {
282 if (p[0] == 0xFF && p[1] == 0xFE) {
283 cd->src_fmt = ENCODING_UTF16BE;
284 break;
285 } else if (p[0] == 0xFE && p[1] == 0xFF) {
286 cd->src_fmt = ENCODING_UTF16LE;
287 break;
288 }
289 p += 2;
290 --n;
291 }
292 if (n == 0) {
293 /* We can't tell, default to host order */
294 cd->src_fmt = ENCODING_UTF16NATIVE;
295 }
296 }
297 break;
298 case ENCODING_UTF32:
299 /* Scan for a byte order marker */
300 {
301 Uint8 *p = (Uint8 *) src;
302 size_t n = srclen / 4;
303 while (n) {
304 if (p[0] == 0xFF && p[1] == 0xFE &&
305 p[2] == 0x00 && p[3] == 0x00) {
306 cd->src_fmt = ENCODING_UTF32BE;
307 break;
308 } else if (p[0] == 0x00 && p[1] == 0x00 &&
309 p[2] == 0xFE && p[3] == 0xFF) {
310 cd->src_fmt = ENCODING_UTF32LE;
311 break;
312 }
313 p += 4;
314 --n;
315 }
316 if (n == 0) {
317 /* We can't tell, default to host order */
318 cd->src_fmt = ENCODING_UTF32NATIVE;
319 }
320 }
321 break;
322 }
323
324 switch (cd->dst_fmt) {
325 case ENCODING_UTF16:
326 /* Default to host order, need to add byte order marker */
327 if (dstlen < 2) {
328 return SDL_ICONV_E2BIG;
329 }
330 *(Uint16 *) dst = UNICODE_BOM;
331 dst += 2;
332 dstlen -= 2;
333 cd->dst_fmt = ENCODING_UTF16NATIVE;
334 break;
335 case ENCODING_UTF32:
336 /* Default to host order, need to add byte order marker */
337 if (dstlen < 4) {
338 return SDL_ICONV_E2BIG;
339 }
340 *(Uint32 *) dst = UNICODE_BOM;
341 dst += 4;
342 dstlen -= 4;
343 cd->dst_fmt = ENCODING_UTF32NATIVE;
344 break;
345 }
346
347 total = 0;
348 while (srclen > 0) {
349 /* Decode a character */
350 switch (cd->src_fmt) {
351 case ENCODING_ASCII:
352 {
353 Uint8 *p = (Uint8 *) src;
354 ch = (Uint32) (p[0] & 0x7F);
355 ++src;
356 --srclen;
357 }
358 break;
359 case ENCODING_LATIN1:
360 {
361 Uint8 *p = (Uint8 *) src;
362 ch = (Uint32) p[0];
363 ++src;
364 --srclen;
365 }
366 break;
367 case ENCODING_UTF8: /* RFC 3629 */
368 {
369 Uint8 *p = (Uint8 *) src;
370 size_t left = 0;
371 SDL_bool overlong = SDL_FALSE;
372 if (p[0] >= 0xF0) {
373 if ((p[0] & 0xF8) != 0xF0) {
374 /* Skip illegal sequences
375 return SDL_ICONV_EILSEQ;
376 */
377 ch = UNKNOWN_UNICODE;
378 } else {
379 if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
380 overlong = SDL_TRUE;
381 }
382 ch = (Uint32) (p[0] & 0x07);
383 left = 3;
384 }
385 } else if (p[0] >= 0xE0) {
386 if ((p[0] & 0xF0) != 0xE0) {
387 /* Skip illegal sequences
388 return SDL_ICONV_EILSEQ;
389 */
390 ch = UNKNOWN_UNICODE;
391 } else {
392 if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
393 overlong = SDL_TRUE;
394 }
395 ch = (Uint32) (p[0] & 0x0F);
396 left = 2;
397 }
398 } else if (p[0] >= 0xC0) {
399 if ((p[0] & 0xE0) != 0xC0) {
400 /* Skip illegal sequences
401 return SDL_ICONV_EILSEQ;
402 */
403 ch = UNKNOWN_UNICODE;
404 } else {
405 if ((p[0] & 0xDE) == 0xC0) {
406 overlong = SDL_TRUE;
407 }
408 ch = (Uint32) (p[0] & 0x1F);
409 left = 1;
410 }
411 } else {
412 if ((p[0] & 0x80) != 0x00) {
413 /* Skip illegal sequences
414 return SDL_ICONV_EILSEQ;
415 */
416 ch = UNKNOWN_UNICODE;
417 } else {
418 ch = (Uint32) p[0];
419 }
420 }
421 ++src;
422 --srclen;
423 if (srclen < left) {
424 return SDL_ICONV_EINVAL;
425 }
426 while (left--) {
427 ++p;
428 if ((p[0] & 0xC0) != 0x80) {
429 /* Skip illegal sequences
430 return SDL_ICONV_EILSEQ;
431 */
432 ch = UNKNOWN_UNICODE;
433 break;
434 }
435 ch <<= 6;
436 ch |= (p[0] & 0x3F);
437 ++src;
438 --srclen;
439 }
440 if (overlong) {
441 /* Potential security risk
442 return SDL_ICONV_EILSEQ;
443 */
444 ch = UNKNOWN_UNICODE;
445 }
446 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
447 (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
448 /* Skip illegal sequences
449 return SDL_ICONV_EILSEQ;
450 */
451 ch = UNKNOWN_UNICODE;
452 }
453 }
454 break;
455 case ENCODING_UTF16BE: /* RFC 2781 */
456 {
457 Uint8 *p = (Uint8 *) src;
458 Uint16 W1, W2;
459 if (srclen < 2) {
460 return SDL_ICONV_EINVAL;
461 }
462 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
463 src += 2;
464 srclen -= 2;
465 if (W1 < 0xD800 || W1 > 0xDFFF) {
466 ch = (Uint32) W1;
467 break;
468 }
469 if (W1 > 0xDBFF) {
470 /* Skip illegal sequences
471 return SDL_ICONV_EILSEQ;
472 */
473 ch = UNKNOWN_UNICODE;
474 break;
475 }
476 if (srclen < 2) {
477 return SDL_ICONV_EINVAL;
478 }
479 p = (Uint8 *) src;
480 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
481 src += 2;
482 srclen -= 2;
483 if (W2 < 0xDC00 || W2 > 0xDFFF) {
484 /* Skip illegal sequences
485 return SDL_ICONV_EILSEQ;
486 */
487 ch = UNKNOWN_UNICODE;
488 break;
489 }
490 ch = (((Uint32) (W1 & 0x3FF) << 10) |
491 (Uint32) (W2 & 0x3FF)) + 0x10000;
492 }
493 break;
494 case ENCODING_UTF16LE: /* RFC 2781 */
495 {
496 Uint8 *p = (Uint8 *) src;
497 Uint16 W1, W2;
498 if (srclen < 2) {
499 return SDL_ICONV_EINVAL;
500 }
501 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
502 src += 2;
503 srclen -= 2;
504 if (W1 < 0xD800 || W1 > 0xDFFF) {
505 ch = (Uint32) W1;
506 break;
507 }
508 if (W1 > 0xDBFF) {
509 /* Skip illegal sequences
510 return SDL_ICONV_EILSEQ;
511 */
512 ch = UNKNOWN_UNICODE;
513 break;
514 }
515 if (srclen < 2) {
516 return SDL_ICONV_EINVAL;
517 }
518 p = (Uint8 *) src;
519 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
520 src += 2;
521 srclen -= 2;
522 if (W2 < 0xDC00 || W2 > 0xDFFF) {
523 /* Skip illegal sequences
524 return SDL_ICONV_EILSEQ;
525 */
526 ch = UNKNOWN_UNICODE;
527 break;
528 }
529 ch = (((Uint32) (W1 & 0x3FF) << 10) |
530 (Uint32) (W2 & 0x3FF)) + 0x10000;
531 }
532 break;
533 case ENCODING_UCS2LE:
534 {
535 Uint8 *p = (Uint8 *) src;
536 if (srclen < 2) {
537 return SDL_ICONV_EINVAL;
538 }
539 ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
540 src += 2;
541 srclen -= 2;
542 }
543 break;
544 case ENCODING_UCS2BE:
545 {
546 Uint8 *p = (Uint8 *) src;
547 if (srclen < 2) {
548 return SDL_ICONV_EINVAL;
549 }
550 ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
551 src += 2;
552 srclen -= 2;
553 }
554 break;
555 case ENCODING_UCS4BE:
556 case ENCODING_UTF32BE:
557 {
558 Uint8 *p = (Uint8 *) src;
559 if (srclen < 4) {
560 return SDL_ICONV_EINVAL;
561 }
562 ch = ((Uint32) p[0] << 24) |
563 ((Uint32) p[1] << 16) |
564 ((Uint32) p[2] << 8) | (Uint32) p[3];
565 src += 4;
566 srclen -= 4;
567 }
568 break;
569 case ENCODING_UCS4LE:
570 case ENCODING_UTF32LE:
571 {
572 Uint8 *p = (Uint8 *) src;
573 if (srclen < 4) {
574 return SDL_ICONV_EINVAL;
575 }
576 ch = ((Uint32) p[3] << 24) |
577 ((Uint32) p[2] << 16) |
578 ((Uint32) p[1] << 8) | (Uint32) p[0];
579 src += 4;
580 srclen -= 4;
581 }
582 break;
583 }
584
585 /* Encode a character */
586 switch (cd->dst_fmt) {
587 case ENCODING_ASCII:
588 {
589 Uint8 *p = (Uint8 *) dst;
590 if (dstlen < 1) {
591 return SDL_ICONV_E2BIG;
592 }
593 if (ch > 0x7F) {
594 *p = UNKNOWN_ASCII;
595 } else {
596 *p = (Uint8) ch;
597 }
598 ++dst;
599 --dstlen;
600 }
601 break;
602 case ENCODING_LATIN1:
603 {
604 Uint8 *p = (Uint8 *) dst;
605 if (dstlen < 1) {
606 return SDL_ICONV_E2BIG;
607 }
608 if (ch > 0xFF) {
609 *p = UNKNOWN_ASCII;
610 } else {
611 *p = (Uint8) ch;
612 }
613 ++dst;
614 --dstlen;
615 }
616 break;
617 case ENCODING_UTF8: /* RFC 3629 */
618 {
619 Uint8 *p = (Uint8 *) dst;
620 if (ch > 0x10FFFF) {
621 ch = UNKNOWN_UNICODE;
622 }
623 if (ch <= 0x7F) {
624 if (dstlen < 1) {
625 return SDL_ICONV_E2BIG;
626 }
627 *p = (Uint8) ch;
628 ++dst;
629 --dstlen;
630 } else if (ch <= 0x7FF) {
631 if (dstlen < 2) {
632 return SDL_ICONV_E2BIG;
633 }
634 p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
635 p[1] = 0x80 | (Uint8) (ch & 0x3F);
636 dst += 2;
637 dstlen -= 2;
638 } else if (ch <= 0xFFFF) {
639 if (dstlen < 3) {
640 return SDL_ICONV_E2BIG;
641 }
642 p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
643 p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
644 p[2] = 0x80 | (Uint8) (ch & 0x3F);
645 dst += 3;
646 dstlen -= 3;
647 } else {
648 if (dstlen < 4) {
649 return SDL_ICONV_E2BIG;
650 }
651 p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
652 p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
653 p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
654 p[3] = 0x80 | (Uint8) (ch & 0x3F);
655 dst += 4;
656 dstlen -= 4;
657 }
658 }
659 break;
660 case ENCODING_UTF16BE: /* RFC 2781 */
661 {
662 Uint8 *p = (Uint8 *) dst;
663 if (ch > 0x10FFFF) {
664 ch = UNKNOWN_UNICODE;
665 }
666 if (ch < 0x10000) {
667 if (dstlen < 2) {
668 return SDL_ICONV_E2BIG;
669 }
670 p[0] = (Uint8) (ch >> 8);
671 p[1] = (Uint8) ch;
672 dst += 2;
673 dstlen -= 2;
674 } else {
675 Uint16 W1, W2;
676 if (dstlen < 4) {
677 return SDL_ICONV_E2BIG;
678 }
679 ch = ch - 0x10000;
680 W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
681 W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
682 p[0] = (Uint8) (W1 >> 8);
683 p[1] = (Uint8) W1;
684 p[2] = (Uint8) (W2 >> 8);
685 p[3] = (Uint8) W2;
686 dst += 4;
687 dstlen -= 4;
688 }
689 }
690 break;
691 case ENCODING_UTF16LE: /* RFC 2781 */
692 {
693 Uint8 *p = (Uint8 *) dst;
694 if (ch > 0x10FFFF) {
695 ch = UNKNOWN_UNICODE;
696 }
697 if (ch < 0x10000) {
698 if (dstlen < 2) {
699 return SDL_ICONV_E2BIG;
700 }
701 p[1] = (Uint8) (ch >> 8);
702 p[0] = (Uint8) ch;
703 dst += 2;
704 dstlen -= 2;
705 } else {
706 Uint16 W1, W2;
707 if (dstlen < 4) {
708 return SDL_ICONV_E2BIG;
709 }
710 ch = ch - 0x10000;
711 W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
712 W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
713 p[1] = (Uint8) (W1 >> 8);
714 p[0] = (Uint8) W1;
715 p[3] = (Uint8) (W2 >> 8);
716 p[2] = (Uint8) W2;
717 dst += 4;
718 dstlen -= 4;
719 }
720 }
721 break;
722 case ENCODING_UCS2BE:
723 {
724 Uint8 *p = (Uint8 *) dst;
725 if (ch > 0xFFFF) {
726 ch = UNKNOWN_UNICODE;
727 }
728 if (dstlen < 2) {
729 return SDL_ICONV_E2BIG;
730 }
731 p[0] = (Uint8) (ch >> 8);
732 p[1] = (Uint8) ch;
733 dst += 2;
734 dstlen -= 2;
735 }
736 break;
737 case ENCODING_UCS2LE:
738 {
739 Uint8 *p = (Uint8 *) dst;
740 if (ch > 0xFFFF) {
741 ch = UNKNOWN_UNICODE;
742 }
743 if (dstlen < 2) {
744 return SDL_ICONV_E2BIG;
745 }
746 p[1] = (Uint8) (ch >> 8);
747 p[0] = (Uint8) ch;
748 dst += 2;
749 dstlen -= 2;
750 }
751 break;
752 case ENCODING_UTF32BE:
753 if (ch > 0x10FFFF) {
754 ch = UNKNOWN_UNICODE;
755 }
756 /* fallthrough */
757 case ENCODING_UCS4BE:
758 if (ch > 0x7FFFFFFF) {
759 ch = UNKNOWN_UNICODE;
760 }
761 {
762 Uint8 *p = (Uint8 *) dst;
763 if (dstlen < 4) {
764 return SDL_ICONV_E2BIG;
765 }
766 p[0] = (Uint8) (ch >> 24);
767 p[1] = (Uint8) (ch >> 16);
768 p[2] = (Uint8) (ch >> 8);
769 p[3] = (Uint8) ch;
770 dst += 4;
771 dstlen -= 4;
772 }
773 break;
774 case ENCODING_UTF32LE:
775 if (ch > 0x10FFFF) {
776 ch = UNKNOWN_UNICODE;
777 }
778 /* fallthrough */
779 case ENCODING_UCS4LE:
780 if (ch > 0x7FFFFFFF) {
781 ch = UNKNOWN_UNICODE;
782 }
783 {
784 Uint8 *p = (Uint8 *) dst;
785 if (dstlen < 4) {
786 return SDL_ICONV_E2BIG;
787 }
788 p[3] = (Uint8) (ch >> 24);
789 p[2] = (Uint8) (ch >> 16);
790 p[1] = (Uint8) (ch >> 8);
791 p[0] = (Uint8) ch;
792 dst += 4;
793 dstlen -= 4;
794 }
795 break;
796 }
797
798 /* Update state */
799 *inbuf = src;
800 *inbytesleft = srclen;
801 *outbuf = dst;
802 *outbytesleft = dstlen;
803 ++total;
804 }
805 return total;
806}
807
808int
809SDL_iconv_close(SDL_iconv_t cd)
810{
811 if (cd != (SDL_iconv_t)-1) {
812 SDL_free(cd);
813 }
814 return 0;
815}
816
817#endif /* !HAVE_ICONV */
818
819char *
820SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
821 size_t inbytesleft)
822{
823 SDL_iconv_t cd;
824 char *string;
825 size_t stringsize;
826 char *outbuf;
827 size_t outbytesleft;
828 size_t retCode = 0;
829
830 cd = SDL_iconv_open(tocode, fromcode);
831 if (cd == (SDL_iconv_t) - 1) {
832 /* See if we can recover here (fixes iconv on Solaris 11) */
833 if (!tocode || !*tocode) {
834 tocode = "UTF-8";
835 }
836 if (!fromcode || !*fromcode) {
837 fromcode = "UTF-8";
838 }
839 cd = SDL_iconv_open(tocode, fromcode);
840 }
841 if (cd == (SDL_iconv_t) - 1) {
842 return NULL;
843 }
844
845 stringsize = inbytesleft > 4 ? inbytesleft : 4;
846 string = (char *) SDL_malloc(stringsize);
847 if (!string) {
848 SDL_iconv_close(cd);
849 return NULL;
850 }
851 outbuf = string;
852 outbytesleft = stringsize;
853 SDL_memset(outbuf, 0, 4);
854
855 while (inbytesleft > 0) {
856 const size_t oldinbytesleft = inbytesleft;
857 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
858 switch (retCode) {
859 case SDL_ICONV_E2BIG:
860 {
861 char *oldstring = string;
862 stringsize *= 2;
863 string = (char *) SDL_realloc(string, stringsize);
864 if (!string) {
865 SDL_iconv_close(cd);
866 return NULL;
867 }
868 outbuf = string + (outbuf - oldstring);
869 outbytesleft = stringsize - (outbuf - string);
870 SDL_memset(outbuf, 0, 4);
871 }
872 break;
873 case SDL_ICONV_EILSEQ:
874 /* Try skipping some input data - not perfect, but... */
875 ++inbuf;
876 --inbytesleft;
877 break;
878 case SDL_ICONV_EINVAL:
879 case SDL_ICONV_ERROR:
880 /* We can't continue... */
881 inbytesleft = 0;
882 break;
883 }
884 /* Avoid infinite loops when nothing gets converted */
885 if (oldinbytesleft == inbytesleft)
886 {
887 break;
888 }
889 }
890 SDL_iconv_close(cd);
891
892 return string;
893}
894
895/* vi: set ts=4 sw=4 expandtab: */
896