1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23// This file contains portable iconv functions for SDL
24
25#if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
26#ifndef SDL_USE_LIBICONV
27// Define LIBICONV_PLUG to use iconv from the base instead of ports and avoid linker errors.
28#define LIBICONV_PLUG 1
29#endif
30#include <iconv.h>
31#include <errno.h>
32
// SDL_iconv_open() below returns the native iconv_t value cast to an
// SDL_iconv_t, so the native handle must fit inside an SDL_iconv_t.
SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof(iconv_t) <= sizeof(SDL_iconv_t));
34
35SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
36{
37 return (SDL_iconv_t)((uintptr_t)iconv_open(tocode, fromcode));
38}
39
40int SDL_iconv_close(SDL_iconv_t cd)
41{
42 if ((size_t)cd == SDL_ICONV_ERROR) {
43 return -1;
44 }
45 return iconv_close((iconv_t)((uintptr_t)cd));
46}
47
48size_t SDL_iconv(SDL_iconv_t cd,
49 const char **inbuf, size_t *inbytesleft,
50 char **outbuf, size_t *outbytesleft)
51{
52 if ((size_t)cd == SDL_ICONV_ERROR) {
53 return SDL_ICONV_ERROR;
54 }
55 /* iconv's second parameter may or may not be `const char const *` depending on the
56 C runtime's whims. Casting to void * seems to make everyone happy, though. */
57 const size_t retCode = iconv((iconv_t)((uintptr_t)cd), (void *)inbuf, inbytesleft, outbuf, outbytesleft);
58 if (retCode == (size_t)-1) {
59 switch (errno) {
60 case E2BIG:
61 return SDL_ICONV_E2BIG;
62 case EILSEQ:
63 return SDL_ICONV_EILSEQ;
64 case EINVAL:
65 return SDL_ICONV_EINVAL;
66 default:
67 return SDL_ICONV_ERROR;
68 }
69 }
70 return retCode;
71}
72
73#else
74
75/* Lots of useful information on Unicode at:
76 http://www.cl.cam.ac.uk/~mgk25/unicode.html
77*/
78
// Byte order mark code point; SDL_iconv() writes it at the start of output
// when the destination is the order-less "UTF-16" or "UTF-32".
#define UNICODE_BOM 0xFEFF

// Substitute written when a character does not fit in ASCII or Latin-1 output.
#define UNKNOWN_ASCII   '?'
// Substitute code point used for undecodable input and for code points that
// are out of range for the destination encoding.
#define UNKNOWN_UNICODE 0xFFFD
83
// Encodings understood by the fallback converter. ENCODING_UNKNOWN marks a
// name that was not found in the encodings table. ENCODING_UTF16 and
// ENCODING_UTF32 carry no byte order of their own; SDL_iconv() replaces them
// with a BE/LE variant the first time the descriptor is used.
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16, // Needs byte order marker
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32, // Needs byte order marker
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE,
};
// "NATIVE" aliases: pick the BE or LE variant of each multi-byte encoding
// according to the byte order SDL was compiled for.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE  ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE  ENCODING_UCS4BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE  ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE  ENCODING_UCS4LE
#endif
112
// State behind an SDL_iconv_t in the fallback implementation. Both fields
// hold ENCODING_* values; SDL_iconv() may overwrite them once the byte order
// of an order-less UTF-16/UTF-32 stream has been settled.
struct SDL_iconv_data_t
{
    int src_fmt; // encoding the input is decoded from
    int dst_fmt; // encoding the output is written in
};
118
119static struct
120{
121 const char *name;
122 int format;
123} encodings[] = {
124 /* *INDENT-OFF* */ // clang-format off
125 { "ASCII", ENCODING_ASCII },
126 { "US-ASCII", ENCODING_ASCII },
127 { "8859-1", ENCODING_LATIN1 },
128 { "ISO-8859-1", ENCODING_LATIN1 },
129#if defined(SDL_PLATFORM_WINDOWS) || defined(SDL_PLATFORM_OS2)
130 { "WCHAR_T", ENCODING_UTF16LE },
131#else
132 { "WCHAR_T", ENCODING_UCS4NATIVE },
133#endif
134 { "UTF8", ENCODING_UTF8 },
135 { "UTF-8", ENCODING_UTF8 },
136 { "UTF16", ENCODING_UTF16 },
137 { "UTF-16", ENCODING_UTF16 },
138 { "UTF16BE", ENCODING_UTF16BE },
139 { "UTF-16BE", ENCODING_UTF16BE },
140 { "UTF16LE", ENCODING_UTF16LE },
141 { "UTF-16LE", ENCODING_UTF16LE },
142 { "UTF32", ENCODING_UTF32 },
143 { "UTF-32", ENCODING_UTF32 },
144 { "UTF32BE", ENCODING_UTF32BE },
145 { "UTF-32BE", ENCODING_UTF32BE },
146 { "UTF32LE", ENCODING_UTF32LE },
147 { "UTF-32LE", ENCODING_UTF32LE },
148 { "UCS2", ENCODING_UCS2BE },
149 { "UCS-2", ENCODING_UCS2BE },
150 { "UCS-2LE", ENCODING_UCS2LE },
151 { "UCS-2BE", ENCODING_UCS2BE },
152 { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
153 { "UCS4", ENCODING_UCS4BE },
154 { "UCS-4", ENCODING_UCS4BE },
155 { "UCS-4LE", ENCODING_UCS4LE },
156 { "UCS-4BE", ENCODING_UCS4BE },
157 { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
158/* *INDENT-ON* */ // clang-format on
159};
160
/* Derive an encoding name from the environment.
   The first of LC_ALL, LC_CTYPE, LC_MESSAGES and LANG that is set supplies
   the locale string; an unset or empty value, or the "C" locale, yields
   "ASCII". Anything up to and including the first '.' is dropped, and the
   copy placed in buffer is cut at the first '@'. Returns buffer. */
static const char *getlocale(char *buffer, size_t bufsize)
{
    static const char *const env_names[] = { "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG" };
    const char *locale = NULL;
    const char *dot;
    char *modifier;
    size_t k;

    for (k = 0; k < sizeof(env_names) / sizeof(env_names[0]); ++k) {
        locale = SDL_getenv(env_names[k]);
        if (locale) {
            break;
        }
    }
    if (!locale || *locale == '\0' || SDL_strcmp(locale, "C") == 0) {
        locale = "ASCII";
    }

    // "en_US.UTF-8@blah" -> "UTF-8@blah": keep only what follows the dot
    dot = SDL_strchr(locale, '.');
    if (dot) {
        locale = dot + 1;
    }

    SDL_strlcpy(buffer, locale, bufsize);

    // "UTF-8@blah" -> "UTF-8": drop the modifier suffix
    modifier = SDL_strchr(buffer, '@');
    if (modifier) {
        *modifier = '\0';
    }

    return buffer;
}
194
195SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
196{
197 int src_fmt = ENCODING_UNKNOWN;
198 int dst_fmt = ENCODING_UNKNOWN;
199 int i;
200 char fromcode_buffer[64];
201 char tocode_buffer[64];
202
203 if (!fromcode || !*fromcode) {
204 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
205 }
206 if (!tocode || !*tocode) {
207 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
208 }
209 for (i = 0; i < SDL_arraysize(encodings); ++i) {
210 if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
211 src_fmt = encodings[i].format;
212 if (dst_fmt != ENCODING_UNKNOWN) {
213 break;
214 }
215 }
216 if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
217 dst_fmt = encodings[i].format;
218 if (src_fmt != ENCODING_UNKNOWN) {
219 break;
220 }
221 }
222 }
223 if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
224 SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
225 if (cd) {
226 cd->src_fmt = src_fmt;
227 cd->dst_fmt = dst_fmt;
228 return cd;
229 }
230 }
231 return (SDL_iconv_t)-1;
232}
233
234size_t SDL_iconv(SDL_iconv_t cd,
235 const char **inbuf, size_t *inbytesleft,
236 char **outbuf, size_t *outbytesleft)
237{
238 // For simplicity, we'll convert everything to and from UCS-4
239 const char *src;
240 char *dst;
241 size_t srclen, dstlen;
242 Uint32 ch = 0;
243 size_t total;
244
245 if ((size_t)cd == SDL_ICONV_ERROR) {
246 return SDL_ICONV_ERROR;
247 }
248 if (!inbuf || !*inbuf) {
249 // Reset the context
250 return 0;
251 }
252 if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
253 return SDL_ICONV_E2BIG;
254 }
255 src = *inbuf;
256 srclen = (inbytesleft ? *inbytesleft : 0);
257 dst = *outbuf;
258 dstlen = *outbytesleft;
259
260 switch (cd->src_fmt) {
261 case ENCODING_UTF16:
262 // Scan for a byte order marker
263 {
264 Uint8 *p = (Uint8 *)src;
265 size_t n = srclen / 2;
266 while (n) {
267 if (p[0] == 0xFF && p[1] == 0xFE) {
268 cd->src_fmt = ENCODING_UTF16BE;
269 break;
270 } else if (p[0] == 0xFE && p[1] == 0xFF) {
271 cd->src_fmt = ENCODING_UTF16LE;
272 break;
273 }
274 p += 2;
275 --n;
276 }
277 if (n == 0) {
278 // We can't tell, default to host order
279 cd->src_fmt = ENCODING_UTF16NATIVE;
280 }
281 }
282 break;
283 case ENCODING_UTF32:
284 // Scan for a byte order marker
285 {
286 Uint8 *p = (Uint8 *)src;
287 size_t n = srclen / 4;
288 while (n) {
289 if (p[0] == 0xFF && p[1] == 0xFE &&
290 p[2] == 0x00 && p[3] == 0x00) {
291 cd->src_fmt = ENCODING_UTF32BE;
292 break;
293 } else if (p[0] == 0x00 && p[1] == 0x00 &&
294 p[2] == 0xFE && p[3] == 0xFF) {
295 cd->src_fmt = ENCODING_UTF32LE;
296 break;
297 }
298 p += 4;
299 --n;
300 }
301 if (n == 0) {
302 // We can't tell, default to host order
303 cd->src_fmt = ENCODING_UTF32NATIVE;
304 }
305 }
306 break;
307 }
308
309 switch (cd->dst_fmt) {
310 case ENCODING_UTF16:
311 // Default to host order, need to add byte order marker
312 if (dstlen < 2) {
313 return SDL_ICONV_E2BIG;
314 }
315 *(Uint16 *)dst = UNICODE_BOM;
316 dst += 2;
317 dstlen -= 2;
318 cd->dst_fmt = ENCODING_UTF16NATIVE;
319 break;
320 case ENCODING_UTF32:
321 // Default to host order, need to add byte order marker
322 if (dstlen < 4) {
323 return SDL_ICONV_E2BIG;
324 }
325 *(Uint32 *)dst = UNICODE_BOM;
326 dst += 4;
327 dstlen -= 4;
328 cd->dst_fmt = ENCODING_UTF32NATIVE;
329 break;
330 }
331
332 total = 0;
333 while (srclen > 0) {
334 // Decode a character
335 switch (cd->src_fmt) {
336 case ENCODING_ASCII:
337 {
338 Uint8 *p = (Uint8 *)src;
339 ch = (Uint32)(p[0] & 0x7F);
340 ++src;
341 --srclen;
342 } break;
343 case ENCODING_LATIN1:
344 {
345 Uint8 *p = (Uint8 *)src;
346 ch = (Uint32)p[0];
347 ++src;
348 --srclen;
349 } break;
350 case ENCODING_UTF8: // RFC 3629
351 {
352 Uint8 *p = (Uint8 *)src;
353 size_t left = 0;
354 bool overlong = false;
355 if (p[0] >= 0xF0) {
356 if ((p[0] & 0xF8) != 0xF0) {
357 /* Skip illegal sequences
358 return SDL_ICONV_EILSEQ;
359 */
360 ch = UNKNOWN_UNICODE;
361 } else {
362 if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
363 overlong = true;
364 }
365 ch = (Uint32)(p[0] & 0x07);
366 left = 3;
367 }
368 } else if (p[0] >= 0xE0) {
369 if ((p[0] & 0xF0) != 0xE0) {
370 /* Skip illegal sequences
371 return SDL_ICONV_EILSEQ;
372 */
373 ch = UNKNOWN_UNICODE;
374 } else {
375 if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
376 overlong = true;
377 }
378 ch = (Uint32)(p[0] & 0x0F);
379 left = 2;
380 }
381 } else if (p[0] >= 0xC0) {
382 if ((p[0] & 0xE0) != 0xC0) {
383 /* Skip illegal sequences
384 return SDL_ICONV_EILSEQ;
385 */
386 ch = UNKNOWN_UNICODE;
387 } else {
388 if ((p[0] & 0xDE) == 0xC0) {
389 overlong = true;
390 }
391 ch = (Uint32)(p[0] & 0x1F);
392 left = 1;
393 }
394 } else {
395 if (p[0] & 0x80) {
396 /* Skip illegal sequences
397 return SDL_ICONV_EILSEQ;
398 */
399 ch = UNKNOWN_UNICODE;
400 } else {
401 ch = (Uint32)p[0];
402 }
403 }
404 ++src;
405 --srclen;
406 if (srclen < left) {
407 return SDL_ICONV_EINVAL;
408 }
409 while (left--) {
410 ++p;
411 if ((p[0] & 0xC0) != 0x80) {
412 /* Skip illegal sequences
413 return SDL_ICONV_EILSEQ;
414 */
415 ch = UNKNOWN_UNICODE;
416 break;
417 }
418 ch <<= 6;
419 ch |= (p[0] & 0x3F);
420 ++src;
421 --srclen;
422 }
423 if (overlong) {
424 /* Potential security risk
425 return SDL_ICONV_EILSEQ;
426 */
427 ch = UNKNOWN_UNICODE;
428 }
429 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
430 (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
431 /* Skip illegal sequences
432 return SDL_ICONV_EILSEQ;
433 */
434 ch = UNKNOWN_UNICODE;
435 }
436 } break;
437 case ENCODING_UTF16BE: // RFC 2781
438 {
439 Uint8 *p = (Uint8 *)src;
440 Uint16 W1, W2;
441 if (srclen < 2) {
442 return SDL_ICONV_EINVAL;
443 }
444 W1 = ((Uint16)p[0] << 8) | (Uint16)p[1];
445 src += 2;
446 srclen -= 2;
447 if (W1 < 0xD800 || W1 > 0xDFFF) {
448 ch = (Uint32)W1;
449 break;
450 }
451 if (W1 > 0xDBFF) {
452 /* Skip illegal sequences
453 return SDL_ICONV_EILSEQ;
454 */
455 ch = UNKNOWN_UNICODE;
456 break;
457 }
458 if (srclen < 2) {
459 return SDL_ICONV_EINVAL;
460 }
461 p = (Uint8 *)src;
462 W2 = ((Uint16)p[0] << 8) | (Uint16)p[1];
463 src += 2;
464 srclen -= 2;
465 if (W2 < 0xDC00 || W2 > 0xDFFF) {
466 /* Skip illegal sequences
467 return SDL_ICONV_EILSEQ;
468 */
469 ch = UNKNOWN_UNICODE;
470 break;
471 }
472 ch = (((Uint32)(W1 & 0x3FF) << 10) |
473 (Uint32)(W2 & 0x3FF)) +
474 0x10000;
475 } break;
476 case ENCODING_UTF16LE: // RFC 2781
477 {
478 Uint8 *p = (Uint8 *)src;
479 Uint16 W1, W2;
480 if (srclen < 2) {
481 return SDL_ICONV_EINVAL;
482 }
483 W1 = ((Uint16)p[1] << 8) | (Uint16)p[0];
484 src += 2;
485 srclen -= 2;
486 if (W1 < 0xD800 || W1 > 0xDFFF) {
487 ch = (Uint32)W1;
488 break;
489 }
490 if (W1 > 0xDBFF) {
491 /* Skip illegal sequences
492 return SDL_ICONV_EILSEQ;
493 */
494 ch = UNKNOWN_UNICODE;
495 break;
496 }
497 if (srclen < 2) {
498 return SDL_ICONV_EINVAL;
499 }
500 p = (Uint8 *)src;
501 W2 = ((Uint16)p[1] << 8) | (Uint16)p[0];
502 src += 2;
503 srclen -= 2;
504 if (W2 < 0xDC00 || W2 > 0xDFFF) {
505 /* Skip illegal sequences
506 return SDL_ICONV_EILSEQ;
507 */
508 ch = UNKNOWN_UNICODE;
509 break;
510 }
511 ch = (((Uint32)(W1 & 0x3FF) << 10) |
512 (Uint32)(W2 & 0x3FF)) +
513 0x10000;
514 } break;
515 case ENCODING_UCS2LE:
516 {
517 Uint8 *p = (Uint8 *)src;
518 if (srclen < 2) {
519 return SDL_ICONV_EINVAL;
520 }
521 ch = ((Uint32)p[1] << 8) | (Uint32)p[0];
522 src += 2;
523 srclen -= 2;
524 } break;
525 case ENCODING_UCS2BE:
526 {
527 Uint8 *p = (Uint8 *)src;
528 if (srclen < 2) {
529 return SDL_ICONV_EINVAL;
530 }
531 ch = ((Uint32)p[0] << 8) | (Uint32)p[1];
532 src += 2;
533 srclen -= 2;
534 } break;
535 case ENCODING_UCS4BE:
536 case ENCODING_UTF32BE:
537 {
538 Uint8 *p = (Uint8 *)src;
539 if (srclen < 4) {
540 return SDL_ICONV_EINVAL;
541 }
542 ch = ((Uint32)p[0] << 24) |
543 ((Uint32)p[1] << 16) |
544 ((Uint32)p[2] << 8) | (Uint32)p[3];
545 src += 4;
546 srclen -= 4;
547 } break;
548 case ENCODING_UCS4LE:
549 case ENCODING_UTF32LE:
550 {
551 Uint8 *p = (Uint8 *)src;
552 if (srclen < 4) {
553 return SDL_ICONV_EINVAL;
554 }
555 ch = ((Uint32)p[3] << 24) |
556 ((Uint32)p[2] << 16) |
557 ((Uint32)p[1] << 8) | (Uint32)p[0];
558 src += 4;
559 srclen -= 4;
560 } break;
561 }
562
563 // Encode a character
564 switch (cd->dst_fmt) {
565 case ENCODING_ASCII:
566 {
567 Uint8 *p = (Uint8 *)dst;
568 if (dstlen < 1) {
569 return SDL_ICONV_E2BIG;
570 }
571 if (ch > 0x7F) {
572 *p = UNKNOWN_ASCII;
573 } else {
574 *p = (Uint8)ch;
575 }
576 ++dst;
577 --dstlen;
578 } break;
579 case ENCODING_LATIN1:
580 {
581 Uint8 *p = (Uint8 *)dst;
582 if (dstlen < 1) {
583 return SDL_ICONV_E2BIG;
584 }
585 if (ch > 0xFF) {
586 *p = UNKNOWN_ASCII;
587 } else {
588 *p = (Uint8)ch;
589 }
590 ++dst;
591 --dstlen;
592 } break;
593 case ENCODING_UTF8: // RFC 3629
594 {
595 Uint8 *p = (Uint8 *)dst;
596 if (ch > 0x10FFFF) {
597 ch = UNKNOWN_UNICODE;
598 }
599 if (ch <= 0x7F) {
600 if (dstlen < 1) {
601 return SDL_ICONV_E2BIG;
602 }
603 *p = (Uint8)ch;
604 ++dst;
605 --dstlen;
606 } else if (ch <= 0x7FF) {
607 if (dstlen < 2) {
608 return SDL_ICONV_E2BIG;
609 }
610 p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
611 p[1] = 0x80 | (Uint8)(ch & 0x3F);
612 dst += 2;
613 dstlen -= 2;
614 } else if (ch <= 0xFFFF) {
615 if (dstlen < 3) {
616 return SDL_ICONV_E2BIG;
617 }
618 p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
619 p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
620 p[2] = 0x80 | (Uint8)(ch & 0x3F);
621 dst += 3;
622 dstlen -= 3;
623 } else {
624 if (dstlen < 4) {
625 return SDL_ICONV_E2BIG;
626 }
627 p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
628 p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
629 p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
630 p[3] = 0x80 | (Uint8)(ch & 0x3F);
631 dst += 4;
632 dstlen -= 4;
633 }
634 } break;
635 case ENCODING_UTF16BE: // RFC 2781
636 {
637 Uint8 *p = (Uint8 *)dst;
638 if (ch > 0x10FFFF) {
639 ch = UNKNOWN_UNICODE;
640 }
641 if (ch < 0x10000) {
642 if (dstlen < 2) {
643 return SDL_ICONV_E2BIG;
644 }
645 p[0] = (Uint8)(ch >> 8);
646 p[1] = (Uint8)ch;
647 dst += 2;
648 dstlen -= 2;
649 } else {
650 Uint16 W1, W2;
651 if (dstlen < 4) {
652 return SDL_ICONV_E2BIG;
653 }
654 ch = ch - 0x10000;
655 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
656 W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
657 p[0] = (Uint8)(W1 >> 8);
658 p[1] = (Uint8)W1;
659 p[2] = (Uint8)(W2 >> 8);
660 p[3] = (Uint8)W2;
661 dst += 4;
662 dstlen -= 4;
663 }
664 } break;
665 case ENCODING_UTF16LE: // RFC 2781
666 {
667 Uint8 *p = (Uint8 *)dst;
668 if (ch > 0x10FFFF) {
669 ch = UNKNOWN_UNICODE;
670 }
671 if (ch < 0x10000) {
672 if (dstlen < 2) {
673 return SDL_ICONV_E2BIG;
674 }
675 p[1] = (Uint8)(ch >> 8);
676 p[0] = (Uint8)ch;
677 dst += 2;
678 dstlen -= 2;
679 } else {
680 Uint16 W1, W2;
681 if (dstlen < 4) {
682 return SDL_ICONV_E2BIG;
683 }
684 ch = ch - 0x10000;
685 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
686 W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
687 p[1] = (Uint8)(W1 >> 8);
688 p[0] = (Uint8)W1;
689 p[3] = (Uint8)(W2 >> 8);
690 p[2] = (Uint8)W2;
691 dst += 4;
692 dstlen -= 4;
693 }
694 } break;
695 case ENCODING_UCS2BE:
696 {
697 Uint8 *p = (Uint8 *)dst;
698 if (ch > 0xFFFF) {
699 ch = UNKNOWN_UNICODE;
700 }
701 if (dstlen < 2) {
702 return SDL_ICONV_E2BIG;
703 }
704 p[0] = (Uint8)(ch >> 8);
705 p[1] = (Uint8)ch;
706 dst += 2;
707 dstlen -= 2;
708 } break;
709 case ENCODING_UCS2LE:
710 {
711 Uint8 *p = (Uint8 *)dst;
712 if (ch > 0xFFFF) {
713 ch = UNKNOWN_UNICODE;
714 }
715 if (dstlen < 2) {
716 return SDL_ICONV_E2BIG;
717 }
718 p[1] = (Uint8)(ch >> 8);
719 p[0] = (Uint8)ch;
720 dst += 2;
721 dstlen -= 2;
722 } break;
723 case ENCODING_UTF32BE:
724 if (ch > 0x10FFFF) {
725 ch = UNKNOWN_UNICODE;
726 }
727 SDL_FALLTHROUGH;
728 case ENCODING_UCS4BE:
729 if (ch > 0x7FFFFFFF) {
730 ch = UNKNOWN_UNICODE;
731 }
732 {
733 Uint8 *p = (Uint8 *)dst;
734 if (dstlen < 4) {
735 return SDL_ICONV_E2BIG;
736 }
737 p[0] = (Uint8)(ch >> 24);
738 p[1] = (Uint8)(ch >> 16);
739 p[2] = (Uint8)(ch >> 8);
740 p[3] = (Uint8)ch;
741 dst += 4;
742 dstlen -= 4;
743 }
744 break;
745 case ENCODING_UTF32LE:
746 if (ch > 0x10FFFF) {
747 ch = UNKNOWN_UNICODE;
748 }
749 SDL_FALLTHROUGH;
750 case ENCODING_UCS4LE:
751 if (ch > 0x7FFFFFFF) {
752 ch = UNKNOWN_UNICODE;
753 }
754 {
755 Uint8 *p = (Uint8 *)dst;
756 if (dstlen < 4) {
757 return SDL_ICONV_E2BIG;
758 }
759 p[3] = (Uint8)(ch >> 24);
760 p[2] = (Uint8)(ch >> 16);
761 p[1] = (Uint8)(ch >> 8);
762 p[0] = (Uint8)ch;
763 dst += 4;
764 dstlen -= 4;
765 }
766 break;
767 }
768
769 // Update state
770 *inbuf = src;
771 *inbytesleft = srclen;
772 *outbuf = dst;
773 *outbytesleft = dstlen;
774 ++total;
775 }
776 return total;
777}
778
779int SDL_iconv_close(SDL_iconv_t cd)
780{
781 if (cd == (SDL_iconv_t)-1) {
782 return -1;
783 }
784 SDL_free(cd);
785 return 0;
786}
787
788#endif // !HAVE_ICONV
789
790char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
791{
792 SDL_iconv_t cd;
793 char *string;
794 size_t stringsize;
795 char *outbuf;
796 size_t outbytesleft;
797 size_t retCode = 0;
798
799 if (!tocode || !*tocode) {
800 tocode = "UTF-8";
801 }
802 if (!fromcode || !*fromcode) {
803 fromcode = "UTF-8";
804 }
805 cd = SDL_iconv_open(tocode, fromcode);
806 if (cd == (SDL_iconv_t)-1) {
807 return NULL;
808 }
809
810 stringsize = inbytesleft;
811 string = (char *)SDL_malloc(stringsize + sizeof(Uint32));
812 if (!string) {
813 SDL_iconv_close(cd);
814 return NULL;
815 }
816 outbuf = string;
817 outbytesleft = stringsize;
818 SDL_memset(outbuf, 0, sizeof(Uint32));
819
820 while (inbytesleft > 0) {
821 const size_t oldinbytesleft = inbytesleft;
822 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
823 switch (retCode) {
824 case SDL_ICONV_E2BIG:
825 {
826 const ptrdiff_t diff = (ptrdiff_t) (outbuf - string);
827 char *oldstring = string;
828 stringsize *= 2;
829 string = (char *)SDL_realloc(string, stringsize + sizeof(Uint32));
830 if (!string) {
831 SDL_free(oldstring);
832 SDL_iconv_close(cd);
833 return NULL;
834 }
835 outbuf = string + diff;
836 outbytesleft = stringsize - diff;
837 SDL_memset(outbuf, 0, sizeof(Uint32));
838 continue;
839 }
840 case SDL_ICONV_EILSEQ:
841 // Try skipping some input data - not perfect, but...
842 ++inbuf;
843 --inbytesleft;
844 break;
845 case SDL_ICONV_EINVAL:
846 case SDL_ICONV_ERROR:
847 // We can't continue...
848 inbytesleft = 0;
849 break;
850 }
851 // Avoid infinite loops when nothing gets converted
852 if (oldinbytesleft == inbytesleft) {
853 break;
854 }
855 }
856 SDL_memset(outbuf, 0, sizeof(Uint32));
857 SDL_iconv_close(cd);
858
859 return string;
860}
861