1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "SDL_internal.h" |
22 | |
23 | // This file contains portable iconv functions for SDL |
24 | |
25 | #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H) |
26 | #ifndef SDL_USE_LIBICONV |
27 | // Define LIBICONV_PLUG to use iconv from the base instead of ports and avoid linker errors. |
28 | #define LIBICONV_PLUG 1 |
29 | #endif |
30 | #include <iconv.h> |
31 | #include <errno.h> |
32 | |
33 | SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof(iconv_t) <= sizeof(SDL_iconv_t)); |
34 | |
35 | SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode) |
36 | { |
37 | return (SDL_iconv_t)((uintptr_t)iconv_open(tocode, fromcode)); |
38 | } |
39 | |
40 | int SDL_iconv_close(SDL_iconv_t cd) |
41 | { |
42 | if ((size_t)cd == SDL_ICONV_ERROR) { |
43 | return -1; |
44 | } |
45 | return iconv_close((iconv_t)((uintptr_t)cd)); |
46 | } |
47 | |
48 | size_t SDL_iconv(SDL_iconv_t cd, |
49 | const char **inbuf, size_t *inbytesleft, |
50 | char **outbuf, size_t *outbytesleft) |
51 | { |
52 | if ((size_t)cd == SDL_ICONV_ERROR) { |
53 | return SDL_ICONV_ERROR; |
54 | } |
55 | /* iconv's second parameter may or may not be `const char const *` depending on the |
56 | C runtime's whims. Casting to void * seems to make everyone happy, though. */ |
57 | const size_t retCode = iconv((iconv_t)((uintptr_t)cd), (void *)inbuf, inbytesleft, outbuf, outbytesleft); |
58 | if (retCode == (size_t)-1) { |
59 | switch (errno) { |
60 | case E2BIG: |
61 | return SDL_ICONV_E2BIG; |
62 | case EILSEQ: |
63 | return SDL_ICONV_EILSEQ; |
64 | case EINVAL: |
65 | return SDL_ICONV_EINVAL; |
66 | default: |
67 | return SDL_ICONV_ERROR; |
68 | } |
69 | } |
70 | return retCode; |
71 | } |
72 | |
73 | #else |
74 | |
75 | /* Lots of useful information on Unicode at: |
76 | http://www.cl.cam.ac.uk/~mgk25/unicode.html |
77 | */ |
78 | |
79 | #define UNICODE_BOM 0xFEFF |
80 | |
81 | #define UNKNOWN_ASCII '?' |
82 | #define UNKNOWN_UNICODE 0xFFFD |
83 | |
// Encodings understood by the built-in converter.
// The numbering is spelled out explicitly; it is the same sequence the
// compiler would assign implicitly starting from zero.
enum
{
    ENCODING_UNKNOWN = 0,
    ENCODING_ASCII   = 1,
    ENCODING_LATIN1  = 2,
    ENCODING_UTF8    = 3,
    ENCODING_UTF16   = 4, // Needs byte order marker
    ENCODING_UTF16BE = 5,
    ENCODING_UTF16LE = 6,
    ENCODING_UTF32   = 7, // Needs byte order marker
    ENCODING_UTF32BE = 8,
    ENCODING_UTF32LE = 9,
    ENCODING_UCS2BE  = 10,
    ENCODING_UCS2LE  = 11,
    ENCODING_UCS4BE  = 12,
    ENCODING_UCS4LE  = 13,
};
// "Native" aliases: each ENCODING_*NATIVE name expands to the big-endian or
// little-endian variant of that encoding, selected by SDL_BYTEORDER.
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE ENCODING_UCS4BE
#else
// Any byte order other than SDL_BIG_ENDIAN selects the little-endian variants.
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE ENCODING_UCS4LE
#endif
112 | |
// State of one built-in conversion descriptor. SDL_iconv_open() allocates it
// and fills in both fields; SDL_iconv() may overwrite a generic UTF-16/UTF-32
// value with a concrete byte-order variant once it has picked one.
struct SDL_iconv_data_t
{
    int src_fmt; // ENCODING_* value the input is decoded from
    int dst_fmt; // ENCODING_* value the output is encoded to
};
118 | |
119 | static struct |
120 | { |
121 | const char *name; |
122 | int format; |
123 | } encodings[] = { |
124 | /* *INDENT-OFF* */ // clang-format off |
125 | { "ASCII" , ENCODING_ASCII }, |
126 | { "US-ASCII" , ENCODING_ASCII }, |
127 | { "8859-1" , ENCODING_LATIN1 }, |
128 | { "ISO-8859-1" , ENCODING_LATIN1 }, |
129 | #if defined(SDL_PLATFORM_WINDOWS) || defined(SDL_PLATFORM_OS2) |
130 | { "WCHAR_T" , ENCODING_UTF16LE }, |
131 | #else |
132 | { "WCHAR_T" , ENCODING_UCS4NATIVE }, |
133 | #endif |
134 | { "UTF8" , ENCODING_UTF8 }, |
135 | { "UTF-8" , ENCODING_UTF8 }, |
136 | { "UTF16" , ENCODING_UTF16 }, |
137 | { "UTF-16" , ENCODING_UTF16 }, |
138 | { "UTF16BE" , ENCODING_UTF16BE }, |
139 | { "UTF-16BE" , ENCODING_UTF16BE }, |
140 | { "UTF16LE" , ENCODING_UTF16LE }, |
141 | { "UTF-16LE" , ENCODING_UTF16LE }, |
142 | { "UTF32" , ENCODING_UTF32 }, |
143 | { "UTF-32" , ENCODING_UTF32 }, |
144 | { "UTF32BE" , ENCODING_UTF32BE }, |
145 | { "UTF-32BE" , ENCODING_UTF32BE }, |
146 | { "UTF32LE" , ENCODING_UTF32LE }, |
147 | { "UTF-32LE" , ENCODING_UTF32LE }, |
148 | { "UCS2" , ENCODING_UCS2BE }, |
149 | { "UCS-2" , ENCODING_UCS2BE }, |
150 | { "UCS-2LE" , ENCODING_UCS2LE }, |
151 | { "UCS-2BE" , ENCODING_UCS2BE }, |
152 | { "UCS-2-INTERNAL" , ENCODING_UCS2NATIVE }, |
153 | { "UCS4" , ENCODING_UCS4BE }, |
154 | { "UCS-4" , ENCODING_UCS4BE }, |
155 | { "UCS-4LE" , ENCODING_UCS4LE }, |
156 | { "UCS-4BE" , ENCODING_UCS4BE }, |
157 | { "UCS-4-INTERNAL" , ENCODING_UCS4NATIVE }, |
158 | /* *INDENT-ON* */ // clang-format on |
159 | }; |
160 | |
// Work out the character set name of the current locale from the environment.
//
// LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order and the
// first one that is set to a non-empty value wins. An empty value is treated
// like an unset variable (as POSIX specifies), so that e.g. `LC_ALL=` does not
// hide a meaningful LANG. If nothing is set, or the locale is "C" or "POSIX",
// the result is "ASCII".
//
// buffer/bufsize: caller-provided storage that receives the charset name.
// Returns buffer.
static const char *getlocale(char *buffer, size_t bufsize)
{
    static const char *const variables[] = { "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG" };
    const char *lang = NULL;
    const char *codeset;
    char *modifier;
    size_t i;

    for (i = 0; i < SDL_arraysize(variables); ++i) {
        const char *value = SDL_getenv(variables[i]);
        if (value && *value) {
            lang = value;
            break;
        }
    }
    if (!lang || SDL_strcmp(lang, "C") == 0 || SDL_strcmp(lang, "POSIX") == 0) {
        lang = "ASCII";
    }

    // We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8"
    codeset = SDL_strchr(lang, '.');
    if (codeset) {
        lang = codeset + 1;
    }

    SDL_strlcpy(buffer, lang, bufsize);
    modifier = SDL_strchr(buffer, '@');
    if (modifier) {
        *modifier = '\0'; // chop end of string.
    }

    return buffer;
}
194 | |
195 | SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode) |
196 | { |
197 | int src_fmt = ENCODING_UNKNOWN; |
198 | int dst_fmt = ENCODING_UNKNOWN; |
199 | int i; |
200 | char fromcode_buffer[64]; |
201 | char tocode_buffer[64]; |
202 | |
203 | if (!fromcode || !*fromcode) { |
204 | fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer)); |
205 | } |
206 | if (!tocode || !*tocode) { |
207 | tocode = getlocale(tocode_buffer, sizeof(tocode_buffer)); |
208 | } |
209 | for (i = 0; i < SDL_arraysize(encodings); ++i) { |
210 | if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) { |
211 | src_fmt = encodings[i].format; |
212 | if (dst_fmt != ENCODING_UNKNOWN) { |
213 | break; |
214 | } |
215 | } |
216 | if (SDL_strcasecmp(tocode, encodings[i].name) == 0) { |
217 | dst_fmt = encodings[i].format; |
218 | if (src_fmt != ENCODING_UNKNOWN) { |
219 | break; |
220 | } |
221 | } |
222 | } |
223 | if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) { |
224 | SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd)); |
225 | if (cd) { |
226 | cd->src_fmt = src_fmt; |
227 | cd->dst_fmt = dst_fmt; |
228 | return cd; |
229 | } |
230 | } |
231 | return (SDL_iconv_t)-1; |
232 | } |
233 | |
234 | size_t SDL_iconv(SDL_iconv_t cd, |
235 | const char **inbuf, size_t *inbytesleft, |
236 | char **outbuf, size_t *outbytesleft) |
237 | { |
238 | // For simplicity, we'll convert everything to and from UCS-4 |
239 | const char *src; |
240 | char *dst; |
241 | size_t srclen, dstlen; |
242 | Uint32 ch = 0; |
243 | size_t total; |
244 | |
245 | if ((size_t)cd == SDL_ICONV_ERROR) { |
246 | return SDL_ICONV_ERROR; |
247 | } |
248 | if (!inbuf || !*inbuf) { |
249 | // Reset the context |
250 | return 0; |
251 | } |
252 | if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) { |
253 | return SDL_ICONV_E2BIG; |
254 | } |
255 | src = *inbuf; |
256 | srclen = (inbytesleft ? *inbytesleft : 0); |
257 | dst = *outbuf; |
258 | dstlen = *outbytesleft; |
259 | |
260 | switch (cd->src_fmt) { |
261 | case ENCODING_UTF16: |
262 | // Scan for a byte order marker |
263 | { |
264 | Uint8 *p = (Uint8 *)src; |
265 | size_t n = srclen / 2; |
266 | while (n) { |
267 | if (p[0] == 0xFF && p[1] == 0xFE) { |
268 | cd->src_fmt = ENCODING_UTF16BE; |
269 | break; |
270 | } else if (p[0] == 0xFE && p[1] == 0xFF) { |
271 | cd->src_fmt = ENCODING_UTF16LE; |
272 | break; |
273 | } |
274 | p += 2; |
275 | --n; |
276 | } |
277 | if (n == 0) { |
278 | // We can't tell, default to host order |
279 | cd->src_fmt = ENCODING_UTF16NATIVE; |
280 | } |
281 | } |
282 | break; |
283 | case ENCODING_UTF32: |
284 | // Scan for a byte order marker |
285 | { |
286 | Uint8 *p = (Uint8 *)src; |
287 | size_t n = srclen / 4; |
288 | while (n) { |
289 | if (p[0] == 0xFF && p[1] == 0xFE && |
290 | p[2] == 0x00 && p[3] == 0x00) { |
291 | cd->src_fmt = ENCODING_UTF32BE; |
292 | break; |
293 | } else if (p[0] == 0x00 && p[1] == 0x00 && |
294 | p[2] == 0xFE && p[3] == 0xFF) { |
295 | cd->src_fmt = ENCODING_UTF32LE; |
296 | break; |
297 | } |
298 | p += 4; |
299 | --n; |
300 | } |
301 | if (n == 0) { |
302 | // We can't tell, default to host order |
303 | cd->src_fmt = ENCODING_UTF32NATIVE; |
304 | } |
305 | } |
306 | break; |
307 | } |
308 | |
309 | switch (cd->dst_fmt) { |
310 | case ENCODING_UTF16: |
311 | // Default to host order, need to add byte order marker |
312 | if (dstlen < 2) { |
313 | return SDL_ICONV_E2BIG; |
314 | } |
315 | *(Uint16 *)dst = UNICODE_BOM; |
316 | dst += 2; |
317 | dstlen -= 2; |
318 | cd->dst_fmt = ENCODING_UTF16NATIVE; |
319 | break; |
320 | case ENCODING_UTF32: |
321 | // Default to host order, need to add byte order marker |
322 | if (dstlen < 4) { |
323 | return SDL_ICONV_E2BIG; |
324 | } |
325 | *(Uint32 *)dst = UNICODE_BOM; |
326 | dst += 4; |
327 | dstlen -= 4; |
328 | cd->dst_fmt = ENCODING_UTF32NATIVE; |
329 | break; |
330 | } |
331 | |
332 | total = 0; |
333 | while (srclen > 0) { |
334 | // Decode a character |
335 | switch (cd->src_fmt) { |
336 | case ENCODING_ASCII: |
337 | { |
338 | Uint8 *p = (Uint8 *)src; |
339 | ch = (Uint32)(p[0] & 0x7F); |
340 | ++src; |
341 | --srclen; |
342 | } break; |
343 | case ENCODING_LATIN1: |
344 | { |
345 | Uint8 *p = (Uint8 *)src; |
346 | ch = (Uint32)p[0]; |
347 | ++src; |
348 | --srclen; |
349 | } break; |
350 | case ENCODING_UTF8: // RFC 3629 |
351 | { |
352 | Uint8 *p = (Uint8 *)src; |
353 | size_t left = 0; |
354 | bool overlong = false; |
355 | if (p[0] >= 0xF0) { |
356 | if ((p[0] & 0xF8) != 0xF0) { |
357 | /* Skip illegal sequences |
358 | return SDL_ICONV_EILSEQ; |
359 | */ |
360 | ch = UNKNOWN_UNICODE; |
361 | } else { |
362 | if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) { |
363 | overlong = true; |
364 | } |
365 | ch = (Uint32)(p[0] & 0x07); |
366 | left = 3; |
367 | } |
368 | } else if (p[0] >= 0xE0) { |
369 | if ((p[0] & 0xF0) != 0xE0) { |
370 | /* Skip illegal sequences |
371 | return SDL_ICONV_EILSEQ; |
372 | */ |
373 | ch = UNKNOWN_UNICODE; |
374 | } else { |
375 | if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) { |
376 | overlong = true; |
377 | } |
378 | ch = (Uint32)(p[0] & 0x0F); |
379 | left = 2; |
380 | } |
381 | } else if (p[0] >= 0xC0) { |
382 | if ((p[0] & 0xE0) != 0xC0) { |
383 | /* Skip illegal sequences |
384 | return SDL_ICONV_EILSEQ; |
385 | */ |
386 | ch = UNKNOWN_UNICODE; |
387 | } else { |
388 | if ((p[0] & 0xDE) == 0xC0) { |
389 | overlong = true; |
390 | } |
391 | ch = (Uint32)(p[0] & 0x1F); |
392 | left = 1; |
393 | } |
394 | } else { |
395 | if (p[0] & 0x80) { |
396 | /* Skip illegal sequences |
397 | return SDL_ICONV_EILSEQ; |
398 | */ |
399 | ch = UNKNOWN_UNICODE; |
400 | } else { |
401 | ch = (Uint32)p[0]; |
402 | } |
403 | } |
404 | ++src; |
405 | --srclen; |
406 | if (srclen < left) { |
407 | return SDL_ICONV_EINVAL; |
408 | } |
409 | while (left--) { |
410 | ++p; |
411 | if ((p[0] & 0xC0) != 0x80) { |
412 | /* Skip illegal sequences |
413 | return SDL_ICONV_EILSEQ; |
414 | */ |
415 | ch = UNKNOWN_UNICODE; |
416 | break; |
417 | } |
418 | ch <<= 6; |
419 | ch |= (p[0] & 0x3F); |
420 | ++src; |
421 | --srclen; |
422 | } |
423 | if (overlong) { |
424 | /* Potential security risk |
425 | return SDL_ICONV_EILSEQ; |
426 | */ |
427 | ch = UNKNOWN_UNICODE; |
428 | } |
429 | if ((ch >= 0xD800 && ch <= 0xDFFF) || |
430 | (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) { |
431 | /* Skip illegal sequences |
432 | return SDL_ICONV_EILSEQ; |
433 | */ |
434 | ch = UNKNOWN_UNICODE; |
435 | } |
436 | } break; |
437 | case ENCODING_UTF16BE: // RFC 2781 |
438 | { |
439 | Uint8 *p = (Uint8 *)src; |
440 | Uint16 W1, W2; |
441 | if (srclen < 2) { |
442 | return SDL_ICONV_EINVAL; |
443 | } |
444 | W1 = ((Uint16)p[0] << 8) | (Uint16)p[1]; |
445 | src += 2; |
446 | srclen -= 2; |
447 | if (W1 < 0xD800 || W1 > 0xDFFF) { |
448 | ch = (Uint32)W1; |
449 | break; |
450 | } |
451 | if (W1 > 0xDBFF) { |
452 | /* Skip illegal sequences |
453 | return SDL_ICONV_EILSEQ; |
454 | */ |
455 | ch = UNKNOWN_UNICODE; |
456 | break; |
457 | } |
458 | if (srclen < 2) { |
459 | return SDL_ICONV_EINVAL; |
460 | } |
461 | p = (Uint8 *)src; |
462 | W2 = ((Uint16)p[0] << 8) | (Uint16)p[1]; |
463 | src += 2; |
464 | srclen -= 2; |
465 | if (W2 < 0xDC00 || W2 > 0xDFFF) { |
466 | /* Skip illegal sequences |
467 | return SDL_ICONV_EILSEQ; |
468 | */ |
469 | ch = UNKNOWN_UNICODE; |
470 | break; |
471 | } |
472 | ch = (((Uint32)(W1 & 0x3FF) << 10) | |
473 | (Uint32)(W2 & 0x3FF)) + |
474 | 0x10000; |
475 | } break; |
476 | case ENCODING_UTF16LE: // RFC 2781 |
477 | { |
478 | Uint8 *p = (Uint8 *)src; |
479 | Uint16 W1, W2; |
480 | if (srclen < 2) { |
481 | return SDL_ICONV_EINVAL; |
482 | } |
483 | W1 = ((Uint16)p[1] << 8) | (Uint16)p[0]; |
484 | src += 2; |
485 | srclen -= 2; |
486 | if (W1 < 0xD800 || W1 > 0xDFFF) { |
487 | ch = (Uint32)W1; |
488 | break; |
489 | } |
490 | if (W1 > 0xDBFF) { |
491 | /* Skip illegal sequences |
492 | return SDL_ICONV_EILSEQ; |
493 | */ |
494 | ch = UNKNOWN_UNICODE; |
495 | break; |
496 | } |
497 | if (srclen < 2) { |
498 | return SDL_ICONV_EINVAL; |
499 | } |
500 | p = (Uint8 *)src; |
501 | W2 = ((Uint16)p[1] << 8) | (Uint16)p[0]; |
502 | src += 2; |
503 | srclen -= 2; |
504 | if (W2 < 0xDC00 || W2 > 0xDFFF) { |
505 | /* Skip illegal sequences |
506 | return SDL_ICONV_EILSEQ; |
507 | */ |
508 | ch = UNKNOWN_UNICODE; |
509 | break; |
510 | } |
511 | ch = (((Uint32)(W1 & 0x3FF) << 10) | |
512 | (Uint32)(W2 & 0x3FF)) + |
513 | 0x10000; |
514 | } break; |
515 | case ENCODING_UCS2LE: |
516 | { |
517 | Uint8 *p = (Uint8 *)src; |
518 | if (srclen < 2) { |
519 | return SDL_ICONV_EINVAL; |
520 | } |
521 | ch = ((Uint32)p[1] << 8) | (Uint32)p[0]; |
522 | src += 2; |
523 | srclen -= 2; |
524 | } break; |
525 | case ENCODING_UCS2BE: |
526 | { |
527 | Uint8 *p = (Uint8 *)src; |
528 | if (srclen < 2) { |
529 | return SDL_ICONV_EINVAL; |
530 | } |
531 | ch = ((Uint32)p[0] << 8) | (Uint32)p[1]; |
532 | src += 2; |
533 | srclen -= 2; |
534 | } break; |
535 | case ENCODING_UCS4BE: |
536 | case ENCODING_UTF32BE: |
537 | { |
538 | Uint8 *p = (Uint8 *)src; |
539 | if (srclen < 4) { |
540 | return SDL_ICONV_EINVAL; |
541 | } |
542 | ch = ((Uint32)p[0] << 24) | |
543 | ((Uint32)p[1] << 16) | |
544 | ((Uint32)p[2] << 8) | (Uint32)p[3]; |
545 | src += 4; |
546 | srclen -= 4; |
547 | } break; |
548 | case ENCODING_UCS4LE: |
549 | case ENCODING_UTF32LE: |
550 | { |
551 | Uint8 *p = (Uint8 *)src; |
552 | if (srclen < 4) { |
553 | return SDL_ICONV_EINVAL; |
554 | } |
555 | ch = ((Uint32)p[3] << 24) | |
556 | ((Uint32)p[2] << 16) | |
557 | ((Uint32)p[1] << 8) | (Uint32)p[0]; |
558 | src += 4; |
559 | srclen -= 4; |
560 | } break; |
561 | } |
562 | |
563 | // Encode a character |
564 | switch (cd->dst_fmt) { |
565 | case ENCODING_ASCII: |
566 | { |
567 | Uint8 *p = (Uint8 *)dst; |
568 | if (dstlen < 1) { |
569 | return SDL_ICONV_E2BIG; |
570 | } |
571 | if (ch > 0x7F) { |
572 | *p = UNKNOWN_ASCII; |
573 | } else { |
574 | *p = (Uint8)ch; |
575 | } |
576 | ++dst; |
577 | --dstlen; |
578 | } break; |
579 | case ENCODING_LATIN1: |
580 | { |
581 | Uint8 *p = (Uint8 *)dst; |
582 | if (dstlen < 1) { |
583 | return SDL_ICONV_E2BIG; |
584 | } |
585 | if (ch > 0xFF) { |
586 | *p = UNKNOWN_ASCII; |
587 | } else { |
588 | *p = (Uint8)ch; |
589 | } |
590 | ++dst; |
591 | --dstlen; |
592 | } break; |
593 | case ENCODING_UTF8: // RFC 3629 |
594 | { |
595 | Uint8 *p = (Uint8 *)dst; |
596 | if (ch > 0x10FFFF) { |
597 | ch = UNKNOWN_UNICODE; |
598 | } |
599 | if (ch <= 0x7F) { |
600 | if (dstlen < 1) { |
601 | return SDL_ICONV_E2BIG; |
602 | } |
603 | *p = (Uint8)ch; |
604 | ++dst; |
605 | --dstlen; |
606 | } else if (ch <= 0x7FF) { |
607 | if (dstlen < 2) { |
608 | return SDL_ICONV_E2BIG; |
609 | } |
610 | p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); |
611 | p[1] = 0x80 | (Uint8)(ch & 0x3F); |
612 | dst += 2; |
613 | dstlen -= 2; |
614 | } else if (ch <= 0xFFFF) { |
615 | if (dstlen < 3) { |
616 | return SDL_ICONV_E2BIG; |
617 | } |
618 | p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); |
619 | p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
620 | p[2] = 0x80 | (Uint8)(ch & 0x3F); |
621 | dst += 3; |
622 | dstlen -= 3; |
623 | } else { |
624 | if (dstlen < 4) { |
625 | return SDL_ICONV_E2BIG; |
626 | } |
627 | p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); |
628 | p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); |
629 | p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); |
630 | p[3] = 0x80 | (Uint8)(ch & 0x3F); |
631 | dst += 4; |
632 | dstlen -= 4; |
633 | } |
634 | } break; |
635 | case ENCODING_UTF16BE: // RFC 2781 |
636 | { |
637 | Uint8 *p = (Uint8 *)dst; |
638 | if (ch > 0x10FFFF) { |
639 | ch = UNKNOWN_UNICODE; |
640 | } |
641 | if (ch < 0x10000) { |
642 | if (dstlen < 2) { |
643 | return SDL_ICONV_E2BIG; |
644 | } |
645 | p[0] = (Uint8)(ch >> 8); |
646 | p[1] = (Uint8)ch; |
647 | dst += 2; |
648 | dstlen -= 2; |
649 | } else { |
650 | Uint16 W1, W2; |
651 | if (dstlen < 4) { |
652 | return SDL_ICONV_E2BIG; |
653 | } |
654 | ch = ch - 0x10000; |
655 | W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); |
656 | W2 = 0xDC00 | (Uint16)(ch & 0x3FF); |
657 | p[0] = (Uint8)(W1 >> 8); |
658 | p[1] = (Uint8)W1; |
659 | p[2] = (Uint8)(W2 >> 8); |
660 | p[3] = (Uint8)W2; |
661 | dst += 4; |
662 | dstlen -= 4; |
663 | } |
664 | } break; |
665 | case ENCODING_UTF16LE: // RFC 2781 |
666 | { |
667 | Uint8 *p = (Uint8 *)dst; |
668 | if (ch > 0x10FFFF) { |
669 | ch = UNKNOWN_UNICODE; |
670 | } |
671 | if (ch < 0x10000) { |
672 | if (dstlen < 2) { |
673 | return SDL_ICONV_E2BIG; |
674 | } |
675 | p[1] = (Uint8)(ch >> 8); |
676 | p[0] = (Uint8)ch; |
677 | dst += 2; |
678 | dstlen -= 2; |
679 | } else { |
680 | Uint16 W1, W2; |
681 | if (dstlen < 4) { |
682 | return SDL_ICONV_E2BIG; |
683 | } |
684 | ch = ch - 0x10000; |
685 | W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); |
686 | W2 = 0xDC00 | (Uint16)(ch & 0x3FF); |
687 | p[1] = (Uint8)(W1 >> 8); |
688 | p[0] = (Uint8)W1; |
689 | p[3] = (Uint8)(W2 >> 8); |
690 | p[2] = (Uint8)W2; |
691 | dst += 4; |
692 | dstlen -= 4; |
693 | } |
694 | } break; |
695 | case ENCODING_UCS2BE: |
696 | { |
697 | Uint8 *p = (Uint8 *)dst; |
698 | if (ch > 0xFFFF) { |
699 | ch = UNKNOWN_UNICODE; |
700 | } |
701 | if (dstlen < 2) { |
702 | return SDL_ICONV_E2BIG; |
703 | } |
704 | p[0] = (Uint8)(ch >> 8); |
705 | p[1] = (Uint8)ch; |
706 | dst += 2; |
707 | dstlen -= 2; |
708 | } break; |
709 | case ENCODING_UCS2LE: |
710 | { |
711 | Uint8 *p = (Uint8 *)dst; |
712 | if (ch > 0xFFFF) { |
713 | ch = UNKNOWN_UNICODE; |
714 | } |
715 | if (dstlen < 2) { |
716 | return SDL_ICONV_E2BIG; |
717 | } |
718 | p[1] = (Uint8)(ch >> 8); |
719 | p[0] = (Uint8)ch; |
720 | dst += 2; |
721 | dstlen -= 2; |
722 | } break; |
723 | case ENCODING_UTF32BE: |
724 | if (ch > 0x10FFFF) { |
725 | ch = UNKNOWN_UNICODE; |
726 | } |
727 | SDL_FALLTHROUGH; |
728 | case ENCODING_UCS4BE: |
729 | if (ch > 0x7FFFFFFF) { |
730 | ch = UNKNOWN_UNICODE; |
731 | } |
732 | { |
733 | Uint8 *p = (Uint8 *)dst; |
734 | if (dstlen < 4) { |
735 | return SDL_ICONV_E2BIG; |
736 | } |
737 | p[0] = (Uint8)(ch >> 24); |
738 | p[1] = (Uint8)(ch >> 16); |
739 | p[2] = (Uint8)(ch >> 8); |
740 | p[3] = (Uint8)ch; |
741 | dst += 4; |
742 | dstlen -= 4; |
743 | } |
744 | break; |
745 | case ENCODING_UTF32LE: |
746 | if (ch > 0x10FFFF) { |
747 | ch = UNKNOWN_UNICODE; |
748 | } |
749 | SDL_FALLTHROUGH; |
750 | case ENCODING_UCS4LE: |
751 | if (ch > 0x7FFFFFFF) { |
752 | ch = UNKNOWN_UNICODE; |
753 | } |
754 | { |
755 | Uint8 *p = (Uint8 *)dst; |
756 | if (dstlen < 4) { |
757 | return SDL_ICONV_E2BIG; |
758 | } |
759 | p[3] = (Uint8)(ch >> 24); |
760 | p[2] = (Uint8)(ch >> 16); |
761 | p[1] = (Uint8)(ch >> 8); |
762 | p[0] = (Uint8)ch; |
763 | dst += 4; |
764 | dstlen -= 4; |
765 | } |
766 | break; |
767 | } |
768 | |
769 | // Update state |
770 | *inbuf = src; |
771 | *inbytesleft = srclen; |
772 | *outbuf = dst; |
773 | *outbytesleft = dstlen; |
774 | ++total; |
775 | } |
776 | return total; |
777 | } |
778 | |
779 | int SDL_iconv_close(SDL_iconv_t cd) |
780 | { |
781 | if (cd == (SDL_iconv_t)-1) { |
782 | return -1; |
783 | } |
784 | SDL_free(cd); |
785 | return 0; |
786 | } |
787 | |
788 | #endif // !HAVE_ICONV |
789 | |
790 | char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft) |
791 | { |
792 | SDL_iconv_t cd; |
793 | char *string; |
794 | size_t stringsize; |
795 | char *outbuf; |
796 | size_t outbytesleft; |
797 | size_t retCode = 0; |
798 | |
799 | if (!tocode || !*tocode) { |
800 | tocode = "UTF-8" ; |
801 | } |
802 | if (!fromcode || !*fromcode) { |
803 | fromcode = "UTF-8" ; |
804 | } |
805 | cd = SDL_iconv_open(tocode, fromcode); |
806 | if (cd == (SDL_iconv_t)-1) { |
807 | return NULL; |
808 | } |
809 | |
810 | stringsize = inbytesleft; |
811 | string = (char *)SDL_malloc(stringsize + sizeof(Uint32)); |
812 | if (!string) { |
813 | SDL_iconv_close(cd); |
814 | return NULL; |
815 | } |
816 | outbuf = string; |
817 | outbytesleft = stringsize; |
818 | SDL_memset(outbuf, 0, sizeof(Uint32)); |
819 | |
820 | while (inbytesleft > 0) { |
821 | const size_t oldinbytesleft = inbytesleft; |
822 | retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); |
823 | switch (retCode) { |
824 | case SDL_ICONV_E2BIG: |
825 | { |
826 | const ptrdiff_t diff = (ptrdiff_t) (outbuf - string); |
827 | char *oldstring = string; |
828 | stringsize *= 2; |
829 | string = (char *)SDL_realloc(string, stringsize + sizeof(Uint32)); |
830 | if (!string) { |
831 | SDL_free(oldstring); |
832 | SDL_iconv_close(cd); |
833 | return NULL; |
834 | } |
835 | outbuf = string + diff; |
836 | outbytesleft = stringsize - diff; |
837 | SDL_memset(outbuf, 0, sizeof(Uint32)); |
838 | continue; |
839 | } |
840 | case SDL_ICONV_EILSEQ: |
841 | // Try skipping some input data - not perfect, but... |
842 | ++inbuf; |
843 | --inbytesleft; |
844 | break; |
845 | case SDL_ICONV_EINVAL: |
846 | case SDL_ICONV_ERROR: |
847 | // We can't continue... |
848 | inbytesleft = 0; |
849 | break; |
850 | } |
851 | // Avoid infinite loops when nothing gets converted |
852 | if (oldinbytesleft == inbytesleft) { |
853 | break; |
854 | } |
855 | } |
856 | SDL_memset(outbuf, 0, sizeof(Uint32)); |
857 | SDL_iconv_close(cd); |
858 | |
859 | return string; |
860 | } |
861 | |