1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | |
22 | #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS) |
23 | #define SDL_DISABLE_ANALYZE_MACROS 1 |
24 | #endif |
25 | |
26 | #include "../SDL_internal.h" |
27 | |
28 | /* This file contains portable iconv functions for SDL */ |
29 | |
30 | #include "SDL_stdinc.h" |
31 | #include "SDL_endian.h" |
32 | |
33 | #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H) |
34 | #ifdef __FreeBSD__ |
35 | /* Define LIBICONV_PLUG to use iconv from the base instead of ports and avoid linker errors. */ |
36 | #define LIBICONV_PLUG 1 |
37 | #endif |
38 | #include <iconv.h> |
39 | |
40 | /* Depending on which standard the iconv() was implemented with, |
41 | iconv() may or may not use const char ** for the inbuf param. |
42 | If we get this wrong, it's just a warning, so no big deal. |
43 | */ |
44 | #if defined(_XGP6) || defined(__APPLE__) || defined(__RISCOS__) || defined(__FREEBSD__) || \ |
45 | defined(__EMSCRIPTEN__) || \ |
46 | (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \ |
47 | (defined(_NEWLIB_VERSION))) |
48 | #define ICONV_INBUF_NONCONST |
49 | #endif |
50 | |
51 | #include <errno.h> |
52 | |
/* The wrappers below carry the native iconv_t handle inside an SDL_iconv_t
   by casting it through size_t, so the native handle must not be wider
   than SDL_iconv_t. */
SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
54 | |
55 | SDL_iconv_t |
56 | SDL_iconv_open(const char *tocode, const char *fromcode) |
57 | { |
58 | return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode)); |
59 | } |
60 | |
61 | int |
62 | SDL_iconv_close(SDL_iconv_t cd) |
63 | { |
64 | return iconv_close((iconv_t) ((size_t) cd)); |
65 | } |
66 | |
67 | size_t |
68 | SDL_iconv(SDL_iconv_t cd, |
69 | const char **inbuf, size_t * inbytesleft, |
70 | char **outbuf, size_t * outbytesleft) |
71 | { |
72 | size_t retCode; |
73 | #ifdef ICONV_INBUF_NONCONST |
74 | retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft); |
75 | #else |
76 | retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft); |
77 | #endif |
78 | if (retCode == (size_t) - 1) { |
79 | switch (errno) { |
80 | case E2BIG: |
81 | return SDL_ICONV_E2BIG; |
82 | case EILSEQ: |
83 | return SDL_ICONV_EILSEQ; |
84 | case EINVAL: |
85 | return SDL_ICONV_EINVAL; |
86 | default: |
87 | return SDL_ICONV_ERROR; |
88 | } |
89 | } |
90 | return retCode; |
91 | } |
92 | |
93 | #else |
94 | |
95 | /* Lots of useful information on Unicode at: |
96 | http://www.cl.cam.ac.uk/~mgk25/unicode.html |
97 | */ |
98 | |
/* Code point written at the start of "UTF-16" / "UTF-32" output so the
   reader can determine the byte order. */
#define UNICODE_BOM 0xFEFF

/* Replacement characters: UNKNOWN_ASCII is stored when a code point does not
   fit into an ASCII / Latin-1 destination, UNKNOWN_UNICODE when the input is
   malformed or the code point is out of range for the destination. */
#define UNKNOWN_ASCII '?'
#define UNKNOWN_UNICODE 0xFFFD

/* Encodings understood by the built-in converter.  These values are stored
   in the src_fmt / dst_fmt fields of the conversion descriptor.
   ENCODING_UNKNOWN is the "no match" value used while looking up names. */
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16, /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32, /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE,
};
/* Aliases that resolve to the big- or little-endian variant matching the
   byte order SDL was compiled for. */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#define ENCODING_UCS2NATIVE ENCODING_UCS2BE
#define ENCODING_UCS4NATIVE ENCODING_UCS4BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#define ENCODING_UCS2NATIVE ENCODING_UCS2LE
#define ENCODING_UCS4NATIVE ENCODING_UCS4LE
#endif
132 | |
/* Conversion descriptor of the built-in converter: the pair of ENCODING_*
   values chosen by SDL_iconv_open().  SDL_iconv() rewrites a field when it
   resolves ENCODING_UTF16 / ENCODING_UTF32 to a concrete byte order. */
struct _SDL_iconv_t
{
    int src_fmt;                /* ENCODING_* value of the input */
    int dst_fmt;                /* ENCODING_* value of the output */
};
138 | |
139 | static struct |
140 | { |
141 | const char *name; |
142 | int format; |
143 | } encodings[] = { |
144 | /* *INDENT-OFF* */ |
145 | { "ASCII" , ENCODING_ASCII }, |
146 | { "US-ASCII" , ENCODING_ASCII }, |
147 | { "8859-1" , ENCODING_LATIN1 }, |
148 | { "ISO-8859-1" , ENCODING_LATIN1 }, |
149 | { "UTF8" , ENCODING_UTF8 }, |
150 | { "UTF-8" , ENCODING_UTF8 }, |
151 | { "UTF16" , ENCODING_UTF16 }, |
152 | { "UTF-16" , ENCODING_UTF16 }, |
153 | { "UTF16BE" , ENCODING_UTF16BE }, |
154 | { "UTF-16BE" , ENCODING_UTF16BE }, |
155 | { "UTF16LE" , ENCODING_UTF16LE }, |
156 | { "UTF-16LE" , ENCODING_UTF16LE }, |
157 | { "UTF32" , ENCODING_UTF32 }, |
158 | { "UTF-32" , ENCODING_UTF32 }, |
159 | { "UTF32BE" , ENCODING_UTF32BE }, |
160 | { "UTF-32BE" , ENCODING_UTF32BE }, |
161 | { "UTF32LE" , ENCODING_UTF32LE }, |
162 | { "UTF-32LE" , ENCODING_UTF32LE }, |
163 | { "UCS2" , ENCODING_UCS2BE }, |
164 | { "UCS-2" , ENCODING_UCS2BE }, |
165 | { "UCS-2LE" , ENCODING_UCS2LE }, |
166 | { "UCS-2BE" , ENCODING_UCS2BE }, |
167 | { "UCS-2-INTERNAL" , ENCODING_UCS2NATIVE }, |
168 | { "UCS4" , ENCODING_UCS4BE }, |
169 | { "UCS-4" , ENCODING_UCS4BE }, |
170 | { "UCS-4LE" , ENCODING_UCS4LE }, |
171 | { "UCS-4BE" , ENCODING_UCS4BE }, |
172 | { "UCS-4-INTERNAL" , ENCODING_UCS4NATIVE }, |
173 | /* *INDENT-ON* */ |
174 | }; |
175 | |
/* Derive an encoding name from the locale environment.
 *
 * The first of LC_ALL, LC_CTYPE, LC_MESSAGES and LANG that is set supplies
 * the locale string; if none is set, or the value is empty or "C", "ASCII"
 * is used.  Anything up to and including the first '.' and anything from
 * '@' onwards is dropped, so "en_US.UTF-8@blah" yields "UTF-8".
 *
 * buffer/bufsize  caller-provided storage receiving the (possibly
 *                 truncated) result.
 * Returns buffer.
 */
static const char *
getlocale(char *buffer, size_t bufsize)
{
    static const char *const env_names[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const size_t env_count = sizeof(env_names) / sizeof(env_names[0]);
    const char *value = NULL;
    const char *charset;
    char *modifier;
    size_t idx;

    /* Take the first variable that is set at all (even if empty). */
    for (idx = 0; value == NULL && idx < env_count; ++idx) {
        value = SDL_getenv(env_names[idx]);
    }
    if (value == NULL || *value == '\0' || SDL_strcmp(value, "C") == 0) {
        value = "ASCII";
    }

    /* Skip the "language_TERRITORY." prefix when there is one. */
    charset = SDL_strchr(value, '.');
    charset = (charset != NULL) ? charset + 1 : value;

    SDL_strlcpy(buffer, charset, bufsize);

    /* Cut off a trailing "@modifier". */
    modifier = SDL_strchr(buffer, '@');
    if (modifier != NULL) {
        *modifier = '\0';
    }

    return buffer;
}
210 | |
211 | SDL_iconv_t |
212 | SDL_iconv_open(const char *tocode, const char *fromcode) |
213 | { |
214 | int src_fmt = ENCODING_UNKNOWN; |
215 | int dst_fmt = ENCODING_UNKNOWN; |
216 | int i; |
217 | char fromcode_buffer[64]; |
218 | char tocode_buffer[64]; |
219 | |
220 | if (!fromcode || !*fromcode) { |
221 | fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer)); |
222 | } |
223 | if (!tocode || !*tocode) { |
224 | tocode = getlocale(tocode_buffer, sizeof(tocode_buffer)); |
225 | } |
226 | for (i = 0; i < SDL_arraysize(encodings); ++i) { |
227 | if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) { |
228 | src_fmt = encodings[i].format; |
229 | if (dst_fmt != ENCODING_UNKNOWN) { |
230 | break; |
231 | } |
232 | } |
233 | if (SDL_strcasecmp(tocode, encodings[i].name) == 0) { |
234 | dst_fmt = encodings[i].format; |
235 | if (src_fmt != ENCODING_UNKNOWN) { |
236 | break; |
237 | } |
238 | } |
239 | } |
240 | if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) { |
241 | SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd)); |
242 | if (cd) { |
243 | cd->src_fmt = src_fmt; |
244 | cd->dst_fmt = dst_fmt; |
245 | return cd; |
246 | } |
247 | } |
248 | return (SDL_iconv_t) - 1; |
249 | } |
250 | |
251 | size_t |
252 | SDL_iconv(SDL_iconv_t cd, |
253 | const char **inbuf, size_t * inbytesleft, |
254 | char **outbuf, size_t * outbytesleft) |
255 | { |
256 | /* For simplicity, we'll convert everything to and from UCS-4 */ |
257 | const char *src; |
258 | char *dst; |
259 | size_t srclen, dstlen; |
260 | Uint32 ch = 0; |
261 | size_t total; |
262 | |
263 | if (!inbuf || !*inbuf) { |
264 | /* Reset the context */ |
265 | return 0; |
266 | } |
267 | if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) { |
268 | return SDL_ICONV_E2BIG; |
269 | } |
270 | src = *inbuf; |
271 | srclen = (inbytesleft ? *inbytesleft : 0); |
272 | dst = *outbuf; |
273 | dstlen = *outbytesleft; |
274 | |
275 | switch (cd->src_fmt) { |
276 | case ENCODING_UTF16: |
277 | /* Scan for a byte order marker */ |
278 | { |
279 | Uint8 *p = (Uint8 *) src; |
280 | size_t n = srclen / 2; |
281 | while (n) { |
282 | if (p[0] == 0xFF && p[1] == 0xFE) { |
283 | cd->src_fmt = ENCODING_UTF16BE; |
284 | break; |
285 | } else if (p[0] == 0xFE && p[1] == 0xFF) { |
286 | cd->src_fmt = ENCODING_UTF16LE; |
287 | break; |
288 | } |
289 | p += 2; |
290 | --n; |
291 | } |
292 | if (n == 0) { |
293 | /* We can't tell, default to host order */ |
294 | cd->src_fmt = ENCODING_UTF16NATIVE; |
295 | } |
296 | } |
297 | break; |
298 | case ENCODING_UTF32: |
299 | /* Scan for a byte order marker */ |
300 | { |
301 | Uint8 *p = (Uint8 *) src; |
302 | size_t n = srclen / 4; |
303 | while (n) { |
304 | if (p[0] == 0xFF && p[1] == 0xFE && |
305 | p[2] == 0x00 && p[3] == 0x00) { |
306 | cd->src_fmt = ENCODING_UTF32BE; |
307 | break; |
308 | } else if (p[0] == 0x00 && p[1] == 0x00 && |
309 | p[2] == 0xFE && p[3] == 0xFF) { |
310 | cd->src_fmt = ENCODING_UTF32LE; |
311 | break; |
312 | } |
313 | p += 4; |
314 | --n; |
315 | } |
316 | if (n == 0) { |
317 | /* We can't tell, default to host order */ |
318 | cd->src_fmt = ENCODING_UTF32NATIVE; |
319 | } |
320 | } |
321 | break; |
322 | } |
323 | |
324 | switch (cd->dst_fmt) { |
325 | case ENCODING_UTF16: |
326 | /* Default to host order, need to add byte order marker */ |
327 | if (dstlen < 2) { |
328 | return SDL_ICONV_E2BIG; |
329 | } |
330 | *(Uint16 *) dst = UNICODE_BOM; |
331 | dst += 2; |
332 | dstlen -= 2; |
333 | cd->dst_fmt = ENCODING_UTF16NATIVE; |
334 | break; |
335 | case ENCODING_UTF32: |
336 | /* Default to host order, need to add byte order marker */ |
337 | if (dstlen < 4) { |
338 | return SDL_ICONV_E2BIG; |
339 | } |
340 | *(Uint32 *) dst = UNICODE_BOM; |
341 | dst += 4; |
342 | dstlen -= 4; |
343 | cd->dst_fmt = ENCODING_UTF32NATIVE; |
344 | break; |
345 | } |
346 | |
347 | total = 0; |
348 | while (srclen > 0) { |
349 | /* Decode a character */ |
350 | switch (cd->src_fmt) { |
351 | case ENCODING_ASCII: |
352 | { |
353 | Uint8 *p = (Uint8 *) src; |
354 | ch = (Uint32) (p[0] & 0x7F); |
355 | ++src; |
356 | --srclen; |
357 | } |
358 | break; |
359 | case ENCODING_LATIN1: |
360 | { |
361 | Uint8 *p = (Uint8 *) src; |
362 | ch = (Uint32) p[0]; |
363 | ++src; |
364 | --srclen; |
365 | } |
366 | break; |
367 | case ENCODING_UTF8: /* RFC 3629 */ |
368 | { |
369 | Uint8 *p = (Uint8 *) src; |
370 | size_t left = 0; |
371 | SDL_bool overlong = SDL_FALSE; |
372 | if (p[0] >= 0xF0) { |
373 | if ((p[0] & 0xF8) != 0xF0) { |
374 | /* Skip illegal sequences |
375 | return SDL_ICONV_EILSEQ; |
376 | */ |
377 | ch = UNKNOWN_UNICODE; |
378 | } else { |
379 | if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) { |
380 | overlong = SDL_TRUE; |
381 | } |
382 | ch = (Uint32) (p[0] & 0x07); |
383 | left = 3; |
384 | } |
385 | } else if (p[0] >= 0xE0) { |
386 | if ((p[0] & 0xF0) != 0xE0) { |
387 | /* Skip illegal sequences |
388 | return SDL_ICONV_EILSEQ; |
389 | */ |
390 | ch = UNKNOWN_UNICODE; |
391 | } else { |
392 | if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) { |
393 | overlong = SDL_TRUE; |
394 | } |
395 | ch = (Uint32) (p[0] & 0x0F); |
396 | left = 2; |
397 | } |
398 | } else if (p[0] >= 0xC0) { |
399 | if ((p[0] & 0xE0) != 0xC0) { |
400 | /* Skip illegal sequences |
401 | return SDL_ICONV_EILSEQ; |
402 | */ |
403 | ch = UNKNOWN_UNICODE; |
404 | } else { |
405 | if ((p[0] & 0xDE) == 0xC0) { |
406 | overlong = SDL_TRUE; |
407 | } |
408 | ch = (Uint32) (p[0] & 0x1F); |
409 | left = 1; |
410 | } |
411 | } else { |
412 | if ((p[0] & 0x80) != 0x00) { |
413 | /* Skip illegal sequences |
414 | return SDL_ICONV_EILSEQ; |
415 | */ |
416 | ch = UNKNOWN_UNICODE; |
417 | } else { |
418 | ch = (Uint32) p[0]; |
419 | } |
420 | } |
421 | ++src; |
422 | --srclen; |
423 | if (srclen < left) { |
424 | return SDL_ICONV_EINVAL; |
425 | } |
426 | while (left--) { |
427 | ++p; |
428 | if ((p[0] & 0xC0) != 0x80) { |
429 | /* Skip illegal sequences |
430 | return SDL_ICONV_EILSEQ; |
431 | */ |
432 | ch = UNKNOWN_UNICODE; |
433 | break; |
434 | } |
435 | ch <<= 6; |
436 | ch |= (p[0] & 0x3F); |
437 | ++src; |
438 | --srclen; |
439 | } |
440 | if (overlong) { |
441 | /* Potential security risk |
442 | return SDL_ICONV_EILSEQ; |
443 | */ |
444 | ch = UNKNOWN_UNICODE; |
445 | } |
446 | if ((ch >= 0xD800 && ch <= 0xDFFF) || |
447 | (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) { |
448 | /* Skip illegal sequences |
449 | return SDL_ICONV_EILSEQ; |
450 | */ |
451 | ch = UNKNOWN_UNICODE; |
452 | } |
453 | } |
454 | break; |
455 | case ENCODING_UTF16BE: /* RFC 2781 */ |
456 | { |
457 | Uint8 *p = (Uint8 *) src; |
458 | Uint16 W1, W2; |
459 | if (srclen < 2) { |
460 | return SDL_ICONV_EINVAL; |
461 | } |
462 | W1 = ((Uint16) p[0] << 8) | (Uint16) p[1]; |
463 | src += 2; |
464 | srclen -= 2; |
465 | if (W1 < 0xD800 || W1 > 0xDFFF) { |
466 | ch = (Uint32) W1; |
467 | break; |
468 | } |
469 | if (W1 > 0xDBFF) { |
470 | /* Skip illegal sequences |
471 | return SDL_ICONV_EILSEQ; |
472 | */ |
473 | ch = UNKNOWN_UNICODE; |
474 | break; |
475 | } |
476 | if (srclen < 2) { |
477 | return SDL_ICONV_EINVAL; |
478 | } |
479 | p = (Uint8 *) src; |
480 | W2 = ((Uint16) p[0] << 8) | (Uint16) p[1]; |
481 | src += 2; |
482 | srclen -= 2; |
483 | if (W2 < 0xDC00 || W2 > 0xDFFF) { |
484 | /* Skip illegal sequences |
485 | return SDL_ICONV_EILSEQ; |
486 | */ |
487 | ch = UNKNOWN_UNICODE; |
488 | break; |
489 | } |
490 | ch = (((Uint32) (W1 & 0x3FF) << 10) | |
491 | (Uint32) (W2 & 0x3FF)) + 0x10000; |
492 | } |
493 | break; |
494 | case ENCODING_UTF16LE: /* RFC 2781 */ |
495 | { |
496 | Uint8 *p = (Uint8 *) src; |
497 | Uint16 W1, W2; |
498 | if (srclen < 2) { |
499 | return SDL_ICONV_EINVAL; |
500 | } |
501 | W1 = ((Uint16) p[1] << 8) | (Uint16) p[0]; |
502 | src += 2; |
503 | srclen -= 2; |
504 | if (W1 < 0xD800 || W1 > 0xDFFF) { |
505 | ch = (Uint32) W1; |
506 | break; |
507 | } |
508 | if (W1 > 0xDBFF) { |
509 | /* Skip illegal sequences |
510 | return SDL_ICONV_EILSEQ; |
511 | */ |
512 | ch = UNKNOWN_UNICODE; |
513 | break; |
514 | } |
515 | if (srclen < 2) { |
516 | return SDL_ICONV_EINVAL; |
517 | } |
518 | p = (Uint8 *) src; |
519 | W2 = ((Uint16) p[1] << 8) | (Uint16) p[0]; |
520 | src += 2; |
521 | srclen -= 2; |
522 | if (W2 < 0xDC00 || W2 > 0xDFFF) { |
523 | /* Skip illegal sequences |
524 | return SDL_ICONV_EILSEQ; |
525 | */ |
526 | ch = UNKNOWN_UNICODE; |
527 | break; |
528 | } |
529 | ch = (((Uint32) (W1 & 0x3FF) << 10) | |
530 | (Uint32) (W2 & 0x3FF)) + 0x10000; |
531 | } |
532 | break; |
533 | case ENCODING_UCS2LE: |
534 | { |
535 | Uint8 *p = (Uint8 *) src; |
536 | if (srclen < 2) { |
537 | return SDL_ICONV_EINVAL; |
538 | } |
539 | ch = ((Uint32) p[1] << 8) | (Uint32) p[0]; |
540 | src += 2; |
541 | srclen -= 2; |
542 | } |
543 | break; |
544 | case ENCODING_UCS2BE: |
545 | { |
546 | Uint8 *p = (Uint8 *) src; |
547 | if (srclen < 2) { |
548 | return SDL_ICONV_EINVAL; |
549 | } |
550 | ch = ((Uint32) p[0] << 8) | (Uint32) p[1]; |
551 | src += 2; |
552 | srclen -= 2; |
553 | } |
554 | break; |
555 | case ENCODING_UCS4BE: |
556 | case ENCODING_UTF32BE: |
557 | { |
558 | Uint8 *p = (Uint8 *) src; |
559 | if (srclen < 4) { |
560 | return SDL_ICONV_EINVAL; |
561 | } |
562 | ch = ((Uint32) p[0] << 24) | |
563 | ((Uint32) p[1] << 16) | |
564 | ((Uint32) p[2] << 8) | (Uint32) p[3]; |
565 | src += 4; |
566 | srclen -= 4; |
567 | } |
568 | break; |
569 | case ENCODING_UCS4LE: |
570 | case ENCODING_UTF32LE: |
571 | { |
572 | Uint8 *p = (Uint8 *) src; |
573 | if (srclen < 4) { |
574 | return SDL_ICONV_EINVAL; |
575 | } |
576 | ch = ((Uint32) p[3] << 24) | |
577 | ((Uint32) p[2] << 16) | |
578 | ((Uint32) p[1] << 8) | (Uint32) p[0]; |
579 | src += 4; |
580 | srclen -= 4; |
581 | } |
582 | break; |
583 | } |
584 | |
585 | /* Encode a character */ |
586 | switch (cd->dst_fmt) { |
587 | case ENCODING_ASCII: |
588 | { |
589 | Uint8 *p = (Uint8 *) dst; |
590 | if (dstlen < 1) { |
591 | return SDL_ICONV_E2BIG; |
592 | } |
593 | if (ch > 0x7F) { |
594 | *p = UNKNOWN_ASCII; |
595 | } else { |
596 | *p = (Uint8) ch; |
597 | } |
598 | ++dst; |
599 | --dstlen; |
600 | } |
601 | break; |
602 | case ENCODING_LATIN1: |
603 | { |
604 | Uint8 *p = (Uint8 *) dst; |
605 | if (dstlen < 1) { |
606 | return SDL_ICONV_E2BIG; |
607 | } |
608 | if (ch > 0xFF) { |
609 | *p = UNKNOWN_ASCII; |
610 | } else { |
611 | *p = (Uint8) ch; |
612 | } |
613 | ++dst; |
614 | --dstlen; |
615 | } |
616 | break; |
617 | case ENCODING_UTF8: /* RFC 3629 */ |
618 | { |
619 | Uint8 *p = (Uint8 *) dst; |
620 | if (ch > 0x10FFFF) { |
621 | ch = UNKNOWN_UNICODE; |
622 | } |
623 | if (ch <= 0x7F) { |
624 | if (dstlen < 1) { |
625 | return SDL_ICONV_E2BIG; |
626 | } |
627 | *p = (Uint8) ch; |
628 | ++dst; |
629 | --dstlen; |
630 | } else if (ch <= 0x7FF) { |
631 | if (dstlen < 2) { |
632 | return SDL_ICONV_E2BIG; |
633 | } |
634 | p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F); |
635 | p[1] = 0x80 | (Uint8) (ch & 0x3F); |
636 | dst += 2; |
637 | dstlen -= 2; |
638 | } else if (ch <= 0xFFFF) { |
639 | if (dstlen < 3) { |
640 | return SDL_ICONV_E2BIG; |
641 | } |
642 | p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F); |
643 | p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F); |
644 | p[2] = 0x80 | (Uint8) (ch & 0x3F); |
645 | dst += 3; |
646 | dstlen -= 3; |
647 | } else { |
648 | if (dstlen < 4) { |
649 | return SDL_ICONV_E2BIG; |
650 | } |
651 | p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07); |
652 | p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F); |
653 | p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F); |
654 | p[3] = 0x80 | (Uint8) (ch & 0x3F); |
655 | dst += 4; |
656 | dstlen -= 4; |
657 | } |
658 | } |
659 | break; |
660 | case ENCODING_UTF16BE: /* RFC 2781 */ |
661 | { |
662 | Uint8 *p = (Uint8 *) dst; |
663 | if (ch > 0x10FFFF) { |
664 | ch = UNKNOWN_UNICODE; |
665 | } |
666 | if (ch < 0x10000) { |
667 | if (dstlen < 2) { |
668 | return SDL_ICONV_E2BIG; |
669 | } |
670 | p[0] = (Uint8) (ch >> 8); |
671 | p[1] = (Uint8) ch; |
672 | dst += 2; |
673 | dstlen -= 2; |
674 | } else { |
675 | Uint16 W1, W2; |
676 | if (dstlen < 4) { |
677 | return SDL_ICONV_E2BIG; |
678 | } |
679 | ch = ch - 0x10000; |
680 | W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF); |
681 | W2 = 0xDC00 | (Uint16) (ch & 0x3FF); |
682 | p[0] = (Uint8) (W1 >> 8); |
683 | p[1] = (Uint8) W1; |
684 | p[2] = (Uint8) (W2 >> 8); |
685 | p[3] = (Uint8) W2; |
686 | dst += 4; |
687 | dstlen -= 4; |
688 | } |
689 | } |
690 | break; |
691 | case ENCODING_UTF16LE: /* RFC 2781 */ |
692 | { |
693 | Uint8 *p = (Uint8 *) dst; |
694 | if (ch > 0x10FFFF) { |
695 | ch = UNKNOWN_UNICODE; |
696 | } |
697 | if (ch < 0x10000) { |
698 | if (dstlen < 2) { |
699 | return SDL_ICONV_E2BIG; |
700 | } |
701 | p[1] = (Uint8) (ch >> 8); |
702 | p[0] = (Uint8) ch; |
703 | dst += 2; |
704 | dstlen -= 2; |
705 | } else { |
706 | Uint16 W1, W2; |
707 | if (dstlen < 4) { |
708 | return SDL_ICONV_E2BIG; |
709 | } |
710 | ch = ch - 0x10000; |
711 | W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF); |
712 | W2 = 0xDC00 | (Uint16) (ch & 0x3FF); |
713 | p[1] = (Uint8) (W1 >> 8); |
714 | p[0] = (Uint8) W1; |
715 | p[3] = (Uint8) (W2 >> 8); |
716 | p[2] = (Uint8) W2; |
717 | dst += 4; |
718 | dstlen -= 4; |
719 | } |
720 | } |
721 | break; |
722 | case ENCODING_UCS2BE: |
723 | { |
724 | Uint8 *p = (Uint8 *) dst; |
725 | if (ch > 0xFFFF) { |
726 | ch = UNKNOWN_UNICODE; |
727 | } |
728 | if (dstlen < 2) { |
729 | return SDL_ICONV_E2BIG; |
730 | } |
731 | p[0] = (Uint8) (ch >> 8); |
732 | p[1] = (Uint8) ch; |
733 | dst += 2; |
734 | dstlen -= 2; |
735 | } |
736 | break; |
737 | case ENCODING_UCS2LE: |
738 | { |
739 | Uint8 *p = (Uint8 *) dst; |
740 | if (ch > 0xFFFF) { |
741 | ch = UNKNOWN_UNICODE; |
742 | } |
743 | if (dstlen < 2) { |
744 | return SDL_ICONV_E2BIG; |
745 | } |
746 | p[1] = (Uint8) (ch >> 8); |
747 | p[0] = (Uint8) ch; |
748 | dst += 2; |
749 | dstlen -= 2; |
750 | } |
751 | break; |
752 | case ENCODING_UTF32BE: |
753 | if (ch > 0x10FFFF) { |
754 | ch = UNKNOWN_UNICODE; |
755 | } |
756 | /* fallthrough */ |
757 | case ENCODING_UCS4BE: |
758 | if (ch > 0x7FFFFFFF) { |
759 | ch = UNKNOWN_UNICODE; |
760 | } |
761 | { |
762 | Uint8 *p = (Uint8 *) dst; |
763 | if (dstlen < 4) { |
764 | return SDL_ICONV_E2BIG; |
765 | } |
766 | p[0] = (Uint8) (ch >> 24); |
767 | p[1] = (Uint8) (ch >> 16); |
768 | p[2] = (Uint8) (ch >> 8); |
769 | p[3] = (Uint8) ch; |
770 | dst += 4; |
771 | dstlen -= 4; |
772 | } |
773 | break; |
774 | case ENCODING_UTF32LE: |
775 | if (ch > 0x10FFFF) { |
776 | ch = UNKNOWN_UNICODE; |
777 | } |
778 | /* fallthrough */ |
779 | case ENCODING_UCS4LE: |
780 | if (ch > 0x7FFFFFFF) { |
781 | ch = UNKNOWN_UNICODE; |
782 | } |
783 | { |
784 | Uint8 *p = (Uint8 *) dst; |
785 | if (dstlen < 4) { |
786 | return SDL_ICONV_E2BIG; |
787 | } |
788 | p[3] = (Uint8) (ch >> 24); |
789 | p[2] = (Uint8) (ch >> 16); |
790 | p[1] = (Uint8) (ch >> 8); |
791 | p[0] = (Uint8) ch; |
792 | dst += 4; |
793 | dstlen -= 4; |
794 | } |
795 | break; |
796 | } |
797 | |
798 | /* Update state */ |
799 | *inbuf = src; |
800 | *inbytesleft = srclen; |
801 | *outbuf = dst; |
802 | *outbytesleft = dstlen; |
803 | ++total; |
804 | } |
805 | return total; |
806 | } |
807 | |
808 | int |
809 | SDL_iconv_close(SDL_iconv_t cd) |
810 | { |
811 | if (cd != (SDL_iconv_t)-1) { |
812 | SDL_free(cd); |
813 | } |
814 | return 0; |
815 | } |
816 | |
817 | #endif /* !HAVE_ICONV */ |
818 | |
819 | char * |
820 | SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, |
821 | size_t inbytesleft) |
822 | { |
823 | SDL_iconv_t cd; |
824 | char *string; |
825 | size_t stringsize; |
826 | char *outbuf; |
827 | size_t outbytesleft; |
828 | size_t retCode = 0; |
829 | |
830 | cd = SDL_iconv_open(tocode, fromcode); |
831 | if (cd == (SDL_iconv_t) - 1) { |
832 | /* See if we can recover here (fixes iconv on Solaris 11) */ |
833 | if (!tocode || !*tocode) { |
834 | tocode = "UTF-8" ; |
835 | } |
836 | if (!fromcode || !*fromcode) { |
837 | fromcode = "UTF-8" ; |
838 | } |
839 | cd = SDL_iconv_open(tocode, fromcode); |
840 | } |
841 | if (cd == (SDL_iconv_t) - 1) { |
842 | return NULL; |
843 | } |
844 | |
845 | stringsize = inbytesleft > 4 ? inbytesleft : 4; |
846 | string = (char *) SDL_malloc(stringsize); |
847 | if (!string) { |
848 | SDL_iconv_close(cd); |
849 | return NULL; |
850 | } |
851 | outbuf = string; |
852 | outbytesleft = stringsize; |
853 | SDL_memset(outbuf, 0, 4); |
854 | |
855 | while (inbytesleft > 0) { |
856 | const size_t oldinbytesleft = inbytesleft; |
857 | retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); |
858 | switch (retCode) { |
859 | case SDL_ICONV_E2BIG: |
860 | { |
861 | char *oldstring = string; |
862 | stringsize *= 2; |
863 | string = (char *) SDL_realloc(string, stringsize); |
864 | if (!string) { |
865 | SDL_iconv_close(cd); |
866 | return NULL; |
867 | } |
868 | outbuf = string + (outbuf - oldstring); |
869 | outbytesleft = stringsize - (outbuf - string); |
870 | SDL_memset(outbuf, 0, 4); |
871 | } |
872 | break; |
873 | case SDL_ICONV_EILSEQ: |
874 | /* Try skipping some input data - not perfect, but... */ |
875 | ++inbuf; |
876 | --inbytesleft; |
877 | break; |
878 | case SDL_ICONV_EINVAL: |
879 | case SDL_ICONV_ERROR: |
880 | /* We can't continue... */ |
881 | inbytesleft = 0; |
882 | break; |
883 | } |
884 | /* Avoid infinite loops when nothing gets converted */ |
885 | if (oldinbytesleft == inbytesleft) |
886 | { |
887 | break; |
888 | } |
889 | } |
890 | SDL_iconv_close(cd); |
891 | |
892 | return string; |
893 | } |
894 | |
895 | /* vi: set ts=4 sw=4 expandtab: */ |
896 | |