1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (C) 2002-2015, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | * file name: ucnv_u32.c |
9 | * encoding: UTF-8 |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * created on: 2002jul01 |
14 | * created by: Markus W. Scherer |
15 | * |
16 | * UTF-32 converter implementation. Used to be in ucnv_utf.c. |
17 | */ |
18 | |
19 | #include "unicode/utypes.h" |
20 | |
21 | #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
22 | |
23 | #include "unicode/ucnv.h" |
24 | #include "unicode/utf.h" |
25 | #include "ucnv_bld.h" |
26 | #include "ucnv_cnv.h" |
27 | #include "cmemory.h" |
28 | |
/* Largest value that is written as a single 16-bit code unit by the decoders below. */
#define MAXIMUM_UCS2 0x0000FFFF
/* Upper bound accepted when validating a decoded UTF-32 value. */
#define MAXIMUM_UTF 0x0010FFFF
/* Number of bits the lead-surrogate offset is shifted by when a pair is combined. */
#define HALF_SHIFT 10
#define HALF_BASE 0x0010000
#define HALF_MASK 0x3FF
/* First code unit of the lead (high) and of the trail (low) surrogate range. */
#define SURROGATE_HIGH_START 0xD800
#define SURROGATE_LOW_START 0xDC00

/* -SURROGATE_LOW_START + HALF_BASE */
#define SURROGATE_LOW_BASE 9216

/* Value kept in the converter's fromUnicodeStatus by the encoders in this file. */
enum {
    /* a BOM has to be written before the first converted character */
    UCNV_NEED_TO_WRITE_BOM=1
};
43 | |
44 | /* UTF-32BE ----------------------------------------------------------------- */ |
45 | U_CDECL_BEGIN |
46 | static void U_CALLCONV |
47 | T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, |
48 | UErrorCode * err) |
49 | { |
50 | const unsigned char *mySource = (unsigned char *) args->source; |
51 | char16_t *myTarget = args->target; |
52 | const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
53 | const char16_t *targetLimit = args->targetLimit; |
54 | unsigned char *toUBytes = args->converter->toUBytes; |
55 | uint32_t ch, i; |
56 | |
57 | /* Restore state of current sequence */ |
58 | if (args->converter->toULength > 0 && myTarget < targetLimit) { |
59 | i = args->converter->toULength; /* restore # of bytes consumed */ |
60 | args->converter->toULength = 0; |
61 | |
62 | ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ |
63 | args->converter->toUnicodeStatus = 0; |
64 | goto morebytes; |
65 | } |
66 | |
67 | while (mySource < sourceLimit && myTarget < targetLimit) { |
68 | i = 0; |
69 | ch = 0; |
70 | morebytes: |
71 | while (i < sizeof(uint32_t)) { |
72 | if (mySource < sourceLimit) { |
73 | ch = (ch << 8) | (uint8_t)(*mySource); |
74 | toUBytes[i++] = (char) *(mySource++); |
75 | } |
76 | else { |
77 | /* stores a partially calculated target*/ |
78 | /* + 1 to make 0 a valid character */ |
79 | args->converter->toUnicodeStatus = ch + 1; |
80 | args->converter->toULength = (int8_t) i; |
81 | goto donefornow; |
82 | } |
83 | } |
84 | |
85 | if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
86 | /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
87 | if (ch <= MAXIMUM_UCS2) |
88 | { |
89 | /* fits in 16 bits */ |
90 | *(myTarget++) = (char16_t) ch; |
91 | } |
92 | else { |
93 | /* write out the surrogates */ |
94 | *(myTarget++) = U16_LEAD(ch); |
95 | ch = U16_TRAIL(ch); |
96 | if (myTarget < targetLimit) { |
97 | *(myTarget++) = (char16_t)ch; |
98 | } |
99 | else { |
100 | /* Put in overflow buffer (not handled here) */ |
101 | args->converter->UCharErrorBuffer[0] = (char16_t) ch; |
102 | args->converter->UCharErrorBufferLength = 1; |
103 | *err = U_BUFFER_OVERFLOW_ERROR; |
104 | break; |
105 | } |
106 | } |
107 | } |
108 | else { |
109 | args->converter->toULength = (int8_t)i; |
110 | *err = U_ILLEGAL_CHAR_FOUND; |
111 | break; |
112 | } |
113 | } |
114 | |
115 | donefornow: |
116 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
117 | /* End of target buffer */ |
118 | *err = U_BUFFER_OVERFLOW_ERROR; |
119 | } |
120 | |
121 | args->target = myTarget; |
122 | args->source = (const char *) mySource; |
123 | } |
124 | |
125 | static void U_CALLCONV |
126 | T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, |
127 | UErrorCode * err) |
128 | { |
129 | const unsigned char *mySource = (unsigned char *) args->source; |
130 | char16_t *myTarget = args->target; |
131 | int32_t *myOffsets = args->offsets; |
132 | const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
133 | const char16_t *targetLimit = args->targetLimit; |
134 | unsigned char *toUBytes = args->converter->toUBytes; |
135 | uint32_t ch, i; |
136 | int32_t offsetNum = 0; |
137 | |
138 | /* Restore state of current sequence */ |
139 | if (args->converter->toULength > 0 && myTarget < targetLimit) { |
140 | i = args->converter->toULength; /* restore # of bytes consumed */ |
141 | args->converter->toULength = 0; |
142 | |
143 | ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ |
144 | args->converter->toUnicodeStatus = 0; |
145 | goto morebytes; |
146 | } |
147 | |
148 | while (mySource < sourceLimit && myTarget < targetLimit) { |
149 | i = 0; |
150 | ch = 0; |
151 | morebytes: |
152 | while (i < sizeof(uint32_t)) { |
153 | if (mySource < sourceLimit) { |
154 | ch = (ch << 8) | (uint8_t)(*mySource); |
155 | toUBytes[i++] = (char) *(mySource++); |
156 | } |
157 | else { |
158 | /* stores a partially calculated target*/ |
159 | /* + 1 to make 0 a valid character */ |
160 | args->converter->toUnicodeStatus = ch + 1; |
161 | args->converter->toULength = (int8_t) i; |
162 | goto donefornow; |
163 | } |
164 | } |
165 | |
166 | if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
167 | /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
168 | if (ch <= MAXIMUM_UCS2) { |
169 | /* fits in 16 bits */ |
170 | *(myTarget++) = (char16_t) ch; |
171 | *(myOffsets++) = offsetNum; |
172 | } |
173 | else { |
174 | /* write out the surrogates */ |
175 | *(myTarget++) = U16_LEAD(ch); |
176 | *myOffsets++ = offsetNum; |
177 | ch = U16_TRAIL(ch); |
178 | if (myTarget < targetLimit) |
179 | { |
180 | *(myTarget++) = (char16_t)ch; |
181 | *(myOffsets++) = offsetNum; |
182 | } |
183 | else { |
184 | /* Put in overflow buffer (not handled here) */ |
185 | args->converter->UCharErrorBuffer[0] = (char16_t) ch; |
186 | args->converter->UCharErrorBufferLength = 1; |
187 | *err = U_BUFFER_OVERFLOW_ERROR; |
188 | break; |
189 | } |
190 | } |
191 | } |
192 | else { |
193 | args->converter->toULength = (int8_t)i; |
194 | *err = U_ILLEGAL_CHAR_FOUND; |
195 | break; |
196 | } |
197 | offsetNum += i; |
198 | } |
199 | |
200 | donefornow: |
201 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
202 | { |
203 | /* End of target buffer */ |
204 | *err = U_BUFFER_OVERFLOW_ERROR; |
205 | } |
206 | |
207 | args->target = myTarget; |
208 | args->source = (const char *) mySource; |
209 | args->offsets = myOffsets; |
210 | } |
211 | |
212 | static void U_CALLCONV |
213 | T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, |
214 | UErrorCode * err) |
215 | { |
216 | const char16_t *mySource = args->source; |
217 | unsigned char *myTarget; |
218 | const char16_t *sourceLimit = args->sourceLimit; |
219 | const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
220 | UChar32 ch, ch2; |
221 | unsigned int indexToWrite; |
222 | unsigned char temp[sizeof(uint32_t)]; |
223 | |
224 | if(mySource >= sourceLimit) { |
225 | /* no input, nothing to do */ |
226 | return; |
227 | } |
228 | |
229 | /* write the BOM if necessary */ |
230 | if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
231 | static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; |
232 | ucnv_fromUWriteBytes(args->converter, |
233 | bom, 4, |
234 | &args->target, args->targetLimit, |
235 | &args->offsets, -1, |
236 | err); |
237 | args->converter->fromUnicodeStatus=0; |
238 | } |
239 | |
240 | myTarget = (unsigned char *) args->target; |
241 | temp[0] = 0; |
242 | |
243 | if (args->converter->fromUChar32) { |
244 | ch = args->converter->fromUChar32; |
245 | args->converter->fromUChar32 = 0; |
246 | goto lowsurogate; |
247 | } |
248 | |
249 | while (mySource < sourceLimit && myTarget < targetLimit) { |
250 | ch = *(mySource++); |
251 | |
252 | if (U_IS_SURROGATE(ch)) { |
253 | if (U_IS_LEAD(ch)) { |
254 | lowsurogate: |
255 | if (mySource < sourceLimit) { |
256 | ch2 = *mySource; |
257 | if (U_IS_TRAIL(ch2)) { |
258 | ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
259 | mySource++; |
260 | } |
261 | else { |
262 | /* this is an unmatched trail code unit (2nd surrogate) */ |
263 | /* callback(illegal) */ |
264 | args->converter->fromUChar32 = ch; |
265 | *err = U_ILLEGAL_CHAR_FOUND; |
266 | break; |
267 | } |
268 | } |
269 | else { |
270 | /* ran out of source */ |
271 | args->converter->fromUChar32 = ch; |
272 | if (args->flush) { |
273 | /* this is an unmatched trail code unit (2nd surrogate) */ |
274 | /* callback(illegal) */ |
275 | *err = U_ILLEGAL_CHAR_FOUND; |
276 | } |
277 | break; |
278 | } |
279 | } |
280 | else { |
281 | /* this is an unmatched trail code unit (2nd surrogate) */ |
282 | /* callback(illegal) */ |
283 | args->converter->fromUChar32 = ch; |
284 | *err = U_ILLEGAL_CHAR_FOUND; |
285 | break; |
286 | } |
287 | } |
288 | |
289 | /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
290 | temp[1] = (uint8_t) (ch >> 16 & 0x1F); |
291 | temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
292 | temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
293 | |
294 | for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { |
295 | if (myTarget < targetLimit) { |
296 | *(myTarget++) = temp[indexToWrite]; |
297 | } |
298 | else { |
299 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
300 | *err = U_BUFFER_OVERFLOW_ERROR; |
301 | } |
302 | } |
303 | } |
304 | |
305 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
306 | *err = U_BUFFER_OVERFLOW_ERROR; |
307 | } |
308 | |
309 | args->target = (char *) myTarget; |
310 | args->source = mySource; |
311 | } |
312 | |
313 | static void U_CALLCONV |
314 | T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, |
315 | UErrorCode * err) |
316 | { |
317 | const char16_t *mySource = args->source; |
318 | unsigned char *myTarget; |
319 | int32_t *myOffsets; |
320 | const char16_t *sourceLimit = args->sourceLimit; |
321 | const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
322 | UChar32 ch, ch2; |
323 | int32_t offsetNum = 0; |
324 | unsigned int indexToWrite; |
325 | unsigned char temp[sizeof(uint32_t)]; |
326 | |
327 | if(mySource >= sourceLimit) { |
328 | /* no input, nothing to do */ |
329 | return; |
330 | } |
331 | |
332 | /* write the BOM if necessary */ |
333 | if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
334 | static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; |
335 | ucnv_fromUWriteBytes(args->converter, |
336 | bom, 4, |
337 | &args->target, args->targetLimit, |
338 | &args->offsets, -1, |
339 | err); |
340 | args->converter->fromUnicodeStatus=0; |
341 | } |
342 | |
343 | myTarget = (unsigned char *) args->target; |
344 | myOffsets = args->offsets; |
345 | temp[0] = 0; |
346 | |
347 | if (args->converter->fromUChar32) { |
348 | ch = args->converter->fromUChar32; |
349 | args->converter->fromUChar32 = 0; |
350 | goto lowsurogate; |
351 | } |
352 | |
353 | while (mySource < sourceLimit && myTarget < targetLimit) { |
354 | ch = *(mySource++); |
355 | |
356 | if (U_IS_SURROGATE(ch)) { |
357 | if (U_IS_LEAD(ch)) { |
358 | lowsurogate: |
359 | if (mySource < sourceLimit) { |
360 | ch2 = *mySource; |
361 | if (U_IS_TRAIL(ch2)) { |
362 | ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
363 | mySource++; |
364 | } |
365 | else { |
366 | /* this is an unmatched trail code unit (2nd surrogate) */ |
367 | /* callback(illegal) */ |
368 | args->converter->fromUChar32 = ch; |
369 | *err = U_ILLEGAL_CHAR_FOUND; |
370 | break; |
371 | } |
372 | } |
373 | else { |
374 | /* ran out of source */ |
375 | args->converter->fromUChar32 = ch; |
376 | if (args->flush) { |
377 | /* this is an unmatched trail code unit (2nd surrogate) */ |
378 | /* callback(illegal) */ |
379 | *err = U_ILLEGAL_CHAR_FOUND; |
380 | } |
381 | break; |
382 | } |
383 | } |
384 | else { |
385 | /* this is an unmatched trail code unit (2nd surrogate) */ |
386 | /* callback(illegal) */ |
387 | args->converter->fromUChar32 = ch; |
388 | *err = U_ILLEGAL_CHAR_FOUND; |
389 | break; |
390 | } |
391 | } |
392 | |
393 | /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
394 | temp[1] = (uint8_t) (ch >> 16 & 0x1F); |
395 | temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
396 | temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
397 | |
398 | for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { |
399 | if (myTarget < targetLimit) { |
400 | *(myTarget++) = temp[indexToWrite]; |
401 | *(myOffsets++) = offsetNum; |
402 | } |
403 | else { |
404 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
405 | *err = U_BUFFER_OVERFLOW_ERROR; |
406 | } |
407 | } |
408 | offsetNum = offsetNum + 1 + (temp[1] != 0); |
409 | } |
410 | |
411 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
412 | *err = U_BUFFER_OVERFLOW_ERROR; |
413 | } |
414 | |
415 | args->target = (char *) myTarget; |
416 | args->source = mySource; |
417 | args->offsets = myOffsets; |
418 | } |
419 | |
420 | static UChar32 U_CALLCONV |
421 | T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, |
422 | UErrorCode* err) |
423 | { |
424 | const uint8_t *mySource; |
425 | UChar32 myUChar; |
426 | int32_t length; |
427 | |
428 | mySource = (const uint8_t *)args->source; |
429 | if (mySource >= (const uint8_t *)args->sourceLimit) |
430 | { |
431 | /* no input */ |
432 | *err = U_INDEX_OUTOFBOUNDS_ERROR; |
433 | return 0xffff; |
434 | } |
435 | |
436 | length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); |
437 | if (length < 4) |
438 | { |
439 | /* got a partial character */ |
440 | uprv_memcpy(args->converter->toUBytes, mySource, length); |
441 | args->converter->toULength = (int8_t)length; |
442 | args->source = (const char *)(mySource + length); |
443 | *err = U_TRUNCATED_CHAR_FOUND; |
444 | return 0xffff; |
445 | } |
446 | |
447 | /* Don't even try to do a direct cast because the value may be on an odd address. */ |
448 | myUChar = ((UChar32)mySource[0] << 24) |
449 | | ((UChar32)mySource[1] << 16) |
450 | | ((UChar32)mySource[2] << 8) |
451 | | ((UChar32)mySource[3]); |
452 | |
453 | args->source = (const char *)(mySource + 4); |
454 | if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { |
455 | return myUChar; |
456 | } |
457 | |
458 | uprv_memcpy(args->converter->toUBytes, mySource, 4); |
459 | args->converter->toULength = 4; |
460 | |
461 | *err = U_ILLEGAL_CHAR_FOUND; |
462 | return 0xffff; |
463 | } |
464 | U_CDECL_END |
/*
 * Function table for the UTF-32BE converter.  It supplies the five conversion
 * entry points defined above plus ucnv_getNonSurrogateUnicodeSet; every other
 * slot is null.
 * NOTE(review): the initializer is positional, so the meaning of each slot
 * comes from the UConverterImpl declaration, which is not visible here --
 * confirm the order against that declaration before adding entries.
 */
static const UConverterImpl _UTF32BEImpl = {
    UCNV_UTF32_BigEndian,

    nullptr,
    nullptr,

    nullptr,
    nullptr,
    nullptr,

    T_UConverter_toUnicode_UTF32_BE,
    T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC,
    T_UConverter_fromUnicode_UTF32_BE,
    T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
    T_UConverter_getNextUChar_UTF32_BE,

    nullptr,
    nullptr,
    nullptr,
    nullptr,
    ucnv_getNonSurrogateUnicodeSet,

    nullptr,
    nullptr
};
490 | |
/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
/*
 * Static description of the UTF-32BE converter: its name, CCSID 1232, and the
 * UCNV_IBM / UCNV_UTF32_BigEndian identifiers.
 * NOTE(review): the two 4s presumably are the minimum and maximum bytes per
 * character, and { 0, 0, 0xff, 0xfd } (U+FFFD spelled big-endian) with length 4
 * presumably is the substitution sequence -- confirm against the
 * UConverterStaticData declaration.
 */
static const UConverterStaticData _UTF32BEStaticData = {
    sizeof(UConverterStaticData),
    "UTF-32BE" ,
    1232,
    UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
    { 0, 0, 0xff, 0xfd }, 4, false, false,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
502 | |
/*
 * Externally visible shared-data object for UTF-32BE, combining the static
 * description and the function table defined above.
 */
const UConverterSharedData _UTF32BEData =
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl);
505 | |
506 | /* UTF-32LE ---------------------------------------------------------- */ |
507 | U_CDECL_BEGIN |
508 | static void U_CALLCONV |
509 | T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, |
510 | UErrorCode * err) |
511 | { |
512 | const unsigned char *mySource = (unsigned char *) args->source; |
513 | char16_t *myTarget = args->target; |
514 | const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
515 | const char16_t *targetLimit = args->targetLimit; |
516 | unsigned char *toUBytes = args->converter->toUBytes; |
517 | uint32_t ch, i; |
518 | |
519 | /* Restore state of current sequence */ |
520 | if (args->converter->toULength > 0 && myTarget < targetLimit) |
521 | { |
522 | i = args->converter->toULength; /* restore # of bytes consumed */ |
523 | args->converter->toULength = 0; |
524 | |
525 | /* Stores the previously calculated ch from a previous call*/ |
526 | ch = args->converter->toUnicodeStatus - 1; |
527 | args->converter->toUnicodeStatus = 0; |
528 | goto morebytes; |
529 | } |
530 | |
531 | while (mySource < sourceLimit && myTarget < targetLimit) |
532 | { |
533 | i = 0; |
534 | ch = 0; |
535 | morebytes: |
536 | while (i < sizeof(uint32_t)) |
537 | { |
538 | if (mySource < sourceLimit) |
539 | { |
540 | ch |= ((uint8_t)(*mySource)) << (i * 8); |
541 | toUBytes[i++] = (char) *(mySource++); |
542 | } |
543 | else |
544 | { |
545 | /* stores a partially calculated target*/ |
546 | /* + 1 to make 0 a valid character */ |
547 | args->converter->toUnicodeStatus = ch + 1; |
548 | args->converter->toULength = (int8_t) i; |
549 | goto donefornow; |
550 | } |
551 | } |
552 | |
553 | if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
554 | /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
555 | if (ch <= MAXIMUM_UCS2) { |
556 | /* fits in 16 bits */ |
557 | *(myTarget++) = (char16_t) ch; |
558 | } |
559 | else { |
560 | /* write out the surrogates */ |
561 | *(myTarget++) = U16_LEAD(ch); |
562 | ch = U16_TRAIL(ch); |
563 | if (myTarget < targetLimit) { |
564 | *(myTarget++) = (char16_t)ch; |
565 | } |
566 | else { |
567 | /* Put in overflow buffer (not handled here) */ |
568 | args->converter->UCharErrorBuffer[0] = (char16_t) ch; |
569 | args->converter->UCharErrorBufferLength = 1; |
570 | *err = U_BUFFER_OVERFLOW_ERROR; |
571 | break; |
572 | } |
573 | } |
574 | } |
575 | else { |
576 | args->converter->toULength = (int8_t)i; |
577 | *err = U_ILLEGAL_CHAR_FOUND; |
578 | break; |
579 | } |
580 | } |
581 | |
582 | donefornow: |
583 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
584 | { |
585 | /* End of target buffer */ |
586 | *err = U_BUFFER_OVERFLOW_ERROR; |
587 | } |
588 | |
589 | args->target = myTarget; |
590 | args->source = (const char *) mySource; |
591 | } |
592 | |
593 | static void U_CALLCONV |
594 | T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, |
595 | UErrorCode * err) |
596 | { |
597 | const unsigned char *mySource = (unsigned char *) args->source; |
598 | char16_t *myTarget = args->target; |
599 | int32_t *myOffsets = args->offsets; |
600 | const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
601 | const char16_t *targetLimit = args->targetLimit; |
602 | unsigned char *toUBytes = args->converter->toUBytes; |
603 | uint32_t ch, i; |
604 | int32_t offsetNum = 0; |
605 | |
606 | /* Restore state of current sequence */ |
607 | if (args->converter->toULength > 0 && myTarget < targetLimit) |
608 | { |
609 | i = args->converter->toULength; /* restore # of bytes consumed */ |
610 | args->converter->toULength = 0; |
611 | |
612 | /* Stores the previously calculated ch from a previous call*/ |
613 | ch = args->converter->toUnicodeStatus - 1; |
614 | args->converter->toUnicodeStatus = 0; |
615 | goto morebytes; |
616 | } |
617 | |
618 | while (mySource < sourceLimit && myTarget < targetLimit) |
619 | { |
620 | i = 0; |
621 | ch = 0; |
622 | morebytes: |
623 | while (i < sizeof(uint32_t)) |
624 | { |
625 | if (mySource < sourceLimit) |
626 | { |
627 | ch |= ((uint8_t)(*mySource)) << (i * 8); |
628 | toUBytes[i++] = (char) *(mySource++); |
629 | } |
630 | else |
631 | { |
632 | /* stores a partially calculated target*/ |
633 | /* + 1 to make 0 a valid character */ |
634 | args->converter->toUnicodeStatus = ch + 1; |
635 | args->converter->toULength = (int8_t) i; |
636 | goto donefornow; |
637 | } |
638 | } |
639 | |
640 | if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) |
641 | { |
642 | /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
643 | if (ch <= MAXIMUM_UCS2) |
644 | { |
645 | /* fits in 16 bits */ |
646 | *(myTarget++) = (char16_t) ch; |
647 | *(myOffsets++) = offsetNum; |
648 | } |
649 | else { |
650 | /* write out the surrogates */ |
651 | *(myTarget++) = U16_LEAD(ch); |
652 | *(myOffsets++) = offsetNum; |
653 | ch = U16_TRAIL(ch); |
654 | if (myTarget < targetLimit) |
655 | { |
656 | *(myTarget++) = (char16_t)ch; |
657 | *(myOffsets++) = offsetNum; |
658 | } |
659 | else |
660 | { |
661 | /* Put in overflow buffer (not handled here) */ |
662 | args->converter->UCharErrorBuffer[0] = (char16_t) ch; |
663 | args->converter->UCharErrorBufferLength = 1; |
664 | *err = U_BUFFER_OVERFLOW_ERROR; |
665 | break; |
666 | } |
667 | } |
668 | } |
669 | else |
670 | { |
671 | args->converter->toULength = (int8_t)i; |
672 | *err = U_ILLEGAL_CHAR_FOUND; |
673 | break; |
674 | } |
675 | offsetNum += i; |
676 | } |
677 | |
678 | donefornow: |
679 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
680 | { |
681 | /* End of target buffer */ |
682 | *err = U_BUFFER_OVERFLOW_ERROR; |
683 | } |
684 | |
685 | args->target = myTarget; |
686 | args->source = (const char *) mySource; |
687 | args->offsets = myOffsets; |
688 | } |
689 | |
690 | static void U_CALLCONV |
691 | T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, |
692 | UErrorCode * err) |
693 | { |
694 | const char16_t *mySource = args->source; |
695 | unsigned char *myTarget; |
696 | const char16_t *sourceLimit = args->sourceLimit; |
697 | const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
698 | UChar32 ch, ch2; |
699 | unsigned int indexToWrite; |
700 | unsigned char temp[sizeof(uint32_t)]; |
701 | |
702 | if(mySource >= sourceLimit) { |
703 | /* no input, nothing to do */ |
704 | return; |
705 | } |
706 | |
707 | /* write the BOM if necessary */ |
708 | if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
709 | static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; |
710 | ucnv_fromUWriteBytes(args->converter, |
711 | bom, 4, |
712 | &args->target, args->targetLimit, |
713 | &args->offsets, -1, |
714 | err); |
715 | args->converter->fromUnicodeStatus=0; |
716 | } |
717 | |
718 | myTarget = (unsigned char *) args->target; |
719 | temp[3] = 0; |
720 | |
721 | if (args->converter->fromUChar32) |
722 | { |
723 | ch = args->converter->fromUChar32; |
724 | args->converter->fromUChar32 = 0; |
725 | goto lowsurogate; |
726 | } |
727 | |
728 | while (mySource < sourceLimit && myTarget < targetLimit) |
729 | { |
730 | ch = *(mySource++); |
731 | |
732 | if (U16_IS_SURROGATE(ch)) { |
733 | if (U16_IS_LEAD(ch)) |
734 | { |
735 | lowsurogate: |
736 | if (mySource < sourceLimit) |
737 | { |
738 | ch2 = *mySource; |
739 | if (U16_IS_TRAIL(ch2)) { |
740 | ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
741 | mySource++; |
742 | } |
743 | else { |
744 | /* this is an unmatched trail code unit (2nd surrogate) */ |
745 | /* callback(illegal) */ |
746 | args->converter->fromUChar32 = ch; |
747 | *err = U_ILLEGAL_CHAR_FOUND; |
748 | break; |
749 | } |
750 | } |
751 | else { |
752 | /* ran out of source */ |
753 | args->converter->fromUChar32 = ch; |
754 | if (args->flush) { |
755 | /* this is an unmatched trail code unit (2nd surrogate) */ |
756 | /* callback(illegal) */ |
757 | *err = U_ILLEGAL_CHAR_FOUND; |
758 | } |
759 | break; |
760 | } |
761 | } |
762 | else { |
763 | /* this is an unmatched trail code unit (2nd surrogate) */ |
764 | /* callback(illegal) */ |
765 | args->converter->fromUChar32 = ch; |
766 | *err = U_ILLEGAL_CHAR_FOUND; |
767 | break; |
768 | } |
769 | } |
770 | |
771 | /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
772 | temp[2] = (uint8_t) (ch >> 16 & 0x1F); |
773 | temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
774 | temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
775 | |
776 | for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) |
777 | { |
778 | if (myTarget < targetLimit) |
779 | { |
780 | *(myTarget++) = temp[indexToWrite]; |
781 | } |
782 | else |
783 | { |
784 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
785 | *err = U_BUFFER_OVERFLOW_ERROR; |
786 | } |
787 | } |
788 | } |
789 | |
790 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
791 | { |
792 | *err = U_BUFFER_OVERFLOW_ERROR; |
793 | } |
794 | |
795 | args->target = (char *) myTarget; |
796 | args->source = mySource; |
797 | } |
798 | |
799 | static void U_CALLCONV |
800 | T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, |
801 | UErrorCode * err) |
802 | { |
803 | const char16_t *mySource = args->source; |
804 | unsigned char *myTarget; |
805 | int32_t *myOffsets; |
806 | const char16_t *sourceLimit = args->sourceLimit; |
807 | const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
808 | UChar32 ch, ch2; |
809 | unsigned int indexToWrite; |
810 | unsigned char temp[sizeof(uint32_t)]; |
811 | int32_t offsetNum = 0; |
812 | |
813 | if(mySource >= sourceLimit) { |
814 | /* no input, nothing to do */ |
815 | return; |
816 | } |
817 | |
818 | /* write the BOM if necessary */ |
819 | if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
820 | static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; |
821 | ucnv_fromUWriteBytes(args->converter, |
822 | bom, 4, |
823 | &args->target, args->targetLimit, |
824 | &args->offsets, -1, |
825 | err); |
826 | args->converter->fromUnicodeStatus=0; |
827 | } |
828 | |
829 | myTarget = (unsigned char *) args->target; |
830 | myOffsets = args->offsets; |
831 | temp[3] = 0; |
832 | |
833 | if (args->converter->fromUChar32) |
834 | { |
835 | ch = args->converter->fromUChar32; |
836 | args->converter->fromUChar32 = 0; |
837 | goto lowsurogate; |
838 | } |
839 | |
840 | while (mySource < sourceLimit && myTarget < targetLimit) |
841 | { |
842 | ch = *(mySource++); |
843 | |
844 | if (U16_IS_SURROGATE(ch)) { |
845 | if (U16_IS_LEAD(ch)) |
846 | { |
847 | lowsurogate: |
848 | if (mySource < sourceLimit) |
849 | { |
850 | ch2 = *mySource; |
851 | if (U16_IS_TRAIL(ch2)) |
852 | { |
853 | ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
854 | mySource++; |
855 | } |
856 | else { |
857 | /* this is an unmatched trail code unit (2nd surrogate) */ |
858 | /* callback(illegal) */ |
859 | args->converter->fromUChar32 = ch; |
860 | *err = U_ILLEGAL_CHAR_FOUND; |
861 | break; |
862 | } |
863 | } |
864 | else { |
865 | /* ran out of source */ |
866 | args->converter->fromUChar32 = ch; |
867 | if (args->flush) { |
868 | /* this is an unmatched trail code unit (2nd surrogate) */ |
869 | /* callback(illegal) */ |
870 | *err = U_ILLEGAL_CHAR_FOUND; |
871 | } |
872 | break; |
873 | } |
874 | } |
875 | else { |
876 | /* this is an unmatched trail code unit (2nd surrogate) */ |
877 | /* callback(illegal) */ |
878 | args->converter->fromUChar32 = ch; |
879 | *err = U_ILLEGAL_CHAR_FOUND; |
880 | break; |
881 | } |
882 | } |
883 | |
884 | /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
885 | temp[2] = (uint8_t) (ch >> 16 & 0x1F); |
886 | temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
887 | temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
888 | |
889 | for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) |
890 | { |
891 | if (myTarget < targetLimit) |
892 | { |
893 | *(myTarget++) = temp[indexToWrite]; |
894 | *(myOffsets++) = offsetNum; |
895 | } |
896 | else |
897 | { |
898 | args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
899 | *err = U_BUFFER_OVERFLOW_ERROR; |
900 | } |
901 | } |
902 | offsetNum = offsetNum + 1 + (temp[2] != 0); |
903 | } |
904 | |
905 | if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
906 | { |
907 | *err = U_BUFFER_OVERFLOW_ERROR; |
908 | } |
909 | |
910 | args->target = (char *) myTarget; |
911 | args->source = mySource; |
912 | args->offsets = myOffsets; |
913 | } |
914 | |
915 | static UChar32 U_CALLCONV |
916 | T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, |
917 | UErrorCode* err) |
918 | { |
919 | const uint8_t *mySource; |
920 | UChar32 myUChar; |
921 | int32_t length; |
922 | |
923 | mySource = (const uint8_t *)args->source; |
924 | if (mySource >= (const uint8_t *)args->sourceLimit) |
925 | { |
926 | /* no input */ |
927 | *err = U_INDEX_OUTOFBOUNDS_ERROR; |
928 | return 0xffff; |
929 | } |
930 | |
931 | length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); |
932 | if (length < 4) |
933 | { |
934 | /* got a partial character */ |
935 | uprv_memcpy(args->converter->toUBytes, mySource, length); |
936 | args->converter->toULength = (int8_t)length; |
937 | args->source = (const char *)(mySource + length); |
938 | *err = U_TRUNCATED_CHAR_FOUND; |
939 | return 0xffff; |
940 | } |
941 | |
942 | /* Don't even try to do a direct cast because the value may be on an odd address. */ |
943 | myUChar = ((UChar32)mySource[3] << 24) |
944 | | ((UChar32)mySource[2] << 16) |
945 | | ((UChar32)mySource[1] << 8) |
946 | | ((UChar32)mySource[0]); |
947 | |
948 | args->source = (const char *)(mySource + 4); |
949 | if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { |
950 | return myUChar; |
951 | } |
952 | |
953 | uprv_memcpy(args->converter->toUBytes, mySource, 4); |
954 | args->converter->toULength = 4; |
955 | |
956 | *err = U_ILLEGAL_CHAR_FOUND; |
957 | return 0xffff; |
958 | } |
959 | U_CDECL_END |
/*
 * Function table for the UTF-32LE converter.  It supplies the five conversion
 * entry points defined above plus ucnv_getNonSurrogateUnicodeSet; every other
 * slot is null.
 * NOTE(review): the initializer is positional, so the meaning of each slot
 * comes from the UConverterImpl declaration, which is not visible here --
 * confirm the order against that declaration before adding entries.
 */
static const UConverterImpl _UTF32LEImpl = {
    UCNV_UTF32_LittleEndian,

    nullptr,
    nullptr,

    nullptr,
    nullptr,
    nullptr,

    T_UConverter_toUnicode_UTF32_LE,
    T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC,
    T_UConverter_fromUnicode_UTF32_LE,
    T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
    T_UConverter_getNextUChar_UTF32_LE,

    nullptr,
    nullptr,
    nullptr,
    nullptr,
    ucnv_getNonSurrogateUnicodeSet,

    nullptr,
    nullptr
};
985 | |
/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
/*
 * Static description of the UTF-32LE converter: its name, the number 1234,
 * and the UCNV_IBM / UCNV_UTF32_LittleEndian identifiers.
 * NOTE(review): the comment above mentions 1232 while this initializer uses
 * 1234 -- confirm which CCSID is intended for the little-endian variant.
 * NOTE(review): the two 4s presumably are the minimum and maximum bytes per
 * character, and { 0xfd, 0xff, 0, 0 } (U+FFFD spelled little-endian) with
 * length 4 presumably is the substitution sequence -- confirm against the
 * UConverterStaticData declaration.
 */
static const UConverterStaticData _UTF32LEStaticData = {
    sizeof(UConverterStaticData),
    "UTF-32LE" ,
    1234,
    UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4,
    { 0xfd, 0xff, 0, 0 }, 4, false, false,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
997 | |
998 | |
/*
 * Externally visible shared-data object for UTF-32LE, combining the static
 * description and the function table defined above.
 */
const UConverterSharedData _UTF32LEData =
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl);
1001 | |
1002 | /* UTF-32 (Detect BOM) ------------------------------------------------------ */ |
1003 | |
1004 | /* |
1005 | * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE |
1006 | * accordingly. |
1007 | * |
1008 | * State values: |
1009 | * 0 initial state |
1010 | * 1 saw 00 |
1011 | * 2 saw 00 00 |
1012 | * 3 saw 00 00 FE |
1013 | * 4 - |
1014 | * 5 saw FF |
1015 | * 6 saw FF FE |
1016 | * 7 saw FF FE 00 |
1017 | * 8 UTF-32BE mode |
1018 | * 9 UTF-32LE mode |
1019 | * |
1020 | * During detection: state&3==number of matching bytes so far. |
1021 | * |
1022 | * On output, emit U+FEFF as the first code point. |
1023 | */ |
1024 | U_CDECL_BEGIN |
1025 | static void U_CALLCONV |
1026 | _UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { |
1027 | if(choice<=UCNV_RESET_TO_UNICODE) { |
1028 | /* reset toUnicode: state=0 */ |
1029 | cnv->mode=0; |
1030 | } |
1031 | if(choice!=UCNV_RESET_TO_UNICODE) { |
1032 | /* reset fromUnicode: prepare to output the UTF-32PE BOM */ |
1033 | cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; |
1034 | } |
1035 | } |
1036 | |
1037 | static void U_CALLCONV |
1038 | _UTF32Open(UConverter *cnv, |
1039 | UConverterLoadArgs *pArgs, |
1040 | UErrorCode *pErrorCode) { |
1041 | (void)pArgs; |
1042 | (void)pErrorCode; |
1043 | _UTF32Reset(cnv, UCNV_RESET_BOTH); |
1044 | } |
1045 | |
/*
 * Both UTF-32 byte order marks, back to back. The BOM-detection state is
 * used directly as an index into this table, and (state & 4) selects the
 * start of the BOM that is currently being matched.
 */
static const char utf32BOM[8] = {
    /* [0..3] UTF-32BE BOM: 00 00 FE FF */
    0, 0, (char)0xfeu, (char)0xffu,
    /* [4..7] UTF-32LE BOM: FF FE 00 00 */
    (char)0xffu, (char)0xfeu, 0, 0
};
1047 | |
1048 | static void U_CALLCONV |
1049 | _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, |
1050 | UErrorCode *pErrorCode) { |
1051 | UConverter *cnv=pArgs->converter; |
1052 | const char *source=pArgs->source; |
1053 | const char *sourceLimit=pArgs->sourceLimit; |
1054 | int32_t *offsets=pArgs->offsets; |
1055 | |
1056 | int32_t state, offsetDelta; |
1057 | char b; |
1058 | |
1059 | state=cnv->mode; |
1060 | |
1061 | /* |
1062 | * If we detect a BOM in this buffer, then we must add the BOM size to the |
1063 | * offsets because the actual converter function will not see and count the BOM. |
1064 | * offsetDelta will have the number of the BOM bytes that are in the current buffer. |
1065 | */ |
1066 | offsetDelta=0; |
1067 | |
1068 | while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { |
1069 | switch(state) { |
1070 | case 0: |
1071 | b=*source; |
1072 | if(b==0) { |
1073 | state=1; /* could be 00 00 FE FF */ |
1074 | } else if(b==(char)0xffu) { |
1075 | state=5; /* could be FF FE 00 00 */ |
1076 | } else { |
1077 | state=8; /* default to UTF-32BE */ |
1078 | continue; |
1079 | } |
1080 | ++source; |
1081 | break; |
1082 | case 1: |
1083 | case 2: |
1084 | case 3: |
1085 | case 5: |
1086 | case 6: |
1087 | case 7: |
1088 | if(*source==utf32BOM[state]) { |
1089 | ++state; |
1090 | ++source; |
1091 | if(state==4) { |
1092 | state=8; /* detect UTF-32BE */ |
1093 | offsetDelta=(int32_t)(source-pArgs->source); |
1094 | } else if(state==8) { |
1095 | state=9; /* detect UTF-32LE */ |
1096 | offsetDelta=(int32_t)(source-pArgs->source); |
1097 | } |
1098 | } else { |
1099 | /* switch to UTF-32BE and pass the previous bytes */ |
1100 | int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ |
1101 | |
1102 | /* reset the source */ |
1103 | source=pArgs->source; |
1104 | |
1105 | if(count==(state&3)) { |
1106 | /* simple: all in the same buffer, just reset source */ |
1107 | } else { |
1108 | UBool oldFlush=pArgs->flush; |
1109 | |
1110 | /* some of the bytes are from a previous buffer, replay those first */ |
1111 | pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ |
1112 | pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ |
1113 | pArgs->flush=false; /* this sourceLimit is not the real source stream limit */ |
1114 | |
1115 | /* no offsets: bytes from previous buffer, and not enough for output */ |
1116 | T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
1117 | |
1118 | /* restore real pointers; pArgs->source will be set in case 8/9 */ |
1119 | pArgs->sourceLimit=sourceLimit; |
1120 | pArgs->flush=oldFlush; |
1121 | } |
1122 | state=8; |
1123 | continue; |
1124 | } |
1125 | break; |
1126 | case 8: |
1127 | /* call UTF-32BE */ |
1128 | pArgs->source=source; |
1129 | if(offsets==nullptr) { |
1130 | T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
1131 | } else { |
1132 | T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); |
1133 | } |
1134 | source=pArgs->source; |
1135 | break; |
1136 | case 9: |
1137 | /* call UTF-32LE */ |
1138 | pArgs->source=source; |
1139 | if(offsets==nullptr) { |
1140 | T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); |
1141 | } else { |
1142 | T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); |
1143 | } |
1144 | source=pArgs->source; |
1145 | break; |
1146 | default: |
1147 | break; /* does not occur */ |
1148 | } |
1149 | } |
1150 | |
1151 | /* add BOM size to offsets - see comment at offsetDelta declaration */ |
1152 | if(offsets!=nullptr && offsetDelta!=0) { |
1153 | int32_t *offsetsLimit=pArgs->offsets; |
1154 | while(offsets<offsetsLimit) { |
1155 | *offsets++ += offsetDelta; |
1156 | } |
1157 | } |
1158 | |
1159 | pArgs->source=source; |
1160 | |
1161 | if(source==sourceLimit && pArgs->flush) { |
1162 | /* handle truncated input */ |
1163 | switch(state) { |
1164 | case 0: |
1165 | break; /* no input at all, nothing to do */ |
1166 | case 8: |
1167 | T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
1168 | break; |
1169 | case 9: |
1170 | T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); |
1171 | break; |
1172 | default: |
1173 | /* handle 0<state<8: call UTF-32BE with too-short input */ |
1174 | pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ |
1175 | pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ |
1176 | |
1177 | /* no offsets: not enough for output */ |
1178 | T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
1179 | pArgs->source=source; |
1180 | pArgs->sourceLimit=sourceLimit; |
1181 | state=8; |
1182 | break; |
1183 | } |
1184 | } |
1185 | |
1186 | cnv->mode=state; |
1187 | } |
1188 | |
1189 | static UChar32 U_CALLCONV |
1190 | _UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, |
1191 | UErrorCode *pErrorCode) { |
1192 | switch(pArgs->converter->mode) { |
1193 | case 8: |
1194 | return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); |
1195 | case 9: |
1196 | return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); |
1197 | default: |
1198 | return UCNV_GET_NEXT_UCHAR_USE_TO_U; |
1199 | } |
1200 | } |
1201 | U_CDECL_END |
1202 | static const UConverterImpl _UTF32Impl = { |
1203 | UCNV_UTF32, |
1204 | |
1205 | nullptr, |
1206 | nullptr, |
1207 | |
1208 | _UTF32Open, |
1209 | nullptr, |
1210 | _UTF32Reset, |
1211 | |
1212 | _UTF32ToUnicodeWithOffsets, |
1213 | _UTF32ToUnicodeWithOffsets, |
1214 | #if U_IS_BIG_ENDIAN |
1215 | T_UConverter_fromUnicode_UTF32_BE, |
1216 | T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, |
1217 | #else |
1218 | T_UConverter_fromUnicode_UTF32_LE, |
1219 | T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, |
1220 | #endif |
1221 | _UTF32GetNextUChar, |
1222 | |
1223 | nullptr, /* ### TODO implement getStarters for all Unicode encodings?! */ |
1224 | nullptr, |
1225 | nullptr, |
1226 | nullptr, |
1227 | ucnv_getNonSurrogateUnicodeSet, |
1228 | |
1229 | nullptr, |
1230 | nullptr |
1231 | }; |
1232 | |
1233 | /* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianness of UTF-32 */ |
1234 | static const UConverterStaticData _UTF32StaticData = { |
1235 | sizeof(UConverterStaticData), |
1236 | "UTF-32" , |
1237 | 1236, |
1238 | UCNV_IBM, UCNV_UTF32, 4, 4, |
1239 | #if U_IS_BIG_ENDIAN |
1240 | { 0, 0, 0xff, 0xfd }, 4, |
1241 | #else |
1242 | { 0xfd, 0xff, 0, 0 }, 4, |
1243 | #endif |
1244 | false, false, |
1245 | 0, |
1246 | 0, |
1247 | { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
1248 | }; |
1249 | |
1250 | const UConverterSharedData _UTF32Data = |
1251 | UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl); |
1252 | |
1253 | #endif |
1254 | |