1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2001-2012, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: ustr_wcs.cpp |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2004sep07 |
16 | * created by: Markus W. Scherer |
17 | * |
18 | * u_strToWCS() and u_strFromWCS() functions |
19 | * moved here from ustrtrns.c for better modularization. |
20 | */ |
21 | |
22 | #include "unicode/utypes.h" |
23 | #include "unicode/ustring.h" |
24 | #include "cstring.h" |
25 | #include "cwchar.h" |
26 | #include "cmemory.h" |
27 | #include "ustr_imp.h" |
28 | #include "ustr_cnv.h" |
29 | |
30 | #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION |
31 | |
32 | #define _STACK_BUFFER_CAPACITY 1000 |
33 | #define _BUFFER_CAPACITY_MULTIPLIER 2 |
34 | |
35 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
36 | // TODO: We should use CharString for char buffers and UnicodeString for char16_t buffers. |
37 | // Then we could change this to work only with wchar_t buffers. |
38 | static inline UBool |
39 | u_growAnyBufferFromStatic(void *context, |
40 | void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, |
41 | int32_t length, int32_t size) { |
42 | // Use char* not void* to avoid the compiler's strict-aliasing assumptions |
43 | // and related warnings. |
44 | char *newBuffer=(char *)uprv_malloc(reqCapacity*size); |
45 | if(newBuffer!=nullptr) { |
46 | if(length>0) { |
47 | uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); |
48 | } |
49 | *pCapacity=reqCapacity; |
50 | } else { |
51 | *pCapacity=0; |
52 | } |
53 | |
54 | /* release the old pBuffer if it was not statically allocated */ |
55 | if(*pBuffer!=(char *)context) { |
56 | uprv_free(*pBuffer); |
57 | } |
58 | |
59 | *pBuffer=newBuffer; |
60 | return (UBool)(newBuffer!=nullptr); |
61 | } |
62 | |
63 | /* helper function */ |
64 | static wchar_t* |
65 | _strToWCS(wchar_t *dest, |
66 | int32_t destCapacity, |
67 | int32_t *pDestLength, |
68 | const char16_t *src, |
69 | int32_t srcLength, |
70 | UErrorCode *pErrorCode){ |
71 | |
72 | char stackBuffer [_STACK_BUFFER_CAPACITY]; |
73 | char* tempBuf = stackBuffer; |
74 | int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; |
75 | char* tempBufLimit = stackBuffer + tempBufCapacity; |
76 | UConverter* conv = nullptr; |
77 | char* saveBuf = tempBuf; |
78 | wchar_t* intTarget=nullptr; |
79 | int32_t intTargetCapacity=0; |
80 | int count=0,retVal=0; |
81 | |
82 | const char16_t *pSrcLimit =nullptr; |
83 | const char16_t *pSrc = src; |
84 | |
85 | conv = u_getDefaultConverter(pErrorCode); |
86 | |
87 | if(U_FAILURE(*pErrorCode)){ |
88 | return nullptr; |
89 | } |
90 | |
91 | if(srcLength == -1){ |
92 | srcLength = u_strlen(pSrc); |
93 | } |
94 | |
95 | pSrcLimit = pSrc + srcLength; |
96 | |
97 | for(;;) { |
98 | /* reset the error state */ |
99 | *pErrorCode = U_ZERO_ERROR; |
100 | |
101 | /* convert to chars using default converter */ |
102 | ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),pErrorCode); |
103 | count =(tempBuf - saveBuf); |
104 | |
105 | /* This should rarely occur */ |
106 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
107 | tempBuf = saveBuf; |
108 | |
109 | /* we don't have enough room on the stack grow the buffer */ |
110 | int32_t newCapacity = 2 * srcLength; |
111 | if(newCapacity <= tempBufCapacity) { |
112 | newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; |
113 | } |
114 | if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
115 | newCapacity, count, 1)) { |
116 | goto cleanup; |
117 | } |
118 | |
119 | saveBuf = tempBuf; |
120 | tempBufLimit = tempBuf + tempBufCapacity; |
121 | tempBuf = tempBuf + count; |
122 | |
123 | } else { |
124 | break; |
125 | } |
126 | } |
127 | |
128 | if(U_FAILURE(*pErrorCode)){ |
129 | goto cleanup; |
130 | } |
131 | |
132 | /* done with conversion null terminate the char buffer */ |
133 | if(count>=tempBufCapacity){ |
134 | tempBuf = saveBuf; |
135 | /* we don't have enough room on the stack grow the buffer */ |
136 | if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
137 | count+1, count, 1)) { |
138 | goto cleanup; |
139 | } |
140 | saveBuf = tempBuf; |
141 | } |
142 | |
143 | saveBuf[count]=0; |
144 | |
145 | |
146 | /* allocate more space than required |
147 | * here we assume that every char requires |
148 | * no more than 2 wchar_ts |
149 | */ |
150 | intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; |
151 | intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); |
152 | |
153 | if(intTarget){ |
154 | |
155 | int32_t nulLen = 0; |
156 | int32_t remaining = intTargetCapacity; |
157 | wchar_t* pIntTarget=intTarget; |
158 | tempBuf = saveBuf; |
159 | |
160 | /* now convert the mbs to wcs */ |
161 | for(;;){ |
162 | |
163 | /* we can call the system API since we are sure that |
164 | * there is atleast 1 null in the input |
165 | */ |
166 | retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); |
167 | |
168 | if(retVal==-1){ |
169 | *pErrorCode = U_INVALID_CHAR_FOUND; |
170 | break; |
171 | }else if(retVal== remaining){/* should never occur */ |
172 | int numWritten = (pIntTarget-intTarget); |
173 | u_growAnyBufferFromStatic(nullptr,(void**) &intTarget, |
174 | &intTargetCapacity, |
175 | intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, |
176 | numWritten, |
177 | sizeof(wchar_t)); |
178 | pIntTarget = intTarget; |
179 | remaining=intTargetCapacity; |
180 | |
181 | if(nulLen!=count){ /*there are embedded nulls*/ |
182 | pIntTarget+=numWritten; |
183 | remaining-=numWritten; |
184 | } |
185 | |
186 | }else{ |
187 | int32_t nulVal; |
188 | /*scan for nulls */ |
189 | /* we donot check for limit since tempBuf is null terminated */ |
190 | while(tempBuf[nulLen++] != 0){ |
191 | } |
192 | nulVal = (nulLen < srcLength) ? 1 : 0; |
193 | pIntTarget = pIntTarget + retVal+nulVal; |
194 | remaining -=(retVal+nulVal); |
195 | |
196 | /* check if we have reached the source limit*/ |
197 | if(nulLen>=(count)){ |
198 | break; |
199 | } |
200 | } |
201 | } |
202 | count = (int32_t)(pIntTarget-intTarget); |
203 | |
204 | if(0 < count && count <= destCapacity){ |
205 | uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); |
206 | } |
207 | |
208 | if(pDestLength){ |
209 | *pDestLength = count; |
210 | } |
211 | |
212 | /* free the allocated memory */ |
213 | uprv_free(intTarget); |
214 | |
215 | }else{ |
216 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
217 | } |
218 | cleanup: |
219 | /* are we still using stack buffer */ |
220 | if(stackBuffer != saveBuf){ |
221 | uprv_free(saveBuf); |
222 | } |
223 | u_terminateWChars(dest,destCapacity,count,pErrorCode); |
224 | |
225 | u_releaseDefaultConverter(conv); |
226 | |
227 | return dest; |
228 | } |
229 | #endif |
230 | |
231 | U_CAPI wchar_t* U_EXPORT2 |
232 | u_strToWCS(wchar_t *dest, |
233 | int32_t destCapacity, |
234 | int32_t *pDestLength, |
235 | const char16_t *src, |
236 | int32_t srcLength, |
237 | UErrorCode *pErrorCode){ |
238 | |
239 | /* args check */ |
240 | if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){ |
241 | return nullptr; |
242 | } |
243 | |
244 | if( (src==nullptr && srcLength!=0) || srcLength < -1 || |
245 | (destCapacity<0) || (dest == nullptr && destCapacity > 0) |
246 | ) { |
247 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
248 | return nullptr; |
249 | } |
250 | |
251 | #ifdef U_WCHAR_IS_UTF16 |
252 | /* wchar_t is UTF-16 just do a memcpy */ |
253 | if(srcLength == -1){ |
254 | srcLength = u_strlen(src); |
255 | } |
256 | if(0 < srcLength && srcLength <= destCapacity){ |
257 | u_memcpy((char16_t *)dest, src, srcLength); |
258 | } |
259 | if(pDestLength){ |
260 | *pDestLength = srcLength; |
261 | } |
262 | |
263 | u_terminateUChars((char16_t *)dest,destCapacity,srcLength,pErrorCode); |
264 | |
265 | return dest; |
266 | |
267 | #elif defined U_WCHAR_IS_UTF32 |
268 | |
269 | return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, |
270 | src, srcLength, pErrorCode); |
271 | |
272 | #else |
273 | |
274 | return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); |
275 | |
276 | #endif |
277 | |
278 | } |
279 | |
280 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
281 | /* helper function */ |
282 | static char16_t* |
283 | _strFromWCS( char16_t *dest, |
284 | int32_t destCapacity, |
285 | int32_t *pDestLength, |
286 | const wchar_t *src, |
287 | int32_t srcLength, |
288 | UErrorCode *pErrorCode) |
289 | { |
290 | int32_t retVal =0, count =0 ; |
291 | UConverter* conv = nullptr; |
292 | char16_t* pTarget = nullptr; |
293 | char16_t* pTargetLimit = nullptr; |
294 | char16_t* target = nullptr; |
295 | |
296 | char16_t uStack [_STACK_BUFFER_CAPACITY]; |
297 | |
298 | wchar_t wStack[_STACK_BUFFER_CAPACITY]; |
299 | wchar_t* pWStack = wStack; |
300 | |
301 | |
302 | char cStack[_STACK_BUFFER_CAPACITY]; |
303 | int32_t cStackCap = _STACK_BUFFER_CAPACITY; |
304 | char* pCSrc=cStack; |
305 | char* pCSave=pCSrc; |
306 | char* pCSrcLimit=nullptr; |
307 | |
308 | const wchar_t* pSrc = src; |
309 | const wchar_t* pSrcLimit = nullptr; |
310 | |
311 | if(srcLength ==-1){ |
312 | /* if the wchar_t source is null terminated we can safely |
313 | * assume that there are no embedded nulls, this is a fast |
314 | * path for null terminated strings. |
315 | */ |
316 | for(;;){ |
317 | /* convert wchars to chars */ |
318 | retVal = uprv_wcstombs(pCSrc,src, cStackCap); |
319 | |
320 | if(retVal == -1){ |
321 | *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
322 | goto cleanup; |
323 | }else if(retVal >= (cStackCap-1)){ |
324 | /* Should rarely occur */ |
325 | u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
326 | cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); |
327 | pCSave = pCSrc; |
328 | }else{ |
329 | /* converted every thing */ |
330 | pCSrc = pCSrc+retVal; |
331 | break; |
332 | } |
333 | } |
334 | |
335 | }else{ |
336 | /* here the source is not null terminated |
337 | * so it may have nulls embedded and we need to |
338 | * do some extra processing |
339 | */ |
340 | int32_t remaining =cStackCap; |
341 | |
342 | pSrcLimit = src + srcLength; |
343 | |
344 | for(;;){ |
345 | int32_t nulLen = 0; |
346 | |
347 | /* find nulls in the string */ |
348 | while(nulLen<srcLength && pSrc[nulLen++]!=0){ |
349 | } |
350 | |
351 | if((pSrc+nulLen) < pSrcLimit){ |
352 | /* check if we have enough room in pCSrc */ |
353 | if(remaining < (nulLen * MB_CUR_MAX)){ |
354 | /* should rarely occur */ |
355 | int32_t len = (pCSrc-pCSave); |
356 | pCSrc = pCSave; |
357 | /* we do not have enough room so grow the buffer*/ |
358 | u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
359 | _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
360 | |
361 | pCSave = pCSrc; |
362 | pCSrc = pCSave+len; |
363 | remaining = cStackCap-(pCSrc - pCSave); |
364 | } |
365 | |
366 | /* we have found a null so convert the |
367 | * chunk from beginning of non-null char to null |
368 | */ |
369 | retVal = uprv_wcstombs(pCSrc,pSrc,remaining); |
370 | |
371 | if(retVal==-1){ |
372 | /* an error occurred bail out */ |
373 | *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
374 | goto cleanup; |
375 | } |
376 | |
377 | pCSrc += retVal+1 /* already null terminated */; |
378 | |
379 | pSrc += nulLen; /* skip past the null */ |
380 | srcLength-=nulLen; /* decrement the srcLength */ |
381 | remaining -= (pCSrc-pCSave); |
382 | |
383 | |
384 | }else{ |
385 | /* the source is not null terminated and we are |
386 | * end of source so we copy the source to a temp buffer |
387 | * null terminate it and convert wchar_ts to chars |
388 | */ |
389 | if(nulLen >= _STACK_BUFFER_CAPACITY){ |
390 | /* Should rarely occur */ |
391 | /* allocate new buffer buffer */ |
392 | pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); |
393 | if(pWStack==nullptr){ |
394 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
395 | goto cleanup; |
396 | } |
397 | } |
398 | if(nulLen>0){ |
399 | /* copy the contents to tempStack */ |
400 | uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); |
401 | } |
402 | |
403 | /* null terminate the tempBuffer */ |
404 | pWStack[nulLen] =0 ; |
405 | |
406 | if(remaining < (nulLen * MB_CUR_MAX)){ |
407 | /* Should rarely occur */ |
408 | int32_t len = (pCSrc-pCSave); |
409 | pCSrc = pCSave; |
410 | /* we do not have enough room so grow the buffer*/ |
411 | u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
412 | cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
413 | |
414 | pCSave = pCSrc; |
415 | pCSrc = pCSave+len; |
416 | remaining = cStackCap-(pCSrc - pCSave); |
417 | } |
418 | /* convert to chars */ |
419 | retVal = uprv_wcstombs(pCSrc,pWStack,remaining); |
420 | |
421 | pCSrc += retVal; |
422 | pSrc += nulLen; |
423 | srcLength-=nulLen; /* decrement the srcLength */ |
424 | break; |
425 | } |
426 | } |
427 | } |
428 | |
429 | /* OK..now we have converted from wchar_ts to chars now |
430 | * convert chars to UChars |
431 | */ |
432 | pCSrcLimit = pCSrc; |
433 | pCSrc = pCSave; |
434 | pTarget = target= dest; |
435 | pTargetLimit = dest + destCapacity; |
436 | |
437 | conv= u_getDefaultConverter(pErrorCode); |
438 | |
439 | if(U_FAILURE(*pErrorCode)|| conv==nullptr){ |
440 | goto cleanup; |
441 | } |
442 | |
443 | for(;;) { |
444 | |
445 | *pErrorCode = U_ZERO_ERROR; |
446 | |
447 | /* convert to stack buffer*/ |
448 | ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),pErrorCode); |
449 | |
450 | /* increment count to number written to stack */ |
451 | count+= pTarget - target; |
452 | |
453 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
454 | target = uStack; |
455 | pTarget = uStack; |
456 | pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; |
457 | } else { |
458 | break; |
459 | } |
460 | |
461 | } |
462 | |
463 | if(pDestLength){ |
464 | *pDestLength =count; |
465 | } |
466 | |
467 | u_terminateUChars(dest,destCapacity,count,pErrorCode); |
468 | |
469 | cleanup: |
470 | |
471 | if(cStack != pCSave){ |
472 | uprv_free(pCSave); |
473 | } |
474 | |
475 | if(wStack != pWStack){ |
476 | uprv_free(pWStack); |
477 | } |
478 | |
479 | u_releaseDefaultConverter(conv); |
480 | |
481 | return dest; |
482 | } |
483 | #endif |
484 | |
485 | U_CAPI char16_t* U_EXPORT2 |
486 | u_strFromWCS(char16_t *dest, |
487 | int32_t destCapacity, |
488 | int32_t *pDestLength, |
489 | const wchar_t *src, |
490 | int32_t srcLength, |
491 | UErrorCode *pErrorCode) |
492 | { |
493 | |
494 | /* args check */ |
495 | if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){ |
496 | return nullptr; |
497 | } |
498 | |
499 | if( (src==nullptr && srcLength!=0) || srcLength < -1 || |
500 | (destCapacity<0) || (dest == nullptr && destCapacity > 0) |
501 | ) { |
502 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
503 | return nullptr; |
504 | } |
505 | |
506 | #ifdef U_WCHAR_IS_UTF16 |
507 | /* wchar_t is UTF-16 just do a memcpy */ |
508 | if(srcLength == -1){ |
509 | srcLength = u_strlen((const char16_t *)src); |
510 | } |
511 | if(0 < srcLength && srcLength <= destCapacity){ |
512 | u_memcpy(dest, (const char16_t *)src, srcLength); |
513 | } |
514 | if(pDestLength){ |
515 | *pDestLength = srcLength; |
516 | } |
517 | |
518 | u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); |
519 | |
520 | return dest; |
521 | |
522 | #elif defined U_WCHAR_IS_UTF32 |
523 | |
524 | return u_strFromUTF32(dest, destCapacity, pDestLength, |
525 | (UChar32*)src, srcLength, pErrorCode); |
526 | |
527 | #else |
528 | |
529 | return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); |
530 | |
531 | #endif |
532 | |
533 | } |
534 | |
535 | #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */ |
536 | |