| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * |
| 6 | * Copyright (C) 2001-2012, International Business Machines |
| 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************* |
| 10 | * file name: ustr_wcs.cpp |
| 11 | * encoding: UTF-8 |
| 12 | * tab size: 8 (not used) |
| 13 | * indentation:4 |
| 14 | * |
| 15 | * created on: 2004sep07 |
| 16 | * created by: Markus W. Scherer |
| 17 | * |
| 18 | * u_strToWCS() and u_strFromWCS() functions |
| 19 | * moved here from ustrtrns.c for better modularization. |
| 20 | */ |
| 21 | |
| 22 | #include "unicode/utypes.h" |
| 23 | #include "unicode/ustring.h" |
| 24 | #include "cstring.h" |
| 25 | #include "cwchar.h" |
| 26 | #include "cmemory.h" |
| 27 | #include "ustr_imp.h" |
| 28 | #include "ustr_cnv.h" |
| 29 | |
| 30 | #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION |
| 31 | |
| 32 | #define _STACK_BUFFER_CAPACITY 1000 |
| 33 | #define _BUFFER_CAPACITY_MULTIPLIER 2 |
| 34 | |
| 35 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
| 36 | // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. |
| 37 | // Then we could change this to work only with wchar_t buffers. |
| 38 | static inline UBool |
| 39 | u_growAnyBufferFromStatic(void *context, |
| 40 | void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, |
| 41 | int32_t length, int32_t size) { |
| 42 | // Use char* not void* to avoid the compiler's strict-aliasing assumptions |
| 43 | // and related warnings. |
| 44 | char *newBuffer=(char *)uprv_malloc(reqCapacity*size); |
| 45 | if(newBuffer!=NULL) { |
| 46 | if(length>0) { |
| 47 | uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); |
| 48 | } |
| 49 | *pCapacity=reqCapacity; |
| 50 | } else { |
| 51 | *pCapacity=0; |
| 52 | } |
| 53 | |
| 54 | /* release the old pBuffer if it was not statically allocated */ |
| 55 | if(*pBuffer!=(char *)context) { |
| 56 | uprv_free(*pBuffer); |
| 57 | } |
| 58 | |
| 59 | *pBuffer=newBuffer; |
| 60 | return (UBool)(newBuffer!=NULL); |
| 61 | } |
| 62 | |
| 63 | /* helper function */ |
| 64 | static wchar_t* |
| 65 | _strToWCS(wchar_t *dest, |
| 66 | int32_t destCapacity, |
| 67 | int32_t *pDestLength, |
| 68 | const UChar *src, |
| 69 | int32_t srcLength, |
| 70 | UErrorCode *pErrorCode){ |
| 71 | |
| 72 | char stackBuffer [_STACK_BUFFER_CAPACITY]; |
| 73 | char* tempBuf = stackBuffer; |
| 74 | int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; |
| 75 | char* tempBufLimit = stackBuffer + tempBufCapacity; |
| 76 | UConverter* conv = NULL; |
| 77 | char* saveBuf = tempBuf; |
| 78 | wchar_t* intTarget=NULL; |
| 79 | int32_t intTargetCapacity=0; |
| 80 | int count=0,retVal=0; |
| 81 | |
| 82 | const UChar *pSrcLimit =NULL; |
| 83 | const UChar *pSrc = src; |
| 84 | |
| 85 | conv = u_getDefaultConverter(pErrorCode); |
| 86 | |
| 87 | if(U_FAILURE(*pErrorCode)){ |
| 88 | return NULL; |
| 89 | } |
| 90 | |
| 91 | if(srcLength == -1){ |
| 92 | srcLength = u_strlen(pSrc); |
| 93 | } |
| 94 | |
| 95 | pSrcLimit = pSrc + srcLength; |
| 96 | |
| 97 | for(;;) { |
| 98 | /* reset the error state */ |
| 99 | *pErrorCode = U_ZERO_ERROR; |
| 100 | |
| 101 | /* convert to chars using default converter */ |
| 102 | ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); |
| 103 | count =(tempBuf - saveBuf); |
| 104 | |
| 105 | /* This should rarely occur */ |
| 106 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
| 107 | tempBuf = saveBuf; |
| 108 | |
| 109 | /* we dont have enough room on the stack grow the buffer */ |
| 110 | int32_t newCapacity = 2 * srcLength; |
| 111 | if(newCapacity <= tempBufCapacity) { |
| 112 | newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; |
| 113 | } |
| 114 | if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
| 115 | newCapacity, count, 1)) { |
| 116 | goto cleanup; |
| 117 | } |
| 118 | |
| 119 | saveBuf = tempBuf; |
| 120 | tempBufLimit = tempBuf + tempBufCapacity; |
| 121 | tempBuf = tempBuf + count; |
| 122 | |
| 123 | } else { |
| 124 | break; |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | if(U_FAILURE(*pErrorCode)){ |
| 129 | goto cleanup; |
| 130 | } |
| 131 | |
| 132 | /* done with conversion null terminate the char buffer */ |
| 133 | if(count>=tempBufCapacity){ |
| 134 | tempBuf = saveBuf; |
| 135 | /* we dont have enough room on the stack grow the buffer */ |
| 136 | if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
| 137 | count+1, count, 1)) { |
| 138 | goto cleanup; |
| 139 | } |
| 140 | saveBuf = tempBuf; |
| 141 | } |
| 142 | |
| 143 | saveBuf[count]=0; |
| 144 | |
| 145 | |
| 146 | /* allocate more space than required |
| 147 | * here we assume that every char requires |
| 148 | * no more than 2 wchar_ts |
| 149 | */ |
| 150 | intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; |
| 151 | intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); |
| 152 | |
| 153 | if(intTarget){ |
| 154 | |
| 155 | int32_t nulLen = 0; |
| 156 | int32_t remaining = intTargetCapacity; |
| 157 | wchar_t* pIntTarget=intTarget; |
| 158 | tempBuf = saveBuf; |
| 159 | |
| 160 | /* now convert the mbs to wcs */ |
| 161 | for(;;){ |
| 162 | |
| 163 | /* we can call the system API since we are sure that |
| 164 | * there is atleast 1 null in the input |
| 165 | */ |
| 166 | retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); |
| 167 | |
| 168 | if(retVal==-1){ |
| 169 | *pErrorCode = U_INVALID_CHAR_FOUND; |
| 170 | break; |
| 171 | }else if(retVal== remaining){/* should never occur */ |
| 172 | int numWritten = (pIntTarget-intTarget); |
| 173 | u_growAnyBufferFromStatic(NULL,(void**) &intTarget, |
| 174 | &intTargetCapacity, |
| 175 | intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, |
| 176 | numWritten, |
| 177 | sizeof(wchar_t)); |
| 178 | pIntTarget = intTarget; |
| 179 | remaining=intTargetCapacity; |
| 180 | |
| 181 | if(nulLen!=count){ /*there are embedded nulls*/ |
| 182 | pIntTarget+=numWritten; |
| 183 | remaining-=numWritten; |
| 184 | } |
| 185 | |
| 186 | }else{ |
| 187 | int32_t nulVal; |
| 188 | /*scan for nulls */ |
| 189 | /* we donot check for limit since tempBuf is null terminated */ |
| 190 | while(tempBuf[nulLen++] != 0){ |
| 191 | } |
| 192 | nulVal = (nulLen < srcLength) ? 1 : 0; |
| 193 | pIntTarget = pIntTarget + retVal+nulVal; |
| 194 | remaining -=(retVal+nulVal); |
| 195 | |
| 196 | /* check if we have reached the source limit*/ |
| 197 | if(nulLen>=(count)){ |
| 198 | break; |
| 199 | } |
| 200 | } |
| 201 | } |
| 202 | count = (int32_t)(pIntTarget-intTarget); |
| 203 | |
| 204 | if(0 < count && count <= destCapacity){ |
| 205 | uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); |
| 206 | } |
| 207 | |
| 208 | if(pDestLength){ |
| 209 | *pDestLength = count; |
| 210 | } |
| 211 | |
| 212 | /* free the allocated memory */ |
| 213 | uprv_free(intTarget); |
| 214 | |
| 215 | }else{ |
| 216 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
| 217 | } |
| 218 | cleanup: |
| 219 | /* are we still using stack buffer */ |
| 220 | if(stackBuffer != saveBuf){ |
| 221 | uprv_free(saveBuf); |
| 222 | } |
| 223 | u_terminateWChars(dest,destCapacity,count,pErrorCode); |
| 224 | |
| 225 | u_releaseDefaultConverter(conv); |
| 226 | |
| 227 | return dest; |
| 228 | } |
| 229 | #endif |
| 230 | |
| 231 | U_CAPI wchar_t* U_EXPORT2 |
| 232 | u_strToWCS(wchar_t *dest, |
| 233 | int32_t destCapacity, |
| 234 | int32_t *pDestLength, |
| 235 | const UChar *src, |
| 236 | int32_t srcLength, |
| 237 | UErrorCode *pErrorCode){ |
| 238 | |
| 239 | /* args check */ |
| 240 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| 241 | return NULL; |
| 242 | } |
| 243 | |
| 244 | if( (src==NULL && srcLength!=0) || srcLength < -1 || |
| 245 | (destCapacity<0) || (dest == NULL && destCapacity > 0) |
| 246 | ) { |
| 247 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 248 | return NULL; |
| 249 | } |
| 250 | |
| 251 | #ifdef U_WCHAR_IS_UTF16 |
| 252 | /* wchar_t is UTF-16 just do a memcpy */ |
| 253 | if(srcLength == -1){ |
| 254 | srcLength = u_strlen(src); |
| 255 | } |
| 256 | if(0 < srcLength && srcLength <= destCapacity){ |
| 257 | u_memcpy((UChar *)dest, src, srcLength); |
| 258 | } |
| 259 | if(pDestLength){ |
| 260 | *pDestLength = srcLength; |
| 261 | } |
| 262 | |
| 263 | u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode); |
| 264 | |
| 265 | return dest; |
| 266 | |
| 267 | #elif defined U_WCHAR_IS_UTF32 |
| 268 | |
| 269 | return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, |
| 270 | src, srcLength, pErrorCode); |
| 271 | |
| 272 | #else |
| 273 | |
| 274 | return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); |
| 275 | |
| 276 | #endif |
| 277 | |
| 278 | } |
| 279 | |
| 280 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
| 281 | /* helper function */ |
| 282 | static UChar* |
| 283 | _strFromWCS( UChar *dest, |
| 284 | int32_t destCapacity, |
| 285 | int32_t *pDestLength, |
| 286 | const wchar_t *src, |
| 287 | int32_t srcLength, |
| 288 | UErrorCode *pErrorCode) |
| 289 | { |
| 290 | int32_t retVal =0, count =0 ; |
| 291 | UConverter* conv = NULL; |
| 292 | UChar* pTarget = NULL; |
| 293 | UChar* pTargetLimit = NULL; |
| 294 | UChar* target = NULL; |
| 295 | |
| 296 | UChar uStack [_STACK_BUFFER_CAPACITY]; |
| 297 | |
| 298 | wchar_t wStack[_STACK_BUFFER_CAPACITY]; |
| 299 | wchar_t* pWStack = wStack; |
| 300 | |
| 301 | |
| 302 | char cStack[_STACK_BUFFER_CAPACITY]; |
| 303 | int32_t cStackCap = _STACK_BUFFER_CAPACITY; |
| 304 | char* pCSrc=cStack; |
| 305 | char* pCSave=pCSrc; |
| 306 | char* pCSrcLimit=NULL; |
| 307 | |
| 308 | const wchar_t* pSrc = src; |
| 309 | const wchar_t* pSrcLimit = NULL; |
| 310 | |
| 311 | if(srcLength ==-1){ |
| 312 | /* if the wchar_t source is null terminated we can safely |
| 313 | * assume that there are no embedded nulls, this is a fast |
| 314 | * path for null terminated strings. |
| 315 | */ |
| 316 | for(;;){ |
| 317 | /* convert wchars to chars */ |
| 318 | retVal = uprv_wcstombs(pCSrc,src, cStackCap); |
| 319 | |
| 320 | if(retVal == -1){ |
| 321 | *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
| 322 | goto cleanup; |
| 323 | }else if(retVal >= (cStackCap-1)){ |
| 324 | /* Should rarely occur */ |
| 325 | u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
| 326 | cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); |
| 327 | pCSave = pCSrc; |
| 328 | }else{ |
| 329 | /* converted every thing */ |
| 330 | pCSrc = pCSrc+retVal; |
| 331 | break; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | }else{ |
| 336 | /* here the source is not null terminated |
| 337 | * so it may have nulls embeded and we need to |
| 338 | * do some extra processing |
| 339 | */ |
| 340 | int32_t remaining =cStackCap; |
| 341 | |
| 342 | pSrcLimit = src + srcLength; |
| 343 | |
| 344 | for(;;){ |
| 345 | int32_t nulLen = 0; |
| 346 | |
| 347 | /* find nulls in the string */ |
| 348 | while(nulLen<srcLength && pSrc[nulLen++]!=0){ |
| 349 | } |
| 350 | |
| 351 | if((pSrc+nulLen) < pSrcLimit){ |
| 352 | /* check if we have enough room in pCSrc */ |
| 353 | if(remaining < (nulLen * MB_CUR_MAX)){ |
| 354 | /* should rarely occur */ |
| 355 | int32_t len = (pCSrc-pCSave); |
| 356 | pCSrc = pCSave; |
| 357 | /* we do not have enough room so grow the buffer*/ |
| 358 | u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
| 359 | _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
| 360 | |
| 361 | pCSave = pCSrc; |
| 362 | pCSrc = pCSave+len; |
| 363 | remaining = cStackCap-(pCSrc - pCSave); |
| 364 | } |
| 365 | |
| 366 | /* we have found a null so convert the |
| 367 | * chunk from begining of non-null char to null |
| 368 | */ |
| 369 | retVal = uprv_wcstombs(pCSrc,pSrc,remaining); |
| 370 | |
| 371 | if(retVal==-1){ |
| 372 | /* an error occurred bail out */ |
| 373 | *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
| 374 | goto cleanup; |
| 375 | } |
| 376 | |
| 377 | pCSrc += retVal+1 /* already null terminated */; |
| 378 | |
| 379 | pSrc += nulLen; /* skip past the null */ |
| 380 | srcLength-=nulLen; /* decrement the srcLength */ |
| 381 | remaining -= (pCSrc-pCSave); |
| 382 | |
| 383 | |
| 384 | }else{ |
| 385 | /* the source is not null terminated and we are |
| 386 | * end of source so we copy the source to a temp buffer |
| 387 | * null terminate it and convert wchar_ts to chars |
| 388 | */ |
| 389 | if(nulLen >= _STACK_BUFFER_CAPACITY){ |
| 390 | /* Should rarely occcur */ |
| 391 | /* allocate new buffer buffer */ |
| 392 | pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); |
| 393 | if(pWStack==NULL){ |
| 394 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
| 395 | goto cleanup; |
| 396 | } |
| 397 | } |
| 398 | if(nulLen>0){ |
| 399 | /* copy the contents to tempStack */ |
| 400 | uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); |
| 401 | } |
| 402 | |
| 403 | /* null terminate the tempBuffer */ |
| 404 | pWStack[nulLen] =0 ; |
| 405 | |
| 406 | if(remaining < (nulLen * MB_CUR_MAX)){ |
| 407 | /* Should rarely occur */ |
| 408 | int32_t len = (pCSrc-pCSave); |
| 409 | pCSrc = pCSave; |
| 410 | /* we do not have enough room so grow the buffer*/ |
| 411 | u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
| 412 | cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
| 413 | |
| 414 | pCSave = pCSrc; |
| 415 | pCSrc = pCSave+len; |
| 416 | remaining = cStackCap-(pCSrc - pCSave); |
| 417 | } |
| 418 | /* convert to chars */ |
| 419 | retVal = uprv_wcstombs(pCSrc,pWStack,remaining); |
| 420 | |
| 421 | pCSrc += retVal; |
| 422 | pSrc += nulLen; |
| 423 | srcLength-=nulLen; /* decrement the srcLength */ |
| 424 | break; |
| 425 | } |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | /* OK..now we have converted from wchar_ts to chars now |
| 430 | * convert chars to UChars |
| 431 | */ |
| 432 | pCSrcLimit = pCSrc; |
| 433 | pCSrc = pCSave; |
| 434 | pTarget = target= dest; |
| 435 | pTargetLimit = dest + destCapacity; |
| 436 | |
| 437 | conv= u_getDefaultConverter(pErrorCode); |
| 438 | |
| 439 | if(U_FAILURE(*pErrorCode)|| conv==NULL){ |
| 440 | goto cleanup; |
| 441 | } |
| 442 | |
| 443 | for(;;) { |
| 444 | |
| 445 | *pErrorCode = U_ZERO_ERROR; |
| 446 | |
| 447 | /* convert to stack buffer*/ |
| 448 | ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); |
| 449 | |
| 450 | /* increment count to number written to stack */ |
| 451 | count+= pTarget - target; |
| 452 | |
| 453 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
| 454 | target = uStack; |
| 455 | pTarget = uStack; |
| 456 | pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; |
| 457 | } else { |
| 458 | break; |
| 459 | } |
| 460 | |
| 461 | } |
| 462 | |
| 463 | if(pDestLength){ |
| 464 | *pDestLength =count; |
| 465 | } |
| 466 | |
| 467 | u_terminateUChars(dest,destCapacity,count,pErrorCode); |
| 468 | |
| 469 | cleanup: |
| 470 | |
| 471 | if(cStack != pCSave){ |
| 472 | uprv_free(pCSave); |
| 473 | } |
| 474 | |
| 475 | if(wStack != pWStack){ |
| 476 | uprv_free(pWStack); |
| 477 | } |
| 478 | |
| 479 | u_releaseDefaultConverter(conv); |
| 480 | |
| 481 | return dest; |
| 482 | } |
| 483 | #endif |
| 484 | |
| 485 | U_CAPI UChar* U_EXPORT2 |
| 486 | u_strFromWCS(UChar *dest, |
| 487 | int32_t destCapacity, |
| 488 | int32_t *pDestLength, |
| 489 | const wchar_t *src, |
| 490 | int32_t srcLength, |
| 491 | UErrorCode *pErrorCode) |
| 492 | { |
| 493 | |
| 494 | /* args check */ |
| 495 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| 496 | return NULL; |
| 497 | } |
| 498 | |
| 499 | if( (src==NULL && srcLength!=0) || srcLength < -1 || |
| 500 | (destCapacity<0) || (dest == NULL && destCapacity > 0) |
| 501 | ) { |
| 502 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 503 | return NULL; |
| 504 | } |
| 505 | |
| 506 | #ifdef U_WCHAR_IS_UTF16 |
| 507 | /* wchar_t is UTF-16 just do a memcpy */ |
| 508 | if(srcLength == -1){ |
| 509 | srcLength = u_strlen((const UChar *)src); |
| 510 | } |
| 511 | if(0 < srcLength && srcLength <= destCapacity){ |
| 512 | u_memcpy(dest, (const UChar *)src, srcLength); |
| 513 | } |
| 514 | if(pDestLength){ |
| 515 | *pDestLength = srcLength; |
| 516 | } |
| 517 | |
| 518 | u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); |
| 519 | |
| 520 | return dest; |
| 521 | |
| 522 | #elif defined U_WCHAR_IS_UTF32 |
| 523 | |
| 524 | return u_strFromUTF32(dest, destCapacity, pDestLength, |
| 525 | (UChar32*)src, srcLength, pErrorCode); |
| 526 | |
| 527 | #else |
| 528 | |
| 529 | return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); |
| 530 | |
| 531 | #endif |
| 532 | |
| 533 | } |
| 534 | |
| 535 | #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */ |
| 536 | |