| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | |
| 5 | /*++ |
| 6 | |
| 7 | |
| 8 | |
| 9 | Module Name: |
| 10 | |
| 11 | unicode.cpp |
| 12 | |
| 13 | Abstract: |
| 14 | |
| 15 | Implementation of all functions related to Unicode support |
| 16 | |
| 17 | Revision History: |
| 18 | |
| 19 | |
| 20 | |
| 21 | --*/ |
| 22 | |
| 23 | #include "pal/thread.hpp" |
| 24 | |
| 25 | #include "pal/palinternal.h" |
| 26 | #include "pal/unicode_data.h" |
| 27 | #include "pal/dbgmsg.h" |
| 28 | #include "pal/file.h" |
| 29 | #include "pal/utf8.h" |
| 30 | #include "pal/locale.h" |
| 31 | #include "pal/cruntime.h" |
| 32 | #include "pal/stackstring.hpp" |
| 33 | |
| 34 | #if !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION) |
| 35 | #error Either pthread rwlocks or Core Foundation are required for Unicode support |
| 36 | #endif /* !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION) */ |
| 37 | |
| 38 | #include <pthread.h> |
| 39 | #include <locale.h> |
| 40 | #if HAVE_LIBINTL_H |
| 41 | #include <libintl.h> |
| 42 | #endif // HAVE_LIBINTL_H |
| 43 | #include <errno.h> |
| 44 | #if HAVE_COREFOUNDATION |
| 45 | #include <CoreFoundation/CoreFoundation.h> |
| 46 | #endif // HAVE_COREFOUNDATION |
| 47 | |
| 48 | #include <debugmacrosext.h> |
| 49 | |
| 50 | using namespace CorUnix; |
| 51 | |
| 52 | SET_DEFAULT_DEBUG_CHANNEL(UNICODE); |
| 53 | |
| 54 | #if HAVE_COREFOUNDATION |
| 55 | |
| 56 | static CP_MAPPING CP_TO_NATIVE_TABLE[] = { |
| 57 | { 65001, kCFStringEncodingUTF8, 4, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 58 | { 1252, kCFStringEncodingWindowsLatin1, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 59 | { 1251, kCFStringEncodingWindowsCyrillic, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 60 | { 1253, kCFStringEncodingWindowsGreek, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 61 | { 1254, kCFStringEncodingWindowsLatin5, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 62 | { 1258, kCFStringEncodingWindowsVietnamese, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 63 | { 932, kCFStringEncodingDOSJapanese, 2, { 129, 159, 224, 252, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 64 | { 949, kCFStringEncodingDOSKorean, 2, { 129, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| 65 | { 950, kCFStringEncodingDOSChineseTrad, 2, { 129, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } |
| 66 | }; |
| 67 | |
| 68 | #else // HAVE_COREFOUNDATION |
| 69 | |
| 70 | static const CP_MAPPING CP_TO_NATIVE_TABLE[] = { |
| 71 | { 65001, "utf8" , 4, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } |
| 72 | }; |
| 73 | |
| 74 | #endif // HAVE_COREFOUNDATION |
| 75 | |
| 76 | // We hardcode the system's default codepage to be UTF-8. |
| 77 | // There are several reasons for this: |
| 78 | // - On OSX, HFS+ file names are encoded as UTF-8. |
| 79 | // - On OSX, When writing strings to the console, the Terminal.app will interpret them as UTF-8. |
| 80 | // - We want Ansi marshalling to mean marshal to UTF-8 on Mac and Linux |
| 81 | static const UINT PAL_ACP = 65001; |
| 82 | |
| 83 | #if !HAVE_COREFOUNDATION |
| 84 | /*++ |
| 85 | Function: |
| 86 | UnicodeDataComp |
| 87 | This is the comparison function used by the bsearch function to search |
| 88 | for unicode characters in the UnicodeData array. |
| 89 | |
| 90 | Parameter: |
| 91 | pnKey |
| 92 | The unicode character value to search for. |
| 93 | elem |
| 94 | A pointer to a UnicodeDataRec. |
| 95 | |
| 96 | Return value: |
| 97 | <0 if pnKey < elem->nUnicodeValue |
| 98 | 0 if pnKey == elem->nUnicodeValue |
| 99 | >0 if pnKey > elem->nUnicodeValue |
| 100 | --*/ |
| 101 | static int UnicodeDataComp(const void *pnKey, const void *elem) |
| 102 | { |
| 103 | WCHAR uValue = ((UnicodeDataRec*)elem)->nUnicodeValue; |
| 104 | WORD rangeValue = ((UnicodeDataRec*)elem)->rangeValue; |
| 105 | |
| 106 | if (*((INT*)pnKey) < uValue) |
| 107 | { |
| 108 | return -1; |
| 109 | } |
| 110 | else |
| 111 | { |
| 112 | if (*((INT*)pnKey) > (uValue + rangeValue)) |
| 113 | { |
| 114 | return 1; |
| 115 | } |
| 116 | else |
| 117 | { |
| 118 | return 0; |
| 119 | } |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | /*++ |
| 124 | Function: |
| 125 | GetUnicodeData |
| 126 | This function is used to get information about a Unicode character. |
| 127 | |
| 128 | Parameters: |
| 129 | nUnicodeValue |
| 130 | The numeric value of the Unicode character to get information about. |
| 131 | pDataRec |
| 132 | The UnicodeDataRec to fill in with the data for the Unicode character. |
| 133 | |
| 134 | Return value: |
| 135 | TRUE if the Unicode character was found. |
| 136 | |
| 137 | --*/ |
| 138 | BOOL GetUnicodeData(INT nUnicodeValue, UnicodeDataRec *pDataRec) |
| 139 | { |
| 140 | BOOL bRet; |
| 141 | if (nUnicodeValue <= UNICODE_DATA_DIRECT_ACCESS) |
| 142 | { |
| 143 | *pDataRec = UnicodeData[nUnicodeValue]; |
| 144 | bRet = TRUE; |
| 145 | } |
| 146 | else |
| 147 | { |
| 148 | UnicodeDataRec *dataRec; |
| 149 | INT nNumOfChars = UNICODE_DATA_SIZE; |
| 150 | dataRec = (UnicodeDataRec *) bsearch(&nUnicodeValue, UnicodeData, nNumOfChars, |
| 151 | sizeof(UnicodeDataRec), UnicodeDataComp); |
| 152 | if (dataRec == NULL) |
| 153 | { |
| 154 | bRet = FALSE; |
| 155 | } |
| 156 | else |
| 157 | { |
| 158 | bRet = TRUE; |
| 159 | *pDataRec = *dataRec; |
| 160 | } |
| 161 | } |
| 162 | return bRet; |
| 163 | } |
| 164 | #endif /* !HAVE_COREFOUNDATION */ |
| 165 | |
| 166 | /*++ |
| 167 | Function: |
| 168 | CODEPAGEGetData |
| 169 | |
| 170 | IN UINT CodePage - The code page the caller |
| 171 | is attempting to retrieve data on. |
| 172 | |
| 173 | Returns a pointer to structure, NULL otherwise. |
| 174 | --*/ |
| 175 | const CP_MAPPING * |
| 176 | CODEPAGEGetData( IN UINT CodePage ) |
| 177 | { |
| 178 | UINT nSize = sizeof( CP_TO_NATIVE_TABLE ) / sizeof( CP_TO_NATIVE_TABLE[ 0 ] ); |
| 179 | UINT nIndex = 0; |
| 180 | |
| 181 | if ( CP_ACP == CodePage ) |
| 182 | { |
| 183 | CodePage = PAL_ACP; |
| 184 | } |
| 185 | |
| 186 | /* checking if the CodePage is ACP and returning true if so */ |
| 187 | while (nIndex < nSize) |
| 188 | { |
| 189 | if ( ( CP_TO_NATIVE_TABLE[ nIndex ] ).nCodePage == CodePage ) |
| 190 | { |
| 191 | return &(CP_TO_NATIVE_TABLE[ nIndex ]); |
| 192 | } |
| 193 | nIndex++; |
| 194 | } |
| 195 | return NULL; |
| 196 | } |
| 197 | |
| 198 | #if HAVE_COREFOUNDATION |
| 199 | /*++ |
| 200 | Function : |
| 201 | |
| 202 | CODEPAGECPToCFStringEncoding - Gets the CFStringEncoding for |
| 203 | the given codepage. |
| 204 | |
| 205 | Returns the CFStringEncoding for the given codepage. |
| 206 | --*/ |
| 207 | CFStringEncoding CODEPAGECPToCFStringEncoding(UINT codepage) |
| 208 | { |
| 209 | const CP_MAPPING *cp_mapping = CODEPAGEGetData(codepage); |
| 210 | if (cp_mapping == NULL) |
| 211 | { |
| 212 | return kCFStringEncodingInvalidId; |
| 213 | } |
| 214 | else |
| 215 | { |
| 216 | return cp_mapping->nCFEncoding; |
| 217 | } |
| 218 | } |
| 219 | #endif // HAVE_COREFOUNDATION |
| 220 | |
| 221 | /*++ |
| 222 | Function: |
| 223 | CharNextA |
| 224 | |
| 225 | Parameters |
| 226 | |
| 227 | lpsz |
| 228 | [in] Pointer to a character in a null-terminated string. |
| 229 | |
| 230 | Return Values |
| 231 | |
| 232 | A pointer to the next character in the string, or to the terminating null character if at the end of the string, indicates success. |
| 233 | |
| 234 | If lpsz points to the terminating null character, the return value is equal to lpsz. |
| 235 | |
| 236 | See MSDN doc. |
| 237 | --*/ |
| 238 | LPSTR |
| 239 | PALAPI |
| 240 | CharNextA( |
| 241 | IN LPCSTR lpsz) |
| 242 | { |
| 243 | LPSTR pRet; |
| 244 | PERF_ENTRY(CharNextA); |
| 245 | ENTRY("CharNextA (lpsz=%p (%s))\n" , lpsz?lpsz:NULL, lpsz?lpsz:NULL); |
| 246 | |
| 247 | pRet = CharNextExA(GetACP(), lpsz, 0); |
| 248 | |
| 249 | LOGEXIT ("CharNextA returns LPSTR %p\n" , pRet); |
| 250 | PERF_EXIT(CharNextA); |
| 251 | return pRet; |
| 252 | } |
| 253 | |
| 254 | |
| 255 | /*++ |
| 256 | Function: |
| 257 | CharNextExA |
| 258 | |
| 259 | See MSDN doc. |
| 260 | --*/ |
| 261 | LPSTR |
| 262 | PALAPI |
| 263 | CharNextExA( |
| 264 | IN WORD CodePage, |
| 265 | IN LPCSTR lpCurrentChar, |
| 266 | IN DWORD dwFlags) |
| 267 | { |
| 268 | LPSTR pRet = (LPSTR) lpCurrentChar; |
| 269 | |
| 270 | PERF_ENTRY(CharNextExA); |
| 271 | ENTRY("CharNextExA (CodePage=%hu, lpCurrentChar=%p (%s), dwFlags=%#x)\n" , |
| 272 | CodePage, lpCurrentChar?lpCurrentChar:"NULL" , lpCurrentChar?lpCurrentChar:"NULL" , dwFlags); |
| 273 | |
| 274 | if ((lpCurrentChar != NULL) && (*lpCurrentChar != 0)) |
| 275 | { |
| 276 | pRet += (*(lpCurrentChar+1) != 0) && |
| 277 | IsDBCSLeadByteEx(CodePage, *lpCurrentChar) ? 2 : 1; |
| 278 | } |
| 279 | |
| 280 | LOGEXIT("CharNextExA returns LPSTR:%p (%s)\n" , pRet, pRet); |
| 281 | PERF_EXIT(CharNextExA); |
| 282 | return pRet; |
| 283 | } |
| 284 | |
| 285 | |
| 286 | /*++ |
| 287 | Function: |
| 288 | GetConsoleOutputCP |
| 289 | |
| 290 | See MSDN doc. |
| 291 | --*/ |
| 292 | UINT |
| 293 | PALAPI |
| 294 | GetConsoleOutputCP( |
| 295 | VOID) |
| 296 | { |
| 297 | UINT nRet = 0; |
| 298 | PERF_ENTRY(GetConsoleOutputCP); |
| 299 | ENTRY("GetConsoleOutputCP()\n" ); |
| 300 | nRet = GetACP(); |
| 301 | LOGEXIT("GetConsoleOutputCP returns UINT %d \n" , nRet ); |
| 302 | PERF_EXIT(GetConsoleOutputCP); |
| 303 | return nRet; |
| 304 | } |
| 305 | |
| 306 | |
| 307 | /*++ |
| 308 | Function: |
| 309 | IsValidCodePage |
| 310 | |
| 311 | See MSDN doc. |
| 312 | |
| 313 | Notes : |
| 314 | "pseudo code pages", like CP_ACP, aren't considered 'valid' in this context. |
| 315 | CP_UTF7 and CP_UTF8, however, *are* considered valid code pages, even though |
| 316 | MSDN fails to mention them in the IsValidCodePage entry. |
| 317 | Note : CP_UTF7 support isn't required for CoreCLR |
| 318 | --*/ |
| 319 | BOOL |
| 320 | PALAPI |
| 321 | IsValidCodePage( |
| 322 | IN UINT CodePage) |
| 323 | { |
| 324 | BOOL retval = FALSE; |
| 325 | |
| 326 | PERF_ENTRY(IsValidCodePage); |
| 327 | ENTRY("IsValidCodePage(%d)\n" , CodePage ); |
| 328 | |
| 329 | switch(CodePage) |
| 330 | { |
| 331 | case CP_ACP : /* fall through */ |
| 332 | case CP_OEMCP : /* fall through */ |
| 333 | case CP_MACCP : /* fall through */ |
| 334 | case CP_THREAD_ACP: |
| 335 | /* 'pseudo code pages' : not valid */ |
| 336 | retval = FALSE; |
| 337 | break; |
| 338 | case CP_UTF7: |
| 339 | /* valid in Win32, but not supported in the PAL */ |
| 340 | retval = FALSE; |
| 341 | break; |
| 342 | case CP_UTF8: |
| 343 | /* valid, but not part of CODEPAGEGetData's tables */ |
| 344 | retval = TRUE; |
| 345 | break; |
| 346 | default: |
| 347 | retval = (NULL != CODEPAGEGetData( CodePage )); |
| 348 | break; |
| 349 | } |
| 350 | |
| 351 | LOGEXIT("IsValidCodePage returns BOOL %d\n" ,retval); |
| 352 | PERF_EXIT(IsValidCodePage); |
| 353 | return retval; |
| 354 | } |
| 355 | |
| 356 | /*++ |
| 357 | Function: |
| 358 | GetCPInfo |
| 359 | |
| 360 | See MSDN doc. |
| 361 | --*/ |
| 362 | BOOL |
| 363 | PALAPI |
| 364 | GetCPInfo( |
| 365 | IN UINT CodePage, |
| 366 | OUT LPCPINFO lpCPInfo) |
| 367 | { |
| 368 | const CP_MAPPING * lpStruct = NULL; |
| 369 | BOOL bRet = FALSE; |
| 370 | |
| 371 | PERF_ENTRY(GetCPInfo); |
| 372 | ENTRY("GetCPInfo(CodePage=%hu, lpCPInfo=%p)\n" , CodePage, lpCPInfo); |
| 373 | |
| 374 | /*check if the input code page is valid*/ |
| 375 | if( CP_ACP != CodePage && !IsValidCodePage( CodePage ) ) |
| 376 | { |
| 377 | /* error, invalid argument */ |
| 378 | ERROR("CodePage(%d) parameter is invalid\n" ,CodePage); |
| 379 | SetLastError( ERROR_INVALID_PARAMETER ); |
| 380 | goto done; |
| 381 | } |
| 382 | |
| 383 | /*check if the lpCPInfo parameter is valid. */ |
| 384 | if( !lpCPInfo ) |
| 385 | { |
| 386 | /* error, invalid argument */ |
| 387 | ERROR("lpCPInfo cannot be NULL\n" ); |
| 388 | SetLastError( ERROR_INVALID_PARAMETER ); |
| 389 | goto done; |
| 390 | } |
| 391 | |
| 392 | if ( NULL != ( lpStruct = CODEPAGEGetData( CodePage ) ) ) |
| 393 | { |
| 394 | lpCPInfo->MaxCharSize = lpStruct->nMaxByteSize;; |
| 395 | memcpy( lpCPInfo->LeadByte, lpStruct->LeadByte , MAX_LEADBYTES ); |
| 396 | |
| 397 | /* Don't need to be set, according to the spec. */ |
| 398 | memset( lpCPInfo->DefaultChar, '?', MAX_DEFAULTCHAR ); |
| 399 | |
| 400 | bRet = TRUE; |
| 401 | } |
| 402 | |
| 403 | done: |
| 404 | LOGEXIT("GetCPInfo returns BOOL %d \n" ,bRet); |
| 405 | PERF_EXIT(GetCPInfo); |
| 406 | return bRet; |
| 407 | } |
| 408 | |
| 409 | |
| 410 | /*++ |
| 411 | Function: |
| 412 | GetACP |
| 413 | |
| 414 | See MSDN doc. |
| 415 | --*/ |
| 416 | UINT |
| 417 | PALAPI |
| 418 | GetACP(VOID) |
| 419 | { |
| 420 | PERF_ENTRY(GetACP); |
| 421 | ENTRY("GetACP(VOID)\n" ); |
| 422 | |
| 423 | LOGEXIT("GetACP returning UINT %d\n" , PAL_ACP ); |
| 424 | PERF_EXIT(GetACP); |
| 425 | |
| 426 | return PAL_ACP; |
| 427 | } |
| 428 | |
| 429 | |
| 430 | /*++ |
| 431 | Function: |
| 432 | IsDBCSLeadByteEx |
| 433 | |
| 434 | See MSDN doc. |
| 435 | --*/ |
| 436 | BOOL |
| 437 | PALAPI |
| 438 | IsDBCSLeadByteEx( |
| 439 | IN UINT CodePage, |
| 440 | IN BYTE TestChar) |
| 441 | { |
| 442 | CPINFO cpinfo; |
| 443 | SIZE_T i; |
| 444 | BOOL bRet = FALSE; |
| 445 | |
| 446 | PERF_ENTRY(IsDBCSLeadByteEx); |
| 447 | ENTRY("IsDBCSLeadByteEx(CodePage=%#x, TestChar=%d)\n" , CodePage, TestChar); |
| 448 | |
| 449 | /* Get the lead byte info with respect to the given codepage*/ |
| 450 | if( !GetCPInfo( CodePage, &cpinfo ) ) |
| 451 | { |
| 452 | ERROR("Error CodePage(%#x) parameter is invalid\n" , CodePage ); |
| 453 | SetLastError( ERROR_INVALID_PARAMETER ); |
| 454 | goto done; |
| 455 | } |
| 456 | |
| 457 | for( i=0; i < sizeof(cpinfo.LeadByte)/sizeof(cpinfo.LeadByte[0]); i += 2 ) |
| 458 | { |
| 459 | if( 0 == cpinfo.LeadByte[ i ] ) |
| 460 | { |
| 461 | goto done; |
| 462 | } |
| 463 | |
| 464 | /*check if the given char is in one of the lead byte ranges*/ |
| 465 | if( cpinfo.LeadByte[i] <= TestChar && TestChar<= cpinfo.LeadByte[i+1] ) |
| 466 | { |
| 467 | bRet = TRUE; |
| 468 | goto done; |
| 469 | } |
| 470 | } |
| 471 | done: |
| 472 | LOGEXIT("IsDBCSLeadByteEx returns BOOL %d\n" ,bRet); |
| 473 | PERF_EXIT(IsDBCSLeadByteEx); |
| 474 | return bRet; |
| 475 | } |
| 476 | |
| 477 | /*++ |
| 478 | Function: |
| 479 | IsDBCSLeadByte |
| 480 | |
| 481 | See MSDN doc. |
| 482 | --*/ |
| 483 | BOOL |
| 484 | PALAPI |
| 485 | IsDBCSLeadByte( |
| 486 | IN BYTE TestChar) |
| 487 | { |
| 488 | // UNIXTODO: Implement this! |
| 489 | ERROR("Needs Implementation!!!" ); |
| 490 | return FALSE; |
| 491 | } |
| 492 | |
| 493 | /*++ |
| 494 | Function: |
| 495 | MultiByteToWideChar |
| 496 | |
| 497 | See MSDN doc. |
| 498 | |
| 499 | --*/ |
| 500 | int |
| 501 | PALAPI |
| 502 | MultiByteToWideChar( |
| 503 | IN UINT CodePage, |
| 504 | IN DWORD dwFlags, |
| 505 | IN LPCSTR lpMultiByteStr, |
| 506 | IN int cbMultiByte, |
| 507 | OUT LPWSTR lpWideCharStr, |
| 508 | IN int cchWideChar) |
| 509 | { |
| 510 | INT retval =0; |
| 511 | #if HAVE_COREFOUNDATION |
| 512 | CFStringRef cfString = NULL; |
| 513 | CFStringEncoding cfEncoding; |
| 514 | int bytesToConvert; |
| 515 | #endif /* HAVE_COREFOUNDATION */ |
| 516 | |
| 517 | PERF_ENTRY(MultiByteToWideChar); |
| 518 | ENTRY("MultiByteToWideChar(CodePage=%u, dwFlags=%#x, lpMultiByteStr=%p (%s)," |
| 519 | " cbMultiByte=%d, lpWideCharStr=%p, cchWideChar=%d)\n" , |
| 520 | CodePage, dwFlags, lpMultiByteStr?lpMultiByteStr:"NULL" , lpMultiByteStr?lpMultiByteStr:"NULL" , |
| 521 | cbMultiByte, lpWideCharStr, cchWideChar); |
| 522 | |
| 523 | if (dwFlags & ~(MB_ERR_INVALID_CHARS | MB_PRECOMPOSED)) |
| 524 | { |
| 525 | ASSERT("Error dwFlags(0x%x) parameter is invalid\n" , dwFlags); |
| 526 | SetLastError(ERROR_INVALID_FLAGS); |
| 527 | goto EXIT; |
| 528 | } |
| 529 | |
| 530 | if ( (cbMultiByte == 0) || (cchWideChar < 0) || |
| 531 | (lpMultiByteStr == NULL) || |
| 532 | ((cchWideChar != 0) && |
| 533 | ((lpWideCharStr == NULL) || |
| 534 | (lpMultiByteStr == (LPSTR)lpWideCharStr))) ) |
| 535 | { |
| 536 | ERROR("Error lpMultiByteStr parameters are invalid\n" ); |
| 537 | SetLastError(ERROR_INVALID_PARAMETER); |
| 538 | goto EXIT; |
| 539 | } |
| 540 | |
| 541 | // Use UTF8ToUnicode on all systems, since it replaces |
| 542 | // invalid characters and Core Foundation doesn't do that. |
| 543 | if (CodePage == CP_UTF8 || (CodePage == CP_ACP && GetACP() == CP_UTF8)) |
| 544 | { |
| 545 | if (cbMultiByte <= -1) |
| 546 | { |
| 547 | cbMultiByte = strlen(lpMultiByteStr) + 1; |
| 548 | } |
| 549 | |
| 550 | retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags); |
| 551 | goto EXIT; |
| 552 | } |
| 553 | |
| 554 | #if !HAVE_COREFOUNDATION |
| 555 | ERROR( "This code page is not in the system.\n" ); |
| 556 | SetLastError( ERROR_INVALID_PARAMETER ); |
| 557 | goto EXIT; |
| 558 | #else /* !HAVE_COREFOUNDATION */ |
| 559 | bytesToConvert = cbMultiByte; |
| 560 | if (bytesToConvert == -1) |
| 561 | { |
| 562 | /* Plus one for the trailing '\0', which will end up |
| 563 | * in the CFString. */ |
| 564 | bytesToConvert = strlen(lpMultiByteStr) + 1; |
| 565 | } |
| 566 | |
| 567 | cfEncoding = CODEPAGECPToCFStringEncoding(CodePage); |
| 568 | if (cfEncoding == kCFStringEncodingInvalidId) |
| 569 | { |
| 570 | ERROR( "This code page is not in the system.\n" ); |
| 571 | SetLastError( ERROR_INVALID_PARAMETER ); |
| 572 | goto EXIT; |
| 573 | } |
| 574 | |
| 575 | cfString = CFStringCreateWithBytes(kCFAllocatorDefault, (UInt8*)lpMultiByteStr, |
| 576 | bytesToConvert, cfEncoding, TRUE); |
| 577 | if (cfString == NULL) |
| 578 | { |
| 579 | ERROR( "Failed to convert the string to the specified encoding.\n" ); |
| 580 | SetLastError( ERROR_NO_UNICODE_TRANSLATION ); |
| 581 | goto EXIT; |
| 582 | } |
| 583 | |
| 584 | if (cchWideChar != 0) |
| 585 | { |
| 586 | /* Do the conversion. */ |
| 587 | CFIndex length = CFStringGetLength(cfString); |
| 588 | if (length > cchWideChar) |
| 589 | { |
| 590 | ERROR("Error insufficient buffer\n" ); |
| 591 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
| 592 | retval = 0; |
| 593 | goto ReleaseString; |
| 594 | } |
| 595 | CFStringGetCharacters(cfString, CFRangeMake(0, length), |
| 596 | (UniChar*)lpWideCharStr); |
| 597 | retval = length; |
| 598 | } |
| 599 | else |
| 600 | { |
| 601 | /* Just return the number of wide characters needed. */ |
| 602 | retval = CFStringGetLength(cfString); |
| 603 | } |
| 604 | |
| 605 | ReleaseString: |
| 606 | if (cfString != NULL) |
| 607 | { |
| 608 | CFRelease(cfString); |
| 609 | } |
| 610 | #endif /* !HAVE_COREFOUNDATION */ |
| 611 | |
| 612 | EXIT: |
| 613 | |
| 614 | LOGEXIT("MultiByteToWideChar returns %d.\n" ,retval); |
| 615 | PERF_EXIT(MultiByteToWideChar); |
| 616 | return retval; |
| 617 | } |
| 618 | |
| 619 | |
| 620 | /*++ |
| 621 | Function: |
| 622 | WideCharToMultiByte |
| 623 | |
| 624 | See MSDN doc. |
| 625 | |
| 626 | --*/ |
| 627 | int |
| 628 | PALAPI |
| 629 | WideCharToMultiByte( |
| 630 | IN UINT CodePage, |
| 631 | IN DWORD dwFlags, |
| 632 | IN LPCWSTR lpWideCharStr, |
| 633 | IN int cchWideChar, |
| 634 | OUT LPSTR lpMultiByteStr, |
| 635 | IN int cbMultiByte, |
| 636 | IN LPCSTR lpDefaultChar, |
| 637 | OUT LPBOOL lpUsedDefaultChar) |
| 638 | { |
| 639 | INT retval =0; |
| 640 | char defaultChar = '?'; |
| 641 | BOOL usedDefaultChar = FALSE; |
| 642 | #if HAVE_COREFOUNDATION |
| 643 | CFStringRef cfString = NULL; |
| 644 | CFStringEncoding cfEncoding; |
| 645 | int charsToConvert; |
| 646 | CFIndex charsConverted; |
| 647 | CFIndex bytesConverted; |
| 648 | #endif /* !HAVE_COREFOUNDATION */ |
| 649 | |
| 650 | PERF_ENTRY(WideCharToMultiByte); |
| 651 | ENTRY("WideCharToMultiByte(CodePage=%u, dwFlags=%#x, lpWideCharStr=%p (%S), " |
| 652 | "cchWideChar=%d, lpMultiByteStr=%p, cbMultiByte=%d, " |
| 653 | "lpDefaultChar=%p, lpUsedDefaultChar=%p)\n" , |
| 654 | CodePage, dwFlags, lpWideCharStr?lpWideCharStr:W16_NULLSTRING, lpWideCharStr?lpWideCharStr:W16_NULLSTRING, |
| 655 | cchWideChar, lpMultiByteStr, cbMultiByte, |
| 656 | lpDefaultChar, lpUsedDefaultChar); |
| 657 | |
| 658 | if (dwFlags & ~WC_NO_BEST_FIT_CHARS) |
| 659 | { |
| 660 | ERROR("dwFlags %d invalid\n" , dwFlags); |
| 661 | SetLastError(ERROR_INVALID_FLAGS); |
| 662 | goto EXIT; |
| 663 | } |
| 664 | |
| 665 | // No special action is needed for WC_NO_BEST_FIT_CHARS. The default |
| 666 | // behavior of this API on Unix is not to find the best fit for a unicode |
| 667 | // character that does not map directly into a code point in the given |
| 668 | // code page. The best fit functionality is not available in wctomb on Unix |
| 669 | // and is better left unimplemented for security reasons anyway. |
| 670 | |
| 671 | if ((cchWideChar < -1) || (cbMultiByte < 0) || |
| 672 | (lpWideCharStr == NULL) || |
| 673 | ((cbMultiByte != 0) && |
| 674 | ((lpMultiByteStr == NULL) || |
| 675 | (lpWideCharStr == (LPWSTR)lpMultiByteStr))) ) |
| 676 | { |
| 677 | ERROR("Error lpWideCharStr parameters are invalid\n" ); |
| 678 | SetLastError(ERROR_INVALID_PARAMETER); |
| 679 | goto EXIT; |
| 680 | } |
| 681 | |
| 682 | if (lpDefaultChar != NULL) |
| 683 | { |
| 684 | defaultChar = *lpDefaultChar; |
| 685 | } |
| 686 | |
| 687 | // Use UnicodeToUTF8 on all systems because we use |
| 688 | // UTF8ToUnicode in MultiByteToWideChar() on all systems. |
| 689 | if (CodePage == CP_UTF8 || (CodePage == CP_ACP && GetACP() == CP_UTF8)) |
| 690 | { |
| 691 | if (cchWideChar == -1) |
| 692 | { |
| 693 | cchWideChar = PAL_wcslen(lpWideCharStr) + 1; |
| 694 | } |
| 695 | retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte); |
| 696 | goto EXIT; |
| 697 | } |
| 698 | |
| 699 | #if HAVE_COREFOUNDATION |
| 700 | charsToConvert = cchWideChar; |
| 701 | if (charsToConvert == -1) |
| 702 | { |
| 703 | LPCWSTR ptr = lpWideCharStr; |
| 704 | |
| 705 | charsToConvert = 0; |
| 706 | while(*ptr++ != 0) |
| 707 | { |
| 708 | charsToConvert++; |
| 709 | } |
| 710 | charsToConvert++; /* For the terminating '\0' */ |
| 711 | } |
| 712 | |
| 713 | cfEncoding = CODEPAGECPToCFStringEncoding(CodePage); |
| 714 | if (cfEncoding == kCFStringEncodingInvalidId) |
| 715 | { |
| 716 | ERROR( "This code page is not in the system.\n" ); |
| 717 | SetLastError(ERROR_INVALID_PARAMETER); |
| 718 | goto EXIT; |
| 719 | } |
| 720 | |
| 721 | cfString = CFStringCreateWithCharacters(kCFAllocatorDefault, |
| 722 | (const UniChar*)lpWideCharStr, charsToConvert); |
| 723 | if (cfString == NULL) |
| 724 | { |
| 725 | ERROR("CFString creation failed.\n" ); |
| 726 | SetLastError(ERROR_INVALID_PARAMETER); |
| 727 | goto EXIT; |
| 728 | } |
| 729 | |
| 730 | if (cbMultiByte == 0) |
| 731 | { |
| 732 | lpMultiByteStr = NULL; |
| 733 | } |
| 734 | charsConverted = CFStringGetBytes(cfString, |
| 735 | CFRangeMake(0, charsToConvert), |
| 736 | cfEncoding, '?', TRUE, (UInt8*)lpMultiByteStr, |
| 737 | cbMultiByte, &bytesConverted); |
| 738 | if (charsConverted != charsToConvert) |
| 739 | { |
| 740 | if (lpMultiByteStr != NULL) |
| 741 | { |
| 742 | // CFStringGetBytes can fail due to an insufficient buffer or for |
| 743 | // other reasons. We need to check if we're out of buffer space. |
| 744 | charsConverted = CFStringGetBytes(cfString, |
| 745 | CFRangeMake(0, charsToConvert), |
| 746 | cfEncoding, '?', TRUE, NULL, |
| 747 | 0, &bytesConverted); |
| 748 | if (cbMultiByte < bytesConverted) |
| 749 | { |
| 750 | ERROR("Insufficient buffer for CFStringGetBytes.\n" ); |
| 751 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
| 752 | goto ReleaseString; |
| 753 | } |
| 754 | } |
| 755 | ERROR("Not all characters were converted.\n" ); |
| 756 | SetLastError(ERROR_INVALID_PARAMETER); |
| 757 | goto ReleaseString; |
| 758 | } |
| 759 | retval = bytesConverted; |
| 760 | |
| 761 | ReleaseString: |
| 762 | if (cfString != NULL) |
| 763 | { |
| 764 | CFRelease(cfString); |
| 765 | } |
| 766 | #else /*HAVE_COREFOUNDATION */ |
| 767 | ERROR( "This code page is not in the system.\n" ); |
| 768 | SetLastError( ERROR_INVALID_PARAMETER ); |
| 769 | goto EXIT; |
| 770 | #endif /* HAVE_COREFOUNDATION */ |
| 771 | |
| 772 | EXIT: |
| 773 | |
| 774 | if ( lpUsedDefaultChar != NULL ) |
| 775 | { |
| 776 | *lpUsedDefaultChar = usedDefaultChar; |
| 777 | } |
| 778 | |
| 779 | /* Flag the cases when WC_NO_BEST_FIT_CHARS was not specified |
| 780 | * but we found characters that had to be replaced with default |
| 781 | * characters. Note that Windows would have attempted to find |
| 782 | * best fit characters under these conditions and that could pose |
| 783 | * a security risk. |
| 784 | */ |
| 785 | _ASSERT_MSG((dwFlags & WC_NO_BEST_FIT_CHARS) || !usedDefaultChar, |
| 786 | "WideCharToMultiByte found a string which doesn't round trip: (%p)%S " |
| 787 | "and WC_NO_BEST_FIT_CHARS was not specified\n" , |
| 788 | lpWideCharStr, lpWideCharStr); |
| 789 | |
| 790 | LOGEXIT("WideCharToMultiByte returns INT %d\n" , retval); |
| 791 | PERF_EXIT(WideCharToMultiByte); |
| 792 | return retval; |
| 793 | } |
| 794 | |
| 795 | extern char * g_szCoreCLRPath; |
| 796 | |
| 797 | /*++ |
| 798 | Function : |
| 799 | |
| 800 | PAL_BindResources - bind the resource domain to the path where the coreclr resides |
| 801 | |
| 802 | Returns TRUE if it succeeded, FALSE if it failed due to OOM |
| 803 | --*/ |
| 804 | BOOL |
| 805 | PALAPI |
| 806 | PAL_BindResources(IN LPCSTR lpDomain) |
| 807 | { |
| 808 | #if HAVE_LIBINTL_H |
| 809 | _ASSERTE(g_szCoreCLRPath != NULL); |
| 810 | char * coreCLRDirectoryPath; |
| 811 | PathCharString coreCLRDirectoryPathPS; |
| 812 | int len = strlen(g_szCoreCLRPath); |
| 813 | coreCLRDirectoryPath = coreCLRDirectoryPathPS.OpenStringBuffer(len); |
| 814 | if (NULL == coreCLRDirectoryPath) |
| 815 | { |
| 816 | return FALSE; |
| 817 | } |
| 818 | DWORD size = FILEGetDirectoryFromFullPathA(g_szCoreCLRPath, len, coreCLRDirectoryPath); |
| 819 | coreCLRDirectoryPathPS.CloseBuffer(size); |
| 820 | |
| 821 | LPCSTR boundPath = bindtextdomain(lpDomain, coreCLRDirectoryPath); |
| 822 | |
| 823 | return boundPath != NULL; |
| 824 | #else // HAVE_LIBINTL_H |
| 825 | // UNIXTODO: Implement for Unixes without libintl if necessary |
| 826 | return TRUE; |
| 827 | #endif // HAVE_LIBINTL_H |
| 828 | } |
| 829 | |
| 830 | /*++ |
| 831 | Function : |
| 832 | |
| 833 | PAL_GetResourceString - get localized string for a specified resource. |
| 834 | The string that is passed in should be the English string, since it |
| 835 | will be returned if an appropriately localized version is not found. |
| 836 | |
| 837 | Returns number of characters retrieved, 0 if it failed. |
| 838 | --*/ |
| 839 | int |
| 840 | PALAPI |
| 841 | PAL_GetResourceString( |
| 842 | IN LPCSTR lpDomain, |
| 843 | IN LPCSTR lpResourceStr, |
| 844 | OUT LPWSTR lpWideCharStr, |
| 845 | IN int cchWideChar |
| 846 | ) |
| 847 | { |
| 848 | #if HAVE_LIBINTL_H |
| 849 | // NOTE: dgettext returns the key if it fails to locate the appropriate |
| 850 | // resource. In our case, that will be the English string. |
| 851 | LPCSTR resourceString = dgettext(lpDomain, lpResourceStr); |
| 852 | #else // HAVE_LIBINTL_H |
| 853 | // UNIXTODO: Implement for OSX using the native localization API |
| 854 | |
| 855 | // This is a temporary solution until we add the real native resource support. |
| 856 | LPCSTR resourceString = lpResourceStr; |
| 857 | #endif // HAVE_LIBINTL_H |
| 858 | |
| 859 | int length = strlen(resourceString); |
| 860 | return UTF8ToUnicode(lpResourceStr, length + 1, lpWideCharStr, cchWideChar, 0); |
| 861 | } |
| 862 | |