1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*++ |
6 | |
7 | |
8 | |
9 | Module Name: |
10 | |
11 | unicode.cpp |
12 | |
13 | Abstract: |
14 | |
15 | Implementation of all functions related to Unicode support |
16 | |
17 | Revision History: |
18 | |
19 | |
20 | |
21 | --*/ |
22 | |
23 | #include "pal/thread.hpp" |
24 | |
25 | #include "pal/palinternal.h" |
26 | #include "pal/unicode_data.h" |
27 | #include "pal/dbgmsg.h" |
28 | #include "pal/file.h" |
29 | #include "pal/utf8.h" |
30 | #include "pal/locale.h" |
31 | #include "pal/cruntime.h" |
32 | #include "pal/stackstring.hpp" |
33 | |
34 | #if !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION) |
35 | #error Either pthread rwlocks or Core Foundation are required for Unicode support |
36 | #endif /* !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION) */ |
37 | |
38 | #include <pthread.h> |
39 | #include <locale.h> |
40 | #if HAVE_LIBINTL_H |
41 | #include <libintl.h> |
42 | #endif // HAVE_LIBINTL_H |
43 | #include <errno.h> |
44 | #if HAVE_COREFOUNDATION |
45 | #include <CoreFoundation/CoreFoundation.h> |
46 | #endif // HAVE_COREFOUNDATION |
47 | |
48 | #include <debugmacrosext.h> |
49 | |
50 | using namespace CorUnix; |
51 | |
52 | SET_DEFAULT_DEBUG_CHANNEL(UNICODE); |
53 | |
54 | #if HAVE_COREFOUNDATION |
55 | |
56 | static CP_MAPPING CP_TO_NATIVE_TABLE[] = { |
57 | { 65001, kCFStringEncodingUTF8, 4, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
58 | { 1252, kCFStringEncodingWindowsLatin1, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
59 | { 1251, kCFStringEncodingWindowsCyrillic, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
60 | { 1253, kCFStringEncodingWindowsGreek, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
61 | { 1254, kCFStringEncodingWindowsLatin5, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
62 | { 1258, kCFStringEncodingWindowsVietnamese, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
63 | { 932, kCFStringEncodingDOSJapanese, 2, { 129, 159, 224, 252, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
64 | { 949, kCFStringEncodingDOSKorean, 2, { 129, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
65 | { 950, kCFStringEncodingDOSChineseTrad, 2, { 129, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } |
66 | }; |
67 | |
68 | #else // HAVE_COREFOUNDATION |
69 | |
70 | static const CP_MAPPING CP_TO_NATIVE_TABLE[] = { |
71 | { 65001, "utf8" , 4, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } |
72 | }; |
73 | |
74 | #endif // HAVE_COREFOUNDATION |
75 | |
76 | // We hardcode the system's default codepage to be UTF-8. |
77 | // There are several reasons for this: |
78 | // - On OSX, HFS+ file names are encoded as UTF-8. |
79 | // - On OSX, When writing strings to the console, the Terminal.app will interpret them as UTF-8. |
80 | // - We want Ansi marshalling to mean marshal to UTF-8 on Mac and Linux |
81 | static const UINT PAL_ACP = 65001; |
82 | |
83 | #if !HAVE_COREFOUNDATION |
84 | /*++ |
85 | Function: |
86 | UnicodeDataComp |
87 | This is the comparison function used by the bsearch function to search |
88 | for unicode characters in the UnicodeData array. |
89 | |
90 | Parameter: |
91 | pnKey |
92 | The unicode character value to search for. |
93 | elem |
94 | A pointer to a UnicodeDataRec. |
95 | |
96 | Return value: |
97 | <0 if pnKey < elem->nUnicodeValue |
98 | 0 if pnKey == elem->nUnicodeValue |
99 | >0 if pnKey > elem->nUnicodeValue |
100 | --*/ |
101 | static int UnicodeDataComp(const void *pnKey, const void *elem) |
102 | { |
103 | WCHAR uValue = ((UnicodeDataRec*)elem)->nUnicodeValue; |
104 | WORD rangeValue = ((UnicodeDataRec*)elem)->rangeValue; |
105 | |
106 | if (*((INT*)pnKey) < uValue) |
107 | { |
108 | return -1; |
109 | } |
110 | else |
111 | { |
112 | if (*((INT*)pnKey) > (uValue + rangeValue)) |
113 | { |
114 | return 1; |
115 | } |
116 | else |
117 | { |
118 | return 0; |
119 | } |
120 | } |
121 | } |
122 | |
123 | /*++ |
124 | Function: |
125 | GetUnicodeData |
126 | This function is used to get information about a Unicode character. |
127 | |
128 | Parameters: |
129 | nUnicodeValue |
130 | The numeric value of the Unicode character to get information about. |
131 | pDataRec |
132 | The UnicodeDataRec to fill in with the data for the Unicode character. |
133 | |
134 | Return value: |
135 | TRUE if the Unicode character was found. |
136 | |
137 | --*/ |
138 | BOOL GetUnicodeData(INT nUnicodeValue, UnicodeDataRec *pDataRec) |
139 | { |
140 | BOOL bRet; |
141 | if (nUnicodeValue <= UNICODE_DATA_DIRECT_ACCESS) |
142 | { |
143 | *pDataRec = UnicodeData[nUnicodeValue]; |
144 | bRet = TRUE; |
145 | } |
146 | else |
147 | { |
148 | UnicodeDataRec *dataRec; |
149 | INT nNumOfChars = UNICODE_DATA_SIZE; |
150 | dataRec = (UnicodeDataRec *) bsearch(&nUnicodeValue, UnicodeData, nNumOfChars, |
151 | sizeof(UnicodeDataRec), UnicodeDataComp); |
152 | if (dataRec == NULL) |
153 | { |
154 | bRet = FALSE; |
155 | } |
156 | else |
157 | { |
158 | bRet = TRUE; |
159 | *pDataRec = *dataRec; |
160 | } |
161 | } |
162 | return bRet; |
163 | } |
164 | #endif /* !HAVE_COREFOUNDATION */ |
165 | |
166 | /*++ |
167 | Function: |
168 | CODEPAGEGetData |
169 | |
170 | IN UINT CodePage - The code page the caller |
171 | is attempting to retrieve data on. |
172 | |
173 | Returns a pointer to structure, NULL otherwise. |
174 | --*/ |
175 | const CP_MAPPING * |
176 | CODEPAGEGetData( IN UINT CodePage ) |
177 | { |
178 | UINT nSize = sizeof( CP_TO_NATIVE_TABLE ) / sizeof( CP_TO_NATIVE_TABLE[ 0 ] ); |
179 | UINT nIndex = 0; |
180 | |
181 | if ( CP_ACP == CodePage ) |
182 | { |
183 | CodePage = PAL_ACP; |
184 | } |
185 | |
186 | /* checking if the CodePage is ACP and returning true if so */ |
187 | while (nIndex < nSize) |
188 | { |
189 | if ( ( CP_TO_NATIVE_TABLE[ nIndex ] ).nCodePage == CodePage ) |
190 | { |
191 | return &(CP_TO_NATIVE_TABLE[ nIndex ]); |
192 | } |
193 | nIndex++; |
194 | } |
195 | return NULL; |
196 | } |
197 | |
198 | #if HAVE_COREFOUNDATION |
199 | /*++ |
200 | Function : |
201 | |
202 | CODEPAGECPToCFStringEncoding - Gets the CFStringEncoding for |
203 | the given codepage. |
204 | |
205 | Returns the CFStringEncoding for the given codepage. |
206 | --*/ |
207 | CFStringEncoding CODEPAGECPToCFStringEncoding(UINT codepage) |
208 | { |
209 | const CP_MAPPING *cp_mapping = CODEPAGEGetData(codepage); |
210 | if (cp_mapping == NULL) |
211 | { |
212 | return kCFStringEncodingInvalidId; |
213 | } |
214 | else |
215 | { |
216 | return cp_mapping->nCFEncoding; |
217 | } |
218 | } |
219 | #endif // HAVE_COREFOUNDATION |
220 | |
221 | /*++ |
222 | Function: |
223 | CharNextA |
224 | |
225 | Parameters |
226 | |
227 | lpsz |
228 | [in] Pointer to a character in a null-terminated string. |
229 | |
230 | Return Values |
231 | |
232 | A pointer to the next character in the string, or to the terminating null character if at the end of the string, indicates success. |
233 | |
234 | If lpsz points to the terminating null character, the return value is equal to lpsz. |
235 | |
236 | See MSDN doc. |
237 | --*/ |
238 | LPSTR |
239 | PALAPI |
240 | CharNextA( |
241 | IN LPCSTR lpsz) |
242 | { |
243 | LPSTR pRet; |
244 | PERF_ENTRY(CharNextA); |
245 | ENTRY("CharNextA (lpsz=%p (%s))\n" , lpsz?lpsz:NULL, lpsz?lpsz:NULL); |
246 | |
247 | pRet = CharNextExA(GetACP(), lpsz, 0); |
248 | |
249 | LOGEXIT ("CharNextA returns LPSTR %p\n" , pRet); |
250 | PERF_EXIT(CharNextA); |
251 | return pRet; |
252 | } |
253 | |
254 | |
255 | /*++ |
256 | Function: |
257 | CharNextExA |
258 | |
259 | See MSDN doc. |
260 | --*/ |
261 | LPSTR |
262 | PALAPI |
263 | CharNextExA( |
264 | IN WORD CodePage, |
265 | IN LPCSTR lpCurrentChar, |
266 | IN DWORD dwFlags) |
267 | { |
268 | LPSTR pRet = (LPSTR) lpCurrentChar; |
269 | |
270 | PERF_ENTRY(CharNextExA); |
271 | ENTRY("CharNextExA (CodePage=%hu, lpCurrentChar=%p (%s), dwFlags=%#x)\n" , |
272 | CodePage, lpCurrentChar?lpCurrentChar:"NULL" , lpCurrentChar?lpCurrentChar:"NULL" , dwFlags); |
273 | |
274 | if ((lpCurrentChar != NULL) && (*lpCurrentChar != 0)) |
275 | { |
276 | pRet += (*(lpCurrentChar+1) != 0) && |
277 | IsDBCSLeadByteEx(CodePage, *lpCurrentChar) ? 2 : 1; |
278 | } |
279 | |
280 | LOGEXIT("CharNextExA returns LPSTR:%p (%s)\n" , pRet, pRet); |
281 | PERF_EXIT(CharNextExA); |
282 | return pRet; |
283 | } |
284 | |
285 | |
286 | /*++ |
287 | Function: |
288 | GetConsoleOutputCP |
289 | |
290 | See MSDN doc. |
291 | --*/ |
292 | UINT |
293 | PALAPI |
294 | GetConsoleOutputCP( |
295 | VOID) |
296 | { |
297 | UINT nRet = 0; |
298 | PERF_ENTRY(GetConsoleOutputCP); |
299 | ENTRY("GetConsoleOutputCP()\n" ); |
300 | nRet = GetACP(); |
301 | LOGEXIT("GetConsoleOutputCP returns UINT %d \n" , nRet ); |
302 | PERF_EXIT(GetConsoleOutputCP); |
303 | return nRet; |
304 | } |
305 | |
306 | |
307 | /*++ |
308 | Function: |
309 | IsValidCodePage |
310 | |
311 | See MSDN doc. |
312 | |
313 | Notes : |
314 | "pseudo code pages", like CP_ACP, aren't considered 'valid' in this context. |
315 | CP_UTF7 and CP_UTF8, however, *are* considered valid code pages, even though |
316 | MSDN fails to mention them in the IsValidCodePage entry. |
317 | Note : CP_UTF7 support isn't required for CoreCLR |
318 | --*/ |
319 | BOOL |
320 | PALAPI |
321 | IsValidCodePage( |
322 | IN UINT CodePage) |
323 | { |
324 | BOOL retval = FALSE; |
325 | |
326 | PERF_ENTRY(IsValidCodePage); |
327 | ENTRY("IsValidCodePage(%d)\n" , CodePage ); |
328 | |
329 | switch(CodePage) |
330 | { |
331 | case CP_ACP : /* fall through */ |
332 | case CP_OEMCP : /* fall through */ |
333 | case CP_MACCP : /* fall through */ |
334 | case CP_THREAD_ACP: |
335 | /* 'pseudo code pages' : not valid */ |
336 | retval = FALSE; |
337 | break; |
338 | case CP_UTF7: |
339 | /* valid in Win32, but not supported in the PAL */ |
340 | retval = FALSE; |
341 | break; |
342 | case CP_UTF8: |
343 | /* valid, but not part of CODEPAGEGetData's tables */ |
344 | retval = TRUE; |
345 | break; |
346 | default: |
347 | retval = (NULL != CODEPAGEGetData( CodePage )); |
348 | break; |
349 | } |
350 | |
351 | LOGEXIT("IsValidCodePage returns BOOL %d\n" ,retval); |
352 | PERF_EXIT(IsValidCodePage); |
353 | return retval; |
354 | } |
355 | |
356 | /*++ |
357 | Function: |
358 | GetCPInfo |
359 | |
360 | See MSDN doc. |
361 | --*/ |
362 | BOOL |
363 | PALAPI |
364 | GetCPInfo( |
365 | IN UINT CodePage, |
366 | OUT LPCPINFO lpCPInfo) |
367 | { |
368 | const CP_MAPPING * lpStruct = NULL; |
369 | BOOL bRet = FALSE; |
370 | |
371 | PERF_ENTRY(GetCPInfo); |
372 | ENTRY("GetCPInfo(CodePage=%hu, lpCPInfo=%p)\n" , CodePage, lpCPInfo); |
373 | |
374 | /*check if the input code page is valid*/ |
375 | if( CP_ACP != CodePage && !IsValidCodePage( CodePage ) ) |
376 | { |
377 | /* error, invalid argument */ |
378 | ERROR("CodePage(%d) parameter is invalid\n" ,CodePage); |
379 | SetLastError( ERROR_INVALID_PARAMETER ); |
380 | goto done; |
381 | } |
382 | |
383 | /*check if the lpCPInfo parameter is valid. */ |
384 | if( !lpCPInfo ) |
385 | { |
386 | /* error, invalid argument */ |
387 | ERROR("lpCPInfo cannot be NULL\n" ); |
388 | SetLastError( ERROR_INVALID_PARAMETER ); |
389 | goto done; |
390 | } |
391 | |
392 | if ( NULL != ( lpStruct = CODEPAGEGetData( CodePage ) ) ) |
393 | { |
394 | lpCPInfo->MaxCharSize = lpStruct->nMaxByteSize;; |
395 | memcpy( lpCPInfo->LeadByte, lpStruct->LeadByte , MAX_LEADBYTES ); |
396 | |
397 | /* Don't need to be set, according to the spec. */ |
398 | memset( lpCPInfo->DefaultChar, '?', MAX_DEFAULTCHAR ); |
399 | |
400 | bRet = TRUE; |
401 | } |
402 | |
403 | done: |
404 | LOGEXIT("GetCPInfo returns BOOL %d \n" ,bRet); |
405 | PERF_EXIT(GetCPInfo); |
406 | return bRet; |
407 | } |
408 | |
409 | |
410 | /*++ |
411 | Function: |
412 | GetACP |
413 | |
414 | See MSDN doc. |
415 | --*/ |
416 | UINT |
417 | PALAPI |
418 | GetACP(VOID) |
419 | { |
420 | PERF_ENTRY(GetACP); |
421 | ENTRY("GetACP(VOID)\n" ); |
422 | |
423 | LOGEXIT("GetACP returning UINT %d\n" , PAL_ACP ); |
424 | PERF_EXIT(GetACP); |
425 | |
426 | return PAL_ACP; |
427 | } |
428 | |
429 | |
430 | /*++ |
431 | Function: |
432 | IsDBCSLeadByteEx |
433 | |
434 | See MSDN doc. |
435 | --*/ |
436 | BOOL |
437 | PALAPI |
438 | IsDBCSLeadByteEx( |
439 | IN UINT CodePage, |
440 | IN BYTE TestChar) |
441 | { |
442 | CPINFO cpinfo; |
443 | SIZE_T i; |
444 | BOOL bRet = FALSE; |
445 | |
446 | PERF_ENTRY(IsDBCSLeadByteEx); |
447 | ENTRY("IsDBCSLeadByteEx(CodePage=%#x, TestChar=%d)\n" , CodePage, TestChar); |
448 | |
449 | /* Get the lead byte info with respect to the given codepage*/ |
450 | if( !GetCPInfo( CodePage, &cpinfo ) ) |
451 | { |
452 | ERROR("Error CodePage(%#x) parameter is invalid\n" , CodePage ); |
453 | SetLastError( ERROR_INVALID_PARAMETER ); |
454 | goto done; |
455 | } |
456 | |
457 | for( i=0; i < sizeof(cpinfo.LeadByte)/sizeof(cpinfo.LeadByte[0]); i += 2 ) |
458 | { |
459 | if( 0 == cpinfo.LeadByte[ i ] ) |
460 | { |
461 | goto done; |
462 | } |
463 | |
464 | /*check if the given char is in one of the lead byte ranges*/ |
465 | if( cpinfo.LeadByte[i] <= TestChar && TestChar<= cpinfo.LeadByte[i+1] ) |
466 | { |
467 | bRet = TRUE; |
468 | goto done; |
469 | } |
470 | } |
471 | done: |
472 | LOGEXIT("IsDBCSLeadByteEx returns BOOL %d\n" ,bRet); |
473 | PERF_EXIT(IsDBCSLeadByteEx); |
474 | return bRet; |
475 | } |
476 | |
477 | /*++ |
478 | Function: |
479 | IsDBCSLeadByte |
480 | |
481 | See MSDN doc. |
482 | --*/ |
483 | BOOL |
484 | PALAPI |
485 | IsDBCSLeadByte( |
486 | IN BYTE TestChar) |
487 | { |
488 | // UNIXTODO: Implement this! |
489 | ERROR("Needs Implementation!!!" ); |
490 | return FALSE; |
491 | } |
492 | |
493 | /*++ |
494 | Function: |
495 | MultiByteToWideChar |
496 | |
497 | See MSDN doc. |
498 | |
499 | --*/ |
500 | int |
501 | PALAPI |
502 | MultiByteToWideChar( |
503 | IN UINT CodePage, |
504 | IN DWORD dwFlags, |
505 | IN LPCSTR lpMultiByteStr, |
506 | IN int cbMultiByte, |
507 | OUT LPWSTR lpWideCharStr, |
508 | IN int cchWideChar) |
509 | { |
510 | INT retval =0; |
511 | #if HAVE_COREFOUNDATION |
512 | CFStringRef cfString = NULL; |
513 | CFStringEncoding cfEncoding; |
514 | int bytesToConvert; |
515 | #endif /* HAVE_COREFOUNDATION */ |
516 | |
517 | PERF_ENTRY(MultiByteToWideChar); |
518 | ENTRY("MultiByteToWideChar(CodePage=%u, dwFlags=%#x, lpMultiByteStr=%p (%s)," |
519 | " cbMultiByte=%d, lpWideCharStr=%p, cchWideChar=%d)\n" , |
520 | CodePage, dwFlags, lpMultiByteStr?lpMultiByteStr:"NULL" , lpMultiByteStr?lpMultiByteStr:"NULL" , |
521 | cbMultiByte, lpWideCharStr, cchWideChar); |
522 | |
523 | if (dwFlags & ~(MB_ERR_INVALID_CHARS | MB_PRECOMPOSED)) |
524 | { |
525 | ASSERT("Error dwFlags(0x%x) parameter is invalid\n" , dwFlags); |
526 | SetLastError(ERROR_INVALID_FLAGS); |
527 | goto EXIT; |
528 | } |
529 | |
530 | if ( (cbMultiByte == 0) || (cchWideChar < 0) || |
531 | (lpMultiByteStr == NULL) || |
532 | ((cchWideChar != 0) && |
533 | ((lpWideCharStr == NULL) || |
534 | (lpMultiByteStr == (LPSTR)lpWideCharStr))) ) |
535 | { |
536 | ERROR("Error lpMultiByteStr parameters are invalid\n" ); |
537 | SetLastError(ERROR_INVALID_PARAMETER); |
538 | goto EXIT; |
539 | } |
540 | |
541 | // Use UTF8ToUnicode on all systems, since it replaces |
542 | // invalid characters and Core Foundation doesn't do that. |
543 | if (CodePage == CP_UTF8 || (CodePage == CP_ACP && GetACP() == CP_UTF8)) |
544 | { |
545 | if (cbMultiByte <= -1) |
546 | { |
547 | cbMultiByte = strlen(lpMultiByteStr) + 1; |
548 | } |
549 | |
550 | retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags); |
551 | goto EXIT; |
552 | } |
553 | |
554 | #if !HAVE_COREFOUNDATION |
555 | ERROR( "This code page is not in the system.\n" ); |
556 | SetLastError( ERROR_INVALID_PARAMETER ); |
557 | goto EXIT; |
558 | #else /* !HAVE_COREFOUNDATION */ |
559 | bytesToConvert = cbMultiByte; |
560 | if (bytesToConvert == -1) |
561 | { |
562 | /* Plus one for the trailing '\0', which will end up |
563 | * in the CFString. */ |
564 | bytesToConvert = strlen(lpMultiByteStr) + 1; |
565 | } |
566 | |
567 | cfEncoding = CODEPAGECPToCFStringEncoding(CodePage); |
568 | if (cfEncoding == kCFStringEncodingInvalidId) |
569 | { |
570 | ERROR( "This code page is not in the system.\n" ); |
571 | SetLastError( ERROR_INVALID_PARAMETER ); |
572 | goto EXIT; |
573 | } |
574 | |
575 | cfString = CFStringCreateWithBytes(kCFAllocatorDefault, (UInt8*)lpMultiByteStr, |
576 | bytesToConvert, cfEncoding, TRUE); |
577 | if (cfString == NULL) |
578 | { |
579 | ERROR( "Failed to convert the string to the specified encoding.\n" ); |
580 | SetLastError( ERROR_NO_UNICODE_TRANSLATION ); |
581 | goto EXIT; |
582 | } |
583 | |
584 | if (cchWideChar != 0) |
585 | { |
586 | /* Do the conversion. */ |
587 | CFIndex length = CFStringGetLength(cfString); |
588 | if (length > cchWideChar) |
589 | { |
590 | ERROR("Error insufficient buffer\n" ); |
591 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
592 | retval = 0; |
593 | goto ReleaseString; |
594 | } |
595 | CFStringGetCharacters(cfString, CFRangeMake(0, length), |
596 | (UniChar*)lpWideCharStr); |
597 | retval = length; |
598 | } |
599 | else |
600 | { |
601 | /* Just return the number of wide characters needed. */ |
602 | retval = CFStringGetLength(cfString); |
603 | } |
604 | |
605 | ReleaseString: |
606 | if (cfString != NULL) |
607 | { |
608 | CFRelease(cfString); |
609 | } |
610 | #endif /* !HAVE_COREFOUNDATION */ |
611 | |
612 | EXIT: |
613 | |
614 | LOGEXIT("MultiByteToWideChar returns %d.\n" ,retval); |
615 | PERF_EXIT(MultiByteToWideChar); |
616 | return retval; |
617 | } |
618 | |
619 | |
620 | /*++ |
621 | Function: |
622 | WideCharToMultiByte |
623 | |
624 | See MSDN doc. |
625 | |
626 | --*/ |
627 | int |
628 | PALAPI |
629 | WideCharToMultiByte( |
630 | IN UINT CodePage, |
631 | IN DWORD dwFlags, |
632 | IN LPCWSTR lpWideCharStr, |
633 | IN int cchWideChar, |
634 | OUT LPSTR lpMultiByteStr, |
635 | IN int cbMultiByte, |
636 | IN LPCSTR lpDefaultChar, |
637 | OUT LPBOOL lpUsedDefaultChar) |
638 | { |
639 | INT retval =0; |
640 | char defaultChar = '?'; |
641 | BOOL usedDefaultChar = FALSE; |
642 | #if HAVE_COREFOUNDATION |
643 | CFStringRef cfString = NULL; |
644 | CFStringEncoding cfEncoding; |
645 | int charsToConvert; |
646 | CFIndex charsConverted; |
647 | CFIndex bytesConverted; |
648 | #endif /* !HAVE_COREFOUNDATION */ |
649 | |
650 | PERF_ENTRY(WideCharToMultiByte); |
651 | ENTRY("WideCharToMultiByte(CodePage=%u, dwFlags=%#x, lpWideCharStr=%p (%S), " |
652 | "cchWideChar=%d, lpMultiByteStr=%p, cbMultiByte=%d, " |
653 | "lpDefaultChar=%p, lpUsedDefaultChar=%p)\n" , |
654 | CodePage, dwFlags, lpWideCharStr?lpWideCharStr:W16_NULLSTRING, lpWideCharStr?lpWideCharStr:W16_NULLSTRING, |
655 | cchWideChar, lpMultiByteStr, cbMultiByte, |
656 | lpDefaultChar, lpUsedDefaultChar); |
657 | |
658 | if (dwFlags & ~WC_NO_BEST_FIT_CHARS) |
659 | { |
660 | ERROR("dwFlags %d invalid\n" , dwFlags); |
661 | SetLastError(ERROR_INVALID_FLAGS); |
662 | goto EXIT; |
663 | } |
664 | |
665 | // No special action is needed for WC_NO_BEST_FIT_CHARS. The default |
666 | // behavior of this API on Unix is not to find the best fit for a unicode |
667 | // character that does not map directly into a code point in the given |
668 | // code page. The best fit functionality is not available in wctomb on Unix |
669 | // and is better left unimplemented for security reasons anyway. |
670 | |
671 | if ((cchWideChar < -1) || (cbMultiByte < 0) || |
672 | (lpWideCharStr == NULL) || |
673 | ((cbMultiByte != 0) && |
674 | ((lpMultiByteStr == NULL) || |
675 | (lpWideCharStr == (LPWSTR)lpMultiByteStr))) ) |
676 | { |
677 | ERROR("Error lpWideCharStr parameters are invalid\n" ); |
678 | SetLastError(ERROR_INVALID_PARAMETER); |
679 | goto EXIT; |
680 | } |
681 | |
682 | if (lpDefaultChar != NULL) |
683 | { |
684 | defaultChar = *lpDefaultChar; |
685 | } |
686 | |
687 | // Use UnicodeToUTF8 on all systems because we use |
688 | // UTF8ToUnicode in MultiByteToWideChar() on all systems. |
689 | if (CodePage == CP_UTF8 || (CodePage == CP_ACP && GetACP() == CP_UTF8)) |
690 | { |
691 | if (cchWideChar == -1) |
692 | { |
693 | cchWideChar = PAL_wcslen(lpWideCharStr) + 1; |
694 | } |
695 | retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte); |
696 | goto EXIT; |
697 | } |
698 | |
699 | #if HAVE_COREFOUNDATION |
700 | charsToConvert = cchWideChar; |
701 | if (charsToConvert == -1) |
702 | { |
703 | LPCWSTR ptr = lpWideCharStr; |
704 | |
705 | charsToConvert = 0; |
706 | while(*ptr++ != 0) |
707 | { |
708 | charsToConvert++; |
709 | } |
710 | charsToConvert++; /* For the terminating '\0' */ |
711 | } |
712 | |
713 | cfEncoding = CODEPAGECPToCFStringEncoding(CodePage); |
714 | if (cfEncoding == kCFStringEncodingInvalidId) |
715 | { |
716 | ERROR( "This code page is not in the system.\n" ); |
717 | SetLastError(ERROR_INVALID_PARAMETER); |
718 | goto EXIT; |
719 | } |
720 | |
721 | cfString = CFStringCreateWithCharacters(kCFAllocatorDefault, |
722 | (const UniChar*)lpWideCharStr, charsToConvert); |
723 | if (cfString == NULL) |
724 | { |
725 | ERROR("CFString creation failed.\n" ); |
726 | SetLastError(ERROR_INVALID_PARAMETER); |
727 | goto EXIT; |
728 | } |
729 | |
730 | if (cbMultiByte == 0) |
731 | { |
732 | lpMultiByteStr = NULL; |
733 | } |
734 | charsConverted = CFStringGetBytes(cfString, |
735 | CFRangeMake(0, charsToConvert), |
736 | cfEncoding, '?', TRUE, (UInt8*)lpMultiByteStr, |
737 | cbMultiByte, &bytesConverted); |
738 | if (charsConverted != charsToConvert) |
739 | { |
740 | if (lpMultiByteStr != NULL) |
741 | { |
742 | // CFStringGetBytes can fail due to an insufficient buffer or for |
743 | // other reasons. We need to check if we're out of buffer space. |
744 | charsConverted = CFStringGetBytes(cfString, |
745 | CFRangeMake(0, charsToConvert), |
746 | cfEncoding, '?', TRUE, NULL, |
747 | 0, &bytesConverted); |
748 | if (cbMultiByte < bytesConverted) |
749 | { |
750 | ERROR("Insufficient buffer for CFStringGetBytes.\n" ); |
751 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
752 | goto ReleaseString; |
753 | } |
754 | } |
755 | ERROR("Not all characters were converted.\n" ); |
756 | SetLastError(ERROR_INVALID_PARAMETER); |
757 | goto ReleaseString; |
758 | } |
759 | retval = bytesConverted; |
760 | |
761 | ReleaseString: |
762 | if (cfString != NULL) |
763 | { |
764 | CFRelease(cfString); |
765 | } |
766 | #else /*HAVE_COREFOUNDATION */ |
767 | ERROR( "This code page is not in the system.\n" ); |
768 | SetLastError( ERROR_INVALID_PARAMETER ); |
769 | goto EXIT; |
770 | #endif /* HAVE_COREFOUNDATION */ |
771 | |
772 | EXIT: |
773 | |
774 | if ( lpUsedDefaultChar != NULL ) |
775 | { |
776 | *lpUsedDefaultChar = usedDefaultChar; |
777 | } |
778 | |
779 | /* Flag the cases when WC_NO_BEST_FIT_CHARS was not specified |
780 | * but we found characters that had to be replaced with default |
781 | * characters. Note that Windows would have attempted to find |
782 | * best fit characters under these conditions and that could pose |
783 | * a security risk. |
784 | */ |
785 | _ASSERT_MSG((dwFlags & WC_NO_BEST_FIT_CHARS) || !usedDefaultChar, |
786 | "WideCharToMultiByte found a string which doesn't round trip: (%p)%S " |
787 | "and WC_NO_BEST_FIT_CHARS was not specified\n" , |
788 | lpWideCharStr, lpWideCharStr); |
789 | |
790 | LOGEXIT("WideCharToMultiByte returns INT %d\n" , retval); |
791 | PERF_EXIT(WideCharToMultiByte); |
792 | return retval; |
793 | } |
794 | |
795 | extern char * g_szCoreCLRPath; |
796 | |
797 | /*++ |
798 | Function : |
799 | |
800 | PAL_BindResources - bind the resource domain to the path where the coreclr resides |
801 | |
802 | Returns TRUE if it succeeded, FALSE if it failed due to OOM |
803 | --*/ |
804 | BOOL |
805 | PALAPI |
806 | PAL_BindResources(IN LPCSTR lpDomain) |
807 | { |
808 | #if HAVE_LIBINTL_H |
809 | _ASSERTE(g_szCoreCLRPath != NULL); |
810 | char * coreCLRDirectoryPath; |
811 | PathCharString coreCLRDirectoryPathPS; |
812 | int len = strlen(g_szCoreCLRPath); |
813 | coreCLRDirectoryPath = coreCLRDirectoryPathPS.OpenStringBuffer(len); |
814 | if (NULL == coreCLRDirectoryPath) |
815 | { |
816 | return FALSE; |
817 | } |
818 | DWORD size = FILEGetDirectoryFromFullPathA(g_szCoreCLRPath, len, coreCLRDirectoryPath); |
819 | coreCLRDirectoryPathPS.CloseBuffer(size); |
820 | |
821 | LPCSTR boundPath = bindtextdomain(lpDomain, coreCLRDirectoryPath); |
822 | |
823 | return boundPath != NULL; |
824 | #else // HAVE_LIBINTL_H |
825 | // UNIXTODO: Implement for Unixes without libintl if necessary |
826 | return TRUE; |
827 | #endif // HAVE_LIBINTL_H |
828 | } |
829 | |
830 | /*++ |
831 | Function : |
832 | |
833 | PAL_GetResourceString - get localized string for a specified resource. |
834 | The string that is passed in should be the English string, since it |
835 | will be returned if an appropriately localized version is not found. |
836 | |
837 | Returns number of characters retrieved, 0 if it failed. |
838 | --*/ |
839 | int |
840 | PALAPI |
841 | PAL_GetResourceString( |
842 | IN LPCSTR lpDomain, |
843 | IN LPCSTR lpResourceStr, |
844 | OUT LPWSTR lpWideCharStr, |
845 | IN int cchWideChar |
846 | ) |
847 | { |
848 | #if HAVE_LIBINTL_H |
849 | // NOTE: dgettext returns the key if it fails to locate the appropriate |
850 | // resource. In our case, that will be the English string. |
851 | LPCSTR resourceString = dgettext(lpDomain, lpResourceStr); |
852 | #else // HAVE_LIBINTL_H |
853 | // UNIXTODO: Implement for OSX using the native localization API |
854 | |
855 | // This is a temporary solution until we add the real native resource support. |
856 | LPCSTR resourceString = lpResourceStr; |
857 | #endif // HAVE_LIBINTL_H |
858 | |
859 | int length = strlen(resourceString); |
860 | return UTF8ToUnicode(lpResourceStr, length + 1, lpWideCharStr, cchWideChar, 0); |
861 | } |
862 | |