unicode.cpp source code [CoreCLR/pal/src/locale/unicode.cpp]

1	// Licensed to the .NET Foundation under one or more agreements.
2	// The .NET Foundation licenses this file to you under the MIT license.
3	// See the LICENSE file in the project root for more information.
4
/*++



Module Name:

    unicode.cpp

Abstract:

    Implementation of all functions related to Unicode support

Revision History:



--*/
22
23	#include "pal/thread.hpp"
24
25	#include "pal/palinternal.h"
26	#include "pal/unicode_data.h"
27	#include "pal/dbgmsg.h"
28	#include "pal/file.h"
29	#include "pal/utf8.h"
30	#include "pal/locale.h"
31	#include "pal/cruntime.h"
32	#include "pal/stackstring.hpp"
33
34	#if !(HAVE_PTHREAD_RWLOCK_T \|\| HAVE_COREFOUNDATION)
35	#error Either pthread rwlocks or Core Foundation are required for Unicode support
36	#endif /* !(HAVE_PTHREAD_RWLOCK_T \|\| HAVE_COREFOUNDATION) */
37
38	#include <pthread.h>
39	#include <locale.h>
40	#if HAVE_LIBINTL_H
41	#include <libintl.h>
42	#endif // HAVE_LIBINTL_H
43	#include <errno.h>
44	#if HAVE_COREFOUNDATION
45	#include <CoreFoundation/CoreFoundation.h>
46	#endif // HAVE_COREFOUNDATION
47
48	#include <debugmacrosext.h>
49
50	using namespace CorUnix;
51
52	SET_DEFAULT_DEBUG_CHANNEL(UNICODE);
53
54	#if HAVE_COREFOUNDATION
55
56	static CP_MAPPING CP_TO_NATIVE_TABLE[] = {
57	{ `65001`, kCFStringEncodingUTF8, `4`, { `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
58	{ `1252`, kCFStringEncodingWindowsLatin1, `1`, { `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
59	{ `1251`, kCFStringEncodingWindowsCyrillic, `1`, { `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
60	{ `1253`, kCFStringEncodingWindowsGreek, `1`, { `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
61	{ `1254`, kCFStringEncodingWindowsLatin5, `1`, { `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
62	{ `1258`, kCFStringEncodingWindowsVietnamese, `1`, { `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
63	{ `932`, kCFStringEncodingDOSJapanese, `2`, { `129`, `159`, `224`, `252`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
64	{ `949`, kCFStringEncodingDOSKorean, `2`, { `129`, `254`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } },
65	{ `950`, kCFStringEncodingDOSChineseTrad, `2`, { `129`, `254`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } }
66	};
67
68	#else // HAVE_COREFOUNDATION
69
70	static const CP_MAPPING CP_TO_NATIVE_TABLE[] = {
71	{ `65001`, "utf8", `4`, { `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } }
72	};
73
74	#endif // HAVE_COREFOUNDATION
75
76	// We hardcode the system's default codepage to be UTF-8.
77	// There are several reasons for this:
78	// - On OSX, HFS+ file names are encoded as UTF-8.
79	// - On OSX, When writing strings to the console, the Terminal.app will interpret them as UTF-8.
80	// - We want Ansi marshalling to mean marshal to UTF-8 on Mac and Linux
81	static const UINT PAL_ACP = `65001`;
82
83	#if !HAVE_COREFOUNDATION
84	/++*
85	Function:
86	UnicodeDataComp
87	This is the comparison function used by the bsearch function to search
88	for unicode characters in the UnicodeData array.
89
90	Parameter:
91	pnKey
92	The unicode character value to search for.
93	elem
94	A pointer to a UnicodeDataRec.
95
96	Return value:
97	<0 if pnKey < elem->nUnicodeValue
98	0 if pnKey == elem->nUnicodeValue
99	>0 if pnKey > elem->nUnicodeValue
100	--/*
101	static int UnicodeDataComp(const void pnKey, const* void *elem)
102	{
103	WCHAR uValue = ((UnicodeDataRec*)elem)->nUnicodeValue;
104	WORD rangeValue = ((UnicodeDataRec*)elem)->rangeValue;
105
106	if (((INT)pnKey) < uValue)
107	{
108	return -`1`;
109	}
110	else
111	{
112	if (((INT)pnKey) > (uValue + rangeValue))
113	{
114	return `1`;
115	}
116	else
117	{
118	return `0`;
119	}
120	}
121	}
122
123	/++*
124	Function:
125	GetUnicodeData
126	This function is used to get information about a Unicode character.
127
128	Parameters:
129	nUnicodeValue
130	The numeric value of the Unicode character to get information about.
131	pDataRec
132	The UnicodeDataRec to fill in with the data for the Unicode character.
133
134	Return value:
135	TRUE if the Unicode character was found.
136
137	--/*
138	BOOL GetUnicodeData(INT nUnicodeValue, UnicodeDataRec *pDataRec)
139	{
140	BOOL bRet;
141	if (nUnicodeValue <= UNICODE_DATA_DIRECT_ACCESS)
142	{
143	*pDataRec = UnicodeData[nUnicodeValue];
144	bRet = TRUE;
145	}
146	else
147	{
148	UnicodeDataRec *dataRec;
149	INT nNumOfChars = UNICODE_DATA_SIZE;
150	dataRec = (UnicodeDataRec *) bsearch(&nUnicodeValue, UnicodeData, nNumOfChars,
151	sizeof(UnicodeDataRec), UnicodeDataComp);
152	if (dataRec == NULL)
153	{
154	bRet = FALSE;
155	}
156	else
157	{
158	bRet = TRUE;
159	pDataRec = dataRec;
160	}
161	}
162	return bRet;
163	}
164	#endif /* !HAVE_COREFOUNDATION */
165
166	/++*
167	Function:
168	CODEPAGEGetData
169
170	IN UINT CodePage - The code page the caller
171	is attempting to retrieve data on.
172
173	Returns a pointer to structure, NULL otherwise.
174	--/*
175	const CP_MAPPING *
176	CODEPAGEGetData( IN UINT CodePage )
177	{
178	UINT nSize = sizeof( CP_TO_NATIVE_TABLE ) / sizeof( CP_TO_NATIVE_TABLE[ `0` ] );
179	UINT nIndex = `0`;
180
181	if ( CP_ACP == CodePage )
182	{
183	CodePage = PAL_ACP;
184	}
185
186	/ checking if the CodePage is ACP and returning true if so /
187	while (nIndex < nSize)
188	{
189	if ( ( CP_TO_NATIVE_TABLE[ nIndex ] ).nCodePage == CodePage )
190	{
191	return &(CP_TO_NATIVE_TABLE[ nIndex ]);
192	}
193	nIndex++;
194	}
195	return NULL;
196	}
197
#if HAVE_COREFOUNDATION
/*++
Function :

    CODEPAGECPToCFStringEncoding - Gets the CFStringEncoding for
    the given codepage.

    Returns the CFStringEncoding for the given codepage, or
    kCFStringEncodingInvalidId when CODEPAGEGetData has no entry for it.
--*/
CFStringEncoding CODEPAGECPToCFStringEncoding(UINT codepage)
{
    const CP_MAPPING *entry = CODEPAGEGetData(codepage);
    return (entry != NULL) ? entry->nCFEncoding : kCFStringEncodingInvalidId;
}
#endif // HAVE_COREFOUNDATION
220
221	/++*
222	Function:
223	CharNextA
224
225	Parameters
226
227	lpsz
228	[in] Pointer to a character in a null-terminated string.
229
230	Return Values
231
232	A pointer to the next character in the string, or to the terminating null character if at the end of the string, indicates success.
233
234	If lpsz points to the terminating null character, the return value is equal to lpsz.
235
236	See MSDN doc.
237	--/*
238	LPSTR
239	PALAPI
240	CharNextA(
241	IN LPCSTR lpsz)
242	{
243	LPSTR pRet;
244	PERF_ENTRY(CharNextA);
245	ENTRY("CharNextA (lpsz=%p (%s))\n", lpsz?lpsz:NULL, lpsz?lpsz:NULL);
246
247	pRet = CharNextExA(GetACP(), lpsz, `0`);
248
249	LOGEXIT ("CharNextA returns LPSTR %p\n", pRet);
250	PERF_EXIT(CharNextA);
251	return pRet;
252	}
253
254
255	/++*
256	Function:
257	CharNextExA
258
259	See MSDN doc.
260	--/*
261	LPSTR
262	PALAPI
263	CharNextExA(
264	IN WORD CodePage,
265	IN LPCSTR lpCurrentChar,
266	IN DWORD dwFlags)
267	{
268	LPSTR pRet = (LPSTR) lpCurrentChar;
269
270	PERF_ENTRY(CharNextExA);
271	ENTRY("CharNextExA (CodePage=%hu, lpCurrentChar=%p (%s), dwFlags=%#x)\n",
272	CodePage, lpCurrentChar?lpCurrentChar:"NULL", lpCurrentChar?lpCurrentChar:"NULL", dwFlags);
273
274	if ((lpCurrentChar != NULL) && (*lpCurrentChar != `0`))
275	{
276	pRet += (*(lpCurrentChar+`1`) != `0`) &&
277	IsDBCSLeadByteEx(CodePage, *lpCurrentChar) ? `2` : `1`;
278	}
279
280	LOGEXIT("CharNextExA returns LPSTR:%p (%s)\n", pRet, pRet);
281	PERF_EXIT(CharNextExA);
282	return pRet;
283	}
284
285
286	/++*
287	Function:
288	GetConsoleOutputCP
289
290	See MSDN doc.
291	--/*
292	UINT
293	PALAPI
294	GetConsoleOutputCP(
295	VOID)
296	{
297	UINT nRet = `0`;
298	PERF_ENTRY(GetConsoleOutputCP);
299	ENTRY("GetConsoleOutputCP()\n");
300	nRet = GetACP();
301	LOGEXIT("GetConsoleOutputCP returns UINT %d \n", nRet );
302	PERF_EXIT(GetConsoleOutputCP);
303	return nRet;
304	}
305
306
307	/++*
308	Function:
309	IsValidCodePage
310
311	See MSDN doc.
312
313	Notes :
314	"pseudo code pages", like CP_ACP, aren't considered 'valid' in this context.
315	CP_UTF7 and CP_UTF8, however, are* considered valid code pages, even though*
316	MSDN fails to mention them in the IsValidCodePage entry.
317	Note : CP_UTF7 support isn't required for CoreCLR
318	--/*
319	BOOL
320	PALAPI
321	IsValidCodePage(
322	IN UINT CodePage)
323	{
324	BOOL retval = FALSE;
325
326	PERF_ENTRY(IsValidCodePage);
327	ENTRY("IsValidCodePage(%d)\n", CodePage );
328
329	switch(CodePage)
330	{
331	case CP_ACP : / fall through /
332	case CP_OEMCP : / fall through /
333	case CP_MACCP : / fall through /
334	case CP_THREAD_ACP:
335	/ 'pseudo code pages' : not valid /
336	retval = FALSE;
337	break;
338	case CP_UTF7:
339	/ valid in Win32, but not supported in the PAL /
340	retval = FALSE;
341	break;
342	case CP_UTF8:
343	/ valid, but not part of CODEPAGEGetData's tables /
344	retval = TRUE;
345	break;
346	default:
347	retval = (NULL != CODEPAGEGetData( CodePage ));
348	break;
349	}
350
351	LOGEXIT("IsValidCodePage returns BOOL %d\n",retval);
352	PERF_EXIT(IsValidCodePage);
353	return retval;
354	}
355
356	/++*
357	Function:
358	GetCPInfo
359
360	See MSDN doc.
361	--/*
362	BOOL
363	PALAPI
364	GetCPInfo(
365	IN UINT CodePage,
366	OUT LPCPINFO lpCPInfo)
367	{
368	const CP_MAPPING * lpStruct = NULL;
369	BOOL bRet = FALSE;
370
371	PERF_ENTRY(GetCPInfo);
372	ENTRY("GetCPInfo(CodePage=%hu, lpCPInfo=%p)\n", CodePage, lpCPInfo);
373
374	/check if the input code page is valid/
375	if( CP_ACP != CodePage && !IsValidCodePage( CodePage ) )
376	{
377	/ error, invalid argument /
378	ERROR("CodePage(%d) parameter is invalid\n",CodePage);
379	SetLastError( ERROR_INVALID_PARAMETER );
380	goto done;
381	}
382
383	/check if the lpCPInfo parameter is valid. /
384	if( !lpCPInfo )
385	{
386	/ error, invalid argument /
387	ERROR("lpCPInfo cannot be NULL\n" );
388	SetLastError( ERROR_INVALID_PARAMETER );
389	goto done;
390	}
391
392	if ( NULL != ( lpStruct = CODEPAGEGetData( CodePage ) ) )
393	{
394	lpCPInfo->MaxCharSize = lpStruct->nMaxByteSize;;
395	memcpy( lpCPInfo->LeadByte, lpStruct->LeadByte , MAX_LEADBYTES );
396
397	/ Don't need to be set, according to the spec. /
398	memset( lpCPInfo->DefaultChar, `'?'`, MAX_DEFAULTCHAR );
399
400	bRet = TRUE;
401	}
402
403	done:
404	LOGEXIT("GetCPInfo returns BOOL %d \n",bRet);
405	PERF_EXIT(GetCPInfo);
406	return bRet;
407	}
408
409
410	/++*
411	Function:
412	GetACP
413
414	See MSDN doc.
415	--/*
416	UINT
417	PALAPI
418	GetACP(VOID)
419	{
420	PERF_ENTRY(GetACP);
421	ENTRY("GetACP(VOID)\n");
422
423	LOGEXIT("GetACP returning UINT %d\n", PAL_ACP );
424	PERF_EXIT(GetACP);
425
426	return PAL_ACP;
427	}
428
429
430	/++*
431	Function:
432	IsDBCSLeadByteEx
433
434	See MSDN doc.
435	--/*
436	BOOL
437	PALAPI
438	IsDBCSLeadByteEx(
439	IN UINT CodePage,
440	IN BYTE TestChar)
441	{
442	CPINFO cpinfo;
443	SIZE_T i;
444	BOOL bRet = FALSE;
445
446	PERF_ENTRY(IsDBCSLeadByteEx);
447	ENTRY("IsDBCSLeadByteEx(CodePage=%#x, TestChar=%d)\n", CodePage, TestChar);
448
449	/ Get the lead byte info with respect to the given codepage/
450	if( !GetCPInfo( CodePage, &cpinfo ) )
451	{
452	ERROR("Error CodePage(%#x) parameter is invalid\n", CodePage );
453	SetLastError( ERROR_INVALID_PARAMETER );
454	goto done;
455	}
456
457	for( i=`0`; i < sizeof(cpinfo.LeadByte)/sizeof(cpinfo.LeadByte[`0`]); i += `2` )
458	{
459	if( `0` == cpinfo.LeadByte[ i ] )
460	{
461	goto done;
462	}
463
464	/check if the given char is in one of the lead byte ranges/
465	if( cpinfo.LeadByte[i] <= TestChar && TestChar<= cpinfo.LeadByte[i+`1`] )
466	{
467	bRet = TRUE;
468	goto done;
469	}
470	}
471	done:
472	LOGEXIT("IsDBCSLeadByteEx returns BOOL %d\n",bRet);
473	PERF_EXIT(IsDBCSLeadByteEx);
474	return bRet;
475	}
476
477	/++*
478	Function:
479	IsDBCSLeadByte
480
481	See MSDN doc.
482	--/*
483	BOOL
484	PALAPI
485	IsDBCSLeadByte(
486	IN BYTE TestChar)
487	{
488	// UNIXTODO: Implement this!
489	ERROR("Needs Implementation!!!");
490	return FALSE;
491	}
492
493	/++*
494	Function:
495	MultiByteToWideChar
496
497	See MSDN doc.
498
499	--/*
500	int
501	PALAPI
502	MultiByteToWideChar(
503	IN UINT CodePage,
504	IN DWORD dwFlags,
505	IN LPCSTR lpMultiByteStr,
506	IN int cbMultiByte,
507	OUT LPWSTR lpWideCharStr,
508	IN int cchWideChar)
509	{
510	INT retval =`0`;
511	#if HAVE_COREFOUNDATION
512	CFStringRef cfString = NULL;
513	CFStringEncoding cfEncoding;
514	int bytesToConvert;
515	#endif /* HAVE_COREFOUNDATION */
516
517	PERF_ENTRY(MultiByteToWideChar);
518	ENTRY("MultiByteToWideChar(CodePage=%u, dwFlags=%#x, lpMultiByteStr=%p (%s),"
519	" cbMultiByte=%d, lpWideCharStr=%p, cchWideChar=%d)\n",
520	CodePage, dwFlags, lpMultiByteStr?lpMultiByteStr:"NULL", lpMultiByteStr?lpMultiByteStr:"NULL",
521	cbMultiByte, lpWideCharStr, cchWideChar);
522
523	if (dwFlags & ~(MB_ERR_INVALID_CHARS \| MB_PRECOMPOSED))
524	{
525	ASSERT("Error dwFlags(0x%x) parameter is invalid\n", dwFlags);
526	SetLastError(ERROR_INVALID_FLAGS);
527	goto EXIT;
528	}
529
530	if ( (cbMultiByte == `0`) \|\| (cchWideChar < `0`) \|\|
531	(lpMultiByteStr == NULL) \|\|
532	((cchWideChar != `0`) &&
533	((lpWideCharStr == NULL) \|\|
534	(lpMultiByteStr == (LPSTR)lpWideCharStr))) )
535	{
536	ERROR("Error lpMultiByteStr parameters are invalid\n");
537	SetLastError(ERROR_INVALID_PARAMETER);
538	goto EXIT;
539	}
540
541	// Use UTF8ToUnicode on all systems, since it replaces
542	// invalid characters and Core Foundation doesn't do that.
543	if (CodePage == CP_UTF8 \|\| (CodePage == CP_ACP && GetACP() == CP_UTF8))
544	{
545	if (cbMultiByte <= -`1`)
546	{
547	cbMultiByte = strlen(lpMultiByteStr) + `1`;
548	}
549
550	retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags);
551	goto EXIT;
552	}
553
554	#if !HAVE_COREFOUNDATION
555	ERROR( "This code page is not in the system.\n" );
556	SetLastError( ERROR_INVALID_PARAMETER );
557	goto EXIT;
558	#else /* !HAVE_COREFOUNDATION */
559	bytesToConvert = cbMultiByte;
560	if (bytesToConvert == -`1`)
561	{
562	/ Plus one for the trailing '\0', which will end up*
563	* in the CFString. */
564	bytesToConvert = strlen(lpMultiByteStr) + `1`;
565	}
566
567	cfEncoding = CODEPAGECPToCFStringEncoding(CodePage);
568	if (cfEncoding == kCFStringEncodingInvalidId)
569	{
570	ERROR( "This code page is not in the system.\n" );
571	SetLastError( ERROR_INVALID_PARAMETER );
572	goto EXIT;
573	}
574
575	cfString = CFStringCreateWithBytes(kCFAllocatorDefault, (UInt8*)lpMultiByteStr,
576	bytesToConvert, cfEncoding, TRUE);
577	if (cfString == NULL)
578	{
579	ERROR( "Failed to convert the string to the specified encoding.\n" );
580	SetLastError( ERROR_NO_UNICODE_TRANSLATION );
581	goto EXIT;
582	}
583
584	if (cchWideChar != `0`)
585	{
586	/ Do the conversion. /
587	CFIndex length = CFStringGetLength(cfString);
588	if (length > cchWideChar)
589	{
590	ERROR("Error insufficient buffer\n");
591	SetLastError(ERROR_INSUFFICIENT_BUFFER);
592	retval = `0`;
593	goto ReleaseString;
594	}
595	CFStringGetCharacters(cfString, CFRangeMake(`0`, length),
596	(UniChar*)lpWideCharStr);
597	retval = length;
598	}
599	else
600	{
601	/ Just return the number of wide characters needed. /
602	retval = CFStringGetLength(cfString);
603	}
604
605	ReleaseString:
606	if (cfString != NULL)
607	{
608	CFRelease(cfString);
609	}
610	#endif /* !HAVE_COREFOUNDATION */
611
612	EXIT:
613
614	LOGEXIT("MultiByteToWideChar returns %d.\n",retval);
615	PERF_EXIT(MultiByteToWideChar);
616	return retval;
617	}
618
619
620	/++*
621	Function:
622	WideCharToMultiByte
623
624	See MSDN doc.
625
626	--/*
627	int
628	PALAPI
629	WideCharToMultiByte(
630	IN UINT CodePage,
631	IN DWORD dwFlags,
632	IN LPCWSTR lpWideCharStr,
633	IN int cchWideChar,
634	OUT LPSTR lpMultiByteStr,
635	IN int cbMultiByte,
636	IN LPCSTR lpDefaultChar,
637	OUT LPBOOL lpUsedDefaultChar)
638	{
639	INT retval =`0`;
640	char defaultChar = `'?'`;
641	BOOL usedDefaultChar = FALSE;
642	#if HAVE_COREFOUNDATION
643	CFStringRef cfString = NULL;
644	CFStringEncoding cfEncoding;
645	int charsToConvert;
646	CFIndex charsConverted;
647	CFIndex bytesConverted;
648	#endif /* !HAVE_COREFOUNDATION */
649
650	PERF_ENTRY(WideCharToMultiByte);
651	ENTRY("WideCharToMultiByte(CodePage=%u, dwFlags=%#x, lpWideCharStr=%p (%S), "
652	"cchWideChar=%d, lpMultiByteStr=%p, cbMultiByte=%d, "
653	"lpDefaultChar=%p, lpUsedDefaultChar=%p)\n",
654	CodePage, dwFlags, lpWideCharStr?lpWideCharStr:W16_NULLSTRING, lpWideCharStr?lpWideCharStr:W16_NULLSTRING,
655	cchWideChar, lpMultiByteStr, cbMultiByte,
656	lpDefaultChar, lpUsedDefaultChar);
657
658	if (dwFlags & ~WC_NO_BEST_FIT_CHARS)
659	{
660	ERROR("dwFlags %d invalid\n", dwFlags);
661	SetLastError(ERROR_INVALID_FLAGS);
662	goto EXIT;
663	}
664
665	// No special action is needed for WC_NO_BEST_FIT_CHARS. The default
666	// behavior of this API on Unix is not to find the best fit for a unicode
667	// character that does not map directly into a code point in the given
668	// code page. The best fit functionality is not available in wctomb on Unix
669	// and is better left unimplemented for security reasons anyway.
670
671	if ((cchWideChar < -`1`) \|\| (cbMultiByte < `0`) \|\|
672	(lpWideCharStr == NULL) \|\|
673	((cbMultiByte != `0`) &&
674	((lpMultiByteStr == NULL) \|\|
675	(lpWideCharStr == (LPWSTR)lpMultiByteStr))) )
676	{
677	ERROR("Error lpWideCharStr parameters are invalid\n");
678	SetLastError(ERROR_INVALID_PARAMETER);
679	goto EXIT;
680	}
681
682	if (lpDefaultChar != NULL)
683	{
684	defaultChar = *lpDefaultChar;
685	}
686
687	// Use UnicodeToUTF8 on all systems because we use
688	// UTF8ToUnicode in MultiByteToWideChar() on all systems.
689	if (CodePage == CP_UTF8 \|\| (CodePage == CP_ACP && GetACP() == CP_UTF8))
690	{
691	if (cchWideChar == -`1`)
692	{
693	cchWideChar = PAL_wcslen(lpWideCharStr) + `1`;
694	}
695	retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte);
696	goto EXIT;
697	}
698
699	#if HAVE_COREFOUNDATION
700	charsToConvert = cchWideChar;
701	if (charsToConvert == -`1`)
702	{
703	LPCWSTR ptr = lpWideCharStr;
704
705	charsToConvert = `0`;
706	while(*ptr++ != `0`)
707	{
708	charsToConvert++;
709	}
710	charsToConvert++; / For the terminating '\0' /
711	}
712
713	cfEncoding = CODEPAGECPToCFStringEncoding(CodePage);
714	if (cfEncoding == kCFStringEncodingInvalidId)
715	{
716	ERROR( "This code page is not in the system.\n" );
717	SetLastError(ERROR_INVALID_PARAMETER);
718	goto EXIT;
719	}
720
721	cfString = CFStringCreateWithCharacters(kCFAllocatorDefault,
722	(const UniChar*)lpWideCharStr, charsToConvert);
723	if (cfString == NULL)
724	{
725	ERROR("CFString creation failed.\n");
726	SetLastError(ERROR_INVALID_PARAMETER);
727	goto EXIT;
728	}
729
730	if (cbMultiByte == `0`)
731	{
732	lpMultiByteStr = NULL;
733	}
734	charsConverted = CFStringGetBytes(cfString,
735	CFRangeMake(`0`, charsToConvert),
736	cfEncoding, `'?'`, TRUE, (UInt8*)lpMultiByteStr,
737	cbMultiByte, &bytesConverted);
738	if (charsConverted != charsToConvert)
739	{
740	if (lpMultiByteStr != NULL)
741	{
742	// CFStringGetBytes can fail due to an insufficient buffer or for
743	// other reasons. We need to check if we're out of buffer space.
744	charsConverted = CFStringGetBytes(cfString,
745	CFRangeMake(`0`, charsToConvert),
746	cfEncoding, `'?'`, TRUE, NULL,
747	`0`, &bytesConverted);
748	if (cbMultiByte < bytesConverted)
749	{
750	ERROR("Insufficient buffer for CFStringGetBytes.\n");
751	SetLastError(ERROR_INSUFFICIENT_BUFFER);
752	goto ReleaseString;
753	}
754	}
755	ERROR("Not all characters were converted.\n");
756	SetLastError(ERROR_INVALID_PARAMETER);
757	goto ReleaseString;
758	}
759	retval = bytesConverted;
760
761	ReleaseString:
762	if (cfString != NULL)
763	{
764	CFRelease(cfString);
765	}
766	#else /HAVE_COREFOUNDATION /
767	ERROR( "This code page is not in the system.\n" );
768	SetLastError( ERROR_INVALID_PARAMETER );
769	goto EXIT;
770	#endif /* HAVE_COREFOUNDATION */
771
772	EXIT:
773
774	if ( lpUsedDefaultChar != NULL )
775	{
776	*lpUsedDefaultChar = usedDefaultChar;
777	}
778
779	/ Flag the cases when WC_NO_BEST_FIT_CHARS was not specified*
780	* but we found characters that had to be replaced with default
781	* characters. Note that Windows would have attempted to find
782	* best fit characters under these conditions and that could pose
783	* a security risk.
784	*/
785	_ASSERT_MSG((dwFlags & WC_NO_BEST_FIT_CHARS) \|\| !usedDefaultChar,
786	"WideCharToMultiByte found a string which doesn't round trip: (%p)%S "
787	"and WC_NO_BEST_FIT_CHARS was not specified\n",
788	lpWideCharStr, lpWideCharStr);
789
790	LOGEXIT("WideCharToMultiByte returns INT %d\n", retval);
791	PERF_EXIT(WideCharToMultiByte);
792	return retval;
793	}
794
795	extern char * g_szCoreCLRPath;
796
797	/++*
798	Function :
799
800	PAL_BindResources - bind the resource domain to the path where the coreclr resides
801
802	Returns TRUE if it succeeded, FALSE if it failed due to OOM
803	--/*
804	BOOL
805	PALAPI
806	PAL_BindResources(IN LPCSTR lpDomain)
807	{
808	#if HAVE_LIBINTL_H
809	_ASSERTE(g_szCoreCLRPath != NULL);
810	char * coreCLRDirectoryPath;
811	PathCharString coreCLRDirectoryPathPS;
812	int len = strlen(g_szCoreCLRPath);
813	coreCLRDirectoryPath = coreCLRDirectoryPathPS.OpenStringBuffer(len);
814	if (NULL == coreCLRDirectoryPath)
815	{
816	return FALSE;
817	}
818	DWORD size = FILEGetDirectoryFromFullPathA(g_szCoreCLRPath, len, coreCLRDirectoryPath);
819	coreCLRDirectoryPathPS.CloseBuffer(size);
820
821	LPCSTR boundPath = bindtextdomain(lpDomain, coreCLRDirectoryPath);
822
823	return boundPath != NULL;
824	#else // HAVE_LIBINTL_H
825	// UNIXTODO: Implement for Unixes without libintl if necessary
826	return TRUE;
827	#endif // HAVE_LIBINTL_H
828	}
829
830	/++*
831	Function :
832
833	PAL_GetResourceString - get localized string for a specified resource.
834	The string that is passed in should be the English string, since it
835	will be returned if an appropriately localized version is not found.
836
837	Returns number of characters retrieved, 0 if it failed.
838	--/*
839	int
840	PALAPI
841	PAL_GetResourceString(
842	IN LPCSTR lpDomain,
843	IN LPCSTR lpResourceStr,
844	OUT LPWSTR lpWideCharStr,
845	IN int cchWideChar
846	)
847	{
848	#if HAVE_LIBINTL_H
849	// NOTE: dgettext returns the key if it fails to locate the appropriate
850	// resource. In our case, that will be the English string.
851	LPCSTR resourceString = dgettext(lpDomain, lpResourceStr);
852	#else // HAVE_LIBINTL_H
853	// UNIXTODO: Implement for OSX using the native localization API
854
855	// This is a temporary solution until we add the real native resource support.
856	LPCSTR resourceString = lpResourceStr;
857	#endif // HAVE_LIBINTL_H
858
859	int length = strlen(resourceString);
860	return UTF8ToUnicode(lpResourceStr, length + `1`, lpWideCharStr, cchWideChar, `0`);
861	}
862

Definitions

CP_TO_NATIVE_TABLE
PAL_ACP
UnicodeDataComp
GetUnicodeData
CODEPAGEGetData
CharNextA
CharNextExA
GetConsoleOutputCP
IsValidCodePage
GetCPInfo
GetACP
IsDBCSLeadByteEx
IsDBCSLeadByte
MultiByteToWideChar
WideCharToMultiByte
PAL_BindResources

Browse the source code of CoreCLR/pal/src/locale/unicode.cpp

Definitions