loclikely.cpp source code [engine/third_party/icu/source/common/loclikely.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 1997-2016, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: loclikely.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2010feb25
16	* created by: Markus W. Scherer
17	*
18	* Code for likely and minimized locale subtags, separated out from other .cpp files
19	* that then do not depend on resource bundle code and likely-subtags data.
20	*/
21
22	#include "unicode/bytestream.h"
23	#include "unicode/utypes.h"
24	#include "unicode/locid.h"
25	#include "unicode/putil.h"
26	#include "unicode/uchar.h"
27	#include "unicode/uloc.h"
28	#include "unicode/ures.h"
29	#include "unicode/uscript.h"
30	#include "bytesinkutil.h"
31	#include "charstr.h"
32	#include "cmemory.h"
33	#include "cstring.h"
34	#include "ulocimp.h"
35	#include "ustr_imp.h"
36
37	/**
38	* These are the canonical strings for unknown languages, scripts and regions.
39	**/
40	static const char* const unknownLanguage = "und";
41	static const char* const unknownScript = "Zzzz";
42	static const char* const unknownRegion = "ZZ";
43
44	/**
45	* This function looks for the localeID in the likelySubtags resource.
46	*
47	* @param localeID The tag to find.
48	* @param buffer A buffer to hold the matching entry
49	* @param bufferLength The length of the output buffer
50	* @return A pointer to "buffer" if found, or a null pointer if not.
51	*/
52	static const char* U_CALLCONV
53	findLikelySubtags(const char* localeID,
54	char* buffer,
55	int32_t bufferLength,
56	UErrorCode* err) {
57	const char* result = NULL;
58
59	if (!U_FAILURE(*err)) {
60	int32_t resLen = `0`;
61	const UChar* s = NULL;
62	UErrorCode tmpErr = U_ZERO_ERROR;
63	icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
64	if (U_SUCCESS(tmpErr)) {
65	icu::CharString und;
66	if (localeID != NULL) {
67	if (*localeID == `'\0'`) {
68	localeID = unknownLanguage;
69	} else if (*localeID == `'_'`) {
70	und.append(unknownLanguage, *err);
71	und.append(localeID, *err);
72	if (U_FAILURE(*err)) {
73	return NULL;
74	}
75	localeID = und.data();
76	}
77	}
78	s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
79
80	if (U_FAILURE(tmpErr)) {
81	/*
82	* If a resource is missing, it's not really an error, it's
83	* just that we don't have any data for that particular locale ID.
84	*/
85	if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86	*err = tmpErr;
87	}
88	}
89	else if (resLen >= bufferLength) {
90	/ The buffer should never overflow. /
91	*err = U_INTERNAL_PROGRAM_ERROR;
92	}
93	else {
94	u_UCharsToChars(s, buffer, resLen + `1`);
95	if (resLen >= `3` &&
96	uprv_strnicmp(buffer, unknownLanguage, `3`) == `0` &&
97	(resLen == `3` \|\| buffer[`3`] == `'_'`)) {
98	uprv_memmove(buffer, buffer + `3`, resLen - `3` + `1`);
99	}
100	result = buffer;
101	}
102	} else {
103	*err = tmpErr;
104	}
105	}
106
107	return result;
108	}
109
110	/**
111	* Append a tag to a buffer, adding the separator if necessary. The buffer
112	* must be large enough to contain the resulting tag plus any separator
113	* necessary. The tag must not be a zero-length string.
114	*
115	* @param tag The tag to add.
116	* @param tagLength The length of the tag.
117	* @param buffer The output buffer.
118	* @param bufferLength The length of the output buffer. This is an input/ouput parameter.
119	**/
120	static void U_CALLCONV
121	appendTag(
122	const char* tag,
123	int32_t tagLength,
124	char* buffer,
125	int32_t* bufferLength,
126	UBool withSeparator) {
127
128	if (withSeparator) {
129	buffer[*bufferLength] = `'_'`;
130	++(*bufferLength);
131	}
132
133	uprv_memmove(
134	&buffer[*bufferLength],
135	tag,
136	tagLength);
137
138	*bufferLength += tagLength;
139	}
140
141	/**
142	* Create a tag string from the supplied parameters. The lang, script and region
143	* parameters may be NULL pointers. If they are, their corresponding length parameters
144	* must be less than or equal to 0.
145	*
146	* If any of the language, script or region parameters are empty, and the alternateTags
147	* parameter is not NULL, it will be parsed for potential language, script and region tags
148	* to be used when constructing the new tag. If the alternateTags parameter is NULL, or
149	* it contains no language tag, the default tag for the unknown language is used.
150	*
151	* If the length of the new string exceeds the capacity of the output buffer,
152	* the function copies as many bytes to the output buffer as it can, and returns
153	* the error U_BUFFER_OVERFLOW_ERROR.
154	*
155	* If an illegal argument is provided, the function returns the error
156	* U_ILLEGAL_ARGUMENT_ERROR.
157	*
158	* Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159	* the tag string fits in the output buffer, but the null terminator doesn't.
160	*
161	* @param lang The language tag to use.
162	* @param langLength The length of the language tag.
163	* @param script The script tag to use.
164	* @param scriptLength The length of the script tag.
165	* @param region The region tag to use.
166	* @param regionLength The length of the region tag.
167	* @param trailing Any trailing data to append to the new tag.
168	* @param trailingLength The length of the trailing data.
169	* @param alternateTags A string containing any alternate tags.
170	* @param sink The output sink receiving the tag string.
171	* @param err A pointer to a UErrorCode for error reporting.
172	**/
173	static void U_CALLCONV
174	createTagStringWithAlternates(
175	const char* lang,
176	int32_t langLength,
177	const char* script,
178	int32_t scriptLength,
179	const char* region,
180	int32_t regionLength,
181	const char* trailing,
182	int32_t trailingLength,
183	const char* alternateTags,
184	icu::ByteSink& sink,
185	UErrorCode* err) {
186
187	if (U_FAILURE(*err)) {
188	goto error;
189	}
190	else if (langLength >= ULOC_LANG_CAPACITY \|\|
191	scriptLength >= ULOC_SCRIPT_CAPACITY \|\|
192	regionLength >= ULOC_COUNTRY_CAPACITY) {
193	goto error;
194	}
195	else {
196	/**
197	* ULOC_FULLNAME_CAPACITY will provide enough capacity
198	* that we can build a string that contains the language,
199	* script and region code without worrying about overrunning
200	* the user-supplied buffer.
201	**/
202	char tagBuffer[ULOC_FULLNAME_CAPACITY];
203	int32_t tagLength = `0`;
204	UBool regionAppended = FALSE;
205
206	if (langLength > `0`) {
207	appendTag(
208	lang,
209	langLength,
210	tagBuffer,
211	&tagLength,
212	/withSeparator=/FALSE);
213	}
214	else if (alternateTags == NULL) {
215	/*
216	* Use the empty string for an unknown language, if
217	* we found no language.
218	*/
219	}
220	else {
221	/*
222	* Parse the alternateTags string for the language.
223	*/
224	char alternateLang[ULOC_LANG_CAPACITY];
225	int32_t alternateLangLength = sizeof(alternateLang);
226
227	alternateLangLength =
228	uloc_getLanguage(
229	alternateTags,
230	alternateLang,
231	alternateLangLength,
232	err);
233	if(U_FAILURE(*err) \|\|
234	alternateLangLength >= ULOC_LANG_CAPACITY) {
235	goto error;
236	}
237	else if (alternateLangLength == `0`) {
238	/*
239	* Use the empty string for an unknown language, if
240	* we found no language.
241	*/
242	}
243	else {
244	appendTag(
245	alternateLang,
246	alternateLangLength,
247	tagBuffer,
248	&tagLength,
249	/withSeparator=/FALSE);
250	}
251	}
252
253	if (scriptLength > `0`) {
254	appendTag(
255	script,
256	scriptLength,
257	tagBuffer,
258	&tagLength,
259	/withSeparator=/TRUE);
260	}
261	else if (alternateTags != NULL) {
262	/*
263	* Parse the alternateTags string for the script.
264	*/
265	char alternateScript[ULOC_SCRIPT_CAPACITY];
266
267	const int32_t alternateScriptLength =
268	uloc_getScript(
269	alternateTags,
270	alternateScript,
271	sizeof(alternateScript),
272	err);
273
274	if (U_FAILURE(*err) \|\|
275	alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276	goto error;
277	}
278	else if (alternateScriptLength > `0`) {
279	appendTag(
280	alternateScript,
281	alternateScriptLength,
282	tagBuffer,
283	&tagLength,
284	/withSeparator=/TRUE);
285	}
286	}
287
288	if (regionLength > `0`) {
289	appendTag(
290	region,
291	regionLength,
292	tagBuffer,
293	&tagLength,
294	/withSeparator=/TRUE);
295
296	regionAppended = TRUE;
297	}
298	else if (alternateTags != NULL) {
299	/*
300	* Parse the alternateTags string for the region.
301	*/
302	char alternateRegion[ULOC_COUNTRY_CAPACITY];
303
304	const int32_t alternateRegionLength =
305	uloc_getCountry(
306	alternateTags,
307	alternateRegion,
308	sizeof(alternateRegion),
309	err);
310	if (U_FAILURE(*err) \|\|
311	alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312	goto error;
313	}
314	else if (alternateRegionLength > `0`) {
315	appendTag(
316	alternateRegion,
317	alternateRegionLength,
318	tagBuffer,
319	&tagLength,
320	/withSeparator=/TRUE);
321
322	regionAppended = TRUE;
323	}
324	}
325
326	/**
327	* Copy the partial tag from our internal buffer to the supplied
328	* target.
329	**/
330	sink.Append(tagBuffer, tagLength);
331
332	if (trailingLength > `0`) {
333	if (*trailing != `'@'`) {
334	sink.Append("_", `1`);
335	if (!regionAppended) {
336	/ extra separator is required /
337	sink.Append("_", `1`);
338	}
339	}
340
341	/*
342	* Copy the trailing data into the supplied buffer.
343	*/
344	sink.Append(trailing, trailingLength);
345	}
346
347	return;
348	}
349
350	error:
351
352	/**
353	* An overflow indicates the locale ID passed in
354	* is ill-formed. If we got here, and there was
355	* no previous error, it's an implicit overflow.
356	**/
357	if (*err == U_BUFFER_OVERFLOW_ERROR \|\|
358	U_SUCCESS(*err)) {
359	*err = U_ILLEGAL_ARGUMENT_ERROR;
360	}
361	}
362
363	/**
364	* Create a tag string from the supplied parameters. The lang, script and region
365	* parameters may be NULL pointers. If they are, their corresponding length parameters
366	* must be less than or equal to 0. If the lang parameter is an empty string, the
367	* default value for an unknown language is written to the output buffer.
368	*
369	* If the length of the new string exceeds the capacity of the output buffer,
370	* the function copies as many bytes to the output buffer as it can, and returns
371	* the error U_BUFFER_OVERFLOW_ERROR.
372	*
373	* If an illegal argument is provided, the function returns the error
374	* U_ILLEGAL_ARGUMENT_ERROR.
375	*
376	* @param lang The language tag to use.
377	* @param langLength The length of the language tag.
378	* @param script The script tag to use.
379	* @param scriptLength The length of the script tag.
380	* @param region The region tag to use.
381	* @param regionLength The length of the region tag.
382	* @param trailing Any trailing data to append to the new tag.
383	* @param trailingLength The length of the trailing data.
384	* @param sink The output sink receiving the tag string.
385	* @param err A pointer to a UErrorCode for error reporting.
386	**/
387	static void U_CALLCONV
388	createTagString(
389	const char* lang,
390	int32_t langLength,
391	const char* script,
392	int32_t scriptLength,
393	const char* region,
394	int32_t regionLength,
395	const char* trailing,
396	int32_t trailingLength,
397	icu::ByteSink& sink,
398	UErrorCode* err)
399	{
400	createTagStringWithAlternates(
401	lang,
402	langLength,
403	script,
404	scriptLength,
405	region,
406	regionLength,
407	trailing,
408	trailingLength,
409	NULL,
410	sink,
411	err);
412	}
413
414	/**
415	* Parse the language, script, and region subtags from a tag string, and copy the
416	* results into the corresponding output parameters. The buffers are null-terminated,
417	* unless overflow occurs.
418	*
419	* The langLength, scriptLength, and regionLength parameters are input/output
420	* parameters, and must contain the capacity of their corresponding buffers on
421	* input. On output, they will contain the actual length of the buffers, not
422	* including the null terminator.
423	*
424	* If the length of any of the output subtags exceeds the capacity of the corresponding
425	* buffer, the function copies as many bytes to the output buffer as it can, and returns
426	* the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
427	* occurs.
428	*
429	* If an illegal argument is provided, the function returns the error
430	* U_ILLEGAL_ARGUMENT_ERROR.
431	*
432	* @param localeID The locale ID to parse.
433	* @param lang The language tag buffer.
434	* @param langLength The length of the language tag.
435	* @param script The script tag buffer.
436	* @param scriptLength The length of the script tag.
437	* @param region The region tag buffer.
438	* @param regionLength The length of the region tag.
439	* @param err A pointer to a UErrorCode for error reporting.
440	* @return The number of chars of the localeID parameter consumed.
441	**/
442	static int32_t U_CALLCONV
443	parseTagString(
444	const char* localeID,
445	char* lang,
446	int32_t* langLength,
447	char* script,
448	int32_t* scriptLength,
449	char* region,
450	int32_t* regionLength,
451	UErrorCode* err)
452	{
453	const char* position = localeID;
454	int32_t subtagLength = `0`;
455
456	if(U_FAILURE(*err) \|\|
457	localeID == NULL \|\|
458	lang == NULL \|\|
459	langLength == NULL \|\|
460	script == NULL \|\|
461	scriptLength == NULL \|\|
462	region == NULL \|\|
463	regionLength == NULL) {
464	goto error;
465	}
466
467	subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
468	u_terminateChars(lang, *langLength, subtagLength, err);
469
470	/*
471	* Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
472	* to be an error, because it indicates the user-supplied tag is
473	* not well-formed.
474	*/
475	if(U_FAILURE(*err)) {
476	goto error;
477	}
478
479	*langLength = subtagLength;
480
481	/*
482	* If no language was present, use the empty string instead.
483	* Otherwise, move past any separator.
484	*/
485	if (_isIDSeparator(*position)) {
486	++position;
487	}
488
489	subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
490	u_terminateChars(script, *scriptLength, subtagLength, err);
491
492	if(U_FAILURE(*err)) {
493	goto error;
494	}
495
496	*scriptLength = subtagLength;
497
498	if (*scriptLength > `0`) {
499	if (uprv_strnicmp(script, unknownScript, *scriptLength) == `0`) {
500	/**
501	* If the script part is the "unknown" script, then don't return it.
502	**/
503	*scriptLength = `0`;
504	}
505
506	/*
507	* Move past any separator.
508	*/
509	if (_isIDSeparator(*position)) {
510	++position;
511	}
512	}
513
514	subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
515	u_terminateChars(region, *regionLength, subtagLength, err);
516
517	if(U_FAILURE(*err)) {
518	goto error;
519	}
520
521	*regionLength = subtagLength;
522
523	if (*regionLength > `0`) {
524	if (uprv_strnicmp(region, unknownRegion, *regionLength) == `0`) {
525	/**
526	* If the region part is the "unknown" region, then don't return it.
527	**/
528	*regionLength = `0`;
529	}
530	} else if (position != `0` && position != `'@'`) {
531	/ back up over consumed trailing separator /
532	--position;
533	}
534
535	exit:
536
537	return (int32_t)(position - localeID);
538
539	error:
540
541	/**
542	* If we get here, we have no explicit error, it's the result of an
543	* illegal argument.
544	**/
545	if (!U_FAILURE(*err)) {
546	*err = U_ILLEGAL_ARGUMENT_ERROR;
547	}
548
549	goto exit;
550	}
551
552	static UBool U_CALLCONV
553	createLikelySubtagsString(
554	const char* lang,
555	int32_t langLength,
556	const char* script,
557	int32_t scriptLength,
558	const char* region,
559	int32_t regionLength,
560	const char* variants,
561	int32_t variantsLength,
562	icu::ByteSink& sink,
563	UErrorCode* err) {
564	/**
565	* ULOC_FULLNAME_CAPACITY will provide enough capacity
566	* that we can build a string that contains the language,
567	* script and region code without worrying about overrunning
568	* the user-supplied buffer.
569	**/
570	char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
571
572	if(U_FAILURE(*err)) {
573	goto error;
574	}
575
576	/**
577	* Try the language with the script and region first.
578	**/
579	if (scriptLength > `0` && regionLength > `0`) {
580
581	const char* likelySubtags = NULL;
582
583	icu::CharString tagBuffer;
584	{
585	icu::CharStringByteSink sink(&tagBuffer);
586	createTagString(
587	lang,
588	langLength,
589	script,
590	scriptLength,
591	region,
592	regionLength,
593	NULL,
594	`0`,
595	sink,
596	err);
597	}
598	if(U_FAILURE(*err)) {
599	goto error;
600	}
601
602	likelySubtags =
603	findLikelySubtags(
604	tagBuffer.data(),
605	likelySubtagsBuffer,
606	sizeof(likelySubtagsBuffer),
607	err);
608	if(U_FAILURE(*err)) {
609	goto error;
610	}
611
612	if (likelySubtags != NULL) {
613	/ Always use the language tag from the*
614	maximal string, since it may be more
615	specific than the one provided. /*
616	createTagStringWithAlternates(
617	NULL,
618	`0`,
619	NULL,
620	`0`,
621	NULL,
622	`0`,
623	variants,
624	variantsLength,
625	likelySubtags,
626	sink,
627	err);
628	return TRUE;
629	}
630	}
631
632	/**
633	* Try the language with just the script.
634	**/
635	if (scriptLength > `0`) {
636
637	const char* likelySubtags = NULL;
638
639	icu::CharString tagBuffer;
640	{
641	icu::CharStringByteSink sink(&tagBuffer);
642	createTagString(
643	lang,
644	langLength,
645	script,
646	scriptLength,
647	NULL,
648	`0`,
649	NULL,
650	`0`,
651	sink,
652	err);
653	}
654	if(U_FAILURE(*err)) {
655	goto error;
656	}
657
658	likelySubtags =
659	findLikelySubtags(
660	tagBuffer.data(),
661	likelySubtagsBuffer,
662	sizeof(likelySubtagsBuffer),
663	err);
664	if(U_FAILURE(*err)) {
665	goto error;
666	}
667
668	if (likelySubtags != NULL) {
669	/ Always use the language tag from the*
670	maximal string, since it may be more
671	specific than the one provided. /*
672	createTagStringWithAlternates(
673	NULL,
674	`0`,
675	NULL,
676	`0`,
677	region,
678	regionLength,
679	variants,
680	variantsLength,
681	likelySubtags,
682	sink,
683	err);
684	return TRUE;
685	}
686	}
687
688	/**
689	* Try the language with just the region.
690	**/
691	if (regionLength > `0`) {
692
693	const char* likelySubtags = NULL;
694
695	icu::CharString tagBuffer;
696	{
697	icu::CharStringByteSink sink(&tagBuffer);
698	createTagString(
699	lang,
700	langLength,
701	NULL,
702	`0`,
703	region,
704	regionLength,
705	NULL,
706	`0`,
707	sink,
708	err);
709	}
710	if(U_FAILURE(*err)) {
711	goto error;
712	}
713
714	likelySubtags =
715	findLikelySubtags(
716	tagBuffer.data(),
717	likelySubtagsBuffer,
718	sizeof(likelySubtagsBuffer),
719	err);
720	if(U_FAILURE(*err)) {
721	goto error;
722	}
723
724	if (likelySubtags != NULL) {
725	/ Always use the language tag from the*
726	maximal string, since it may be more
727	specific than the one provided. /*
728	createTagStringWithAlternates(
729	NULL,
730	`0`,
731	script,
732	scriptLength,
733	NULL,
734	`0`,
735	variants,
736	variantsLength,
737	likelySubtags,
738	sink,
739	err);
740	return TRUE;
741	}
742	}
743
744	/**
745	* Finally, try just the language.
746	**/
747	{
748	const char* likelySubtags = NULL;
749
750	icu::CharString tagBuffer;
751	{
752	icu::CharStringByteSink sink(&tagBuffer);
753	createTagString(
754	lang,
755	langLength,
756	NULL,
757	`0`,
758	NULL,
759	`0`,
760	NULL,
761	`0`,
762	sink,
763	err);
764	}
765	if(U_FAILURE(*err)) {
766	goto error;
767	}
768
769	likelySubtags =
770	findLikelySubtags(
771	tagBuffer.data(),
772	likelySubtagsBuffer,
773	sizeof(likelySubtagsBuffer),
774	err);
775	if(U_FAILURE(*err)) {
776	goto error;
777	}
778
779	if (likelySubtags != NULL) {
780	/ Always use the language tag from the*
781	maximal string, since it may be more
782	specific than the one provided. /*
783	createTagStringWithAlternates(
784	NULL,
785	`0`,
786	script,
787	scriptLength,
788	region,
789	regionLength,
790	variants,
791	variantsLength,
792	likelySubtags,
793	sink,
794	err);
795	return TRUE;
796	}
797	}
798
799	return FALSE;
800
801	error:
802
803	if (!U_FAILURE(*err)) {
804	*err = U_ILLEGAL_ARGUMENT_ERROR;
805	}
806
807	return FALSE;
808	}
809
/*
 * Scan the trailing part of a locale ID up to the first '@' and jump to the
 * enclosing function's `error` label when a run of characters between '-' or
 * '_' separators reaches a tenth character. The enclosing function must
 * therefore define an `error` label.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
828
829	static UBool
830	_uloc_addLikelySubtags(const char* localeID,
831	icu::ByteSink& sink,
832	UErrorCode* err) {
833	char lang[ULOC_LANG_CAPACITY];
834	int32_t langLength = sizeof(lang);
835	char script[ULOC_SCRIPT_CAPACITY];
836	int32_t scriptLength = sizeof(script);
837	char region[ULOC_COUNTRY_CAPACITY];
838	int32_t regionLength = sizeof(region);
839	const char* trailing = "";
840	int32_t trailingLength = `0`;
841	int32_t trailingIndex = `0`;
842	UBool success = FALSE;
843
844	if(U_FAILURE(*err)) {
845	goto error;
846	}
847	if (localeID == NULL) {
848	goto error;
849	}
850
851	trailingIndex = parseTagString(
852	localeID,
853	lang,
854	&langLength,
855	script,
856	&scriptLength,
857	region,
858	&regionLength,
859	err);
860	if(U_FAILURE(*err)) {
861	/ Overflow indicates an illegal argument error /
862	if (*err == U_BUFFER_OVERFLOW_ERROR) {
863	*err = U_ILLEGAL_ARGUMENT_ERROR;
864	}
865
866	goto error;
867	}
868
869	/ Find the length of the trailing portion. /
870	while (_isIDSeparator(localeID[trailingIndex])) {
871	trailingIndex++;
872	}
873	trailing = &localeID[trailingIndex];
874	trailingLength = (int32_t)uprv_strlen(trailing);
875
876	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
877
878	success =
879	createLikelySubtagsString(
880	lang,
881	langLength,
882	script,
883	scriptLength,
884	region,
885	regionLength,
886	trailing,
887	trailingLength,
888	sink,
889	err);
890
891	if (!success) {
892	const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
893
894	/*
895	* If we get here, we need to return localeID.
896	*/
897	sink.Append(localeID, localIDLength);
898	}
899
900	return success;
901
902	error:
903
904	if (!U_FAILURE(*err)) {
905	*err = U_ILLEGAL_ARGUMENT_ERROR;
906	}
907	return FALSE;
908	}
909
910	// Add likely subtags to the sink
911	// return true if the value in the sink is produced by a match during the lookup
912	// return false if the value in the sink is the same as input because there are
913	// no match after the lookup.
914	static UBool _ulocimp_addLikelySubtags(const char, icu::ByteSink&, UErrorCode);
915
916	static void
917	_uloc_minimizeSubtags(const char* localeID,
918	icu::ByteSink& sink,
919	UErrorCode* err) {
920	icu::CharString maximizedTagBuffer;
921
922	char lang[ULOC_LANG_CAPACITY];
923	int32_t langLength = sizeof(lang);
924	char script[ULOC_SCRIPT_CAPACITY];
925	int32_t scriptLength = sizeof(script);
926	char region[ULOC_COUNTRY_CAPACITY];
927	int32_t regionLength = sizeof(region);
928	const char* trailing = "";
929	int32_t trailingLength = `0`;
930	int32_t trailingIndex = `0`;
931	UBool successGetMax = FALSE;
932
933	if(U_FAILURE(*err)) {
934	goto error;
935	}
936	else if (localeID == NULL) {
937	goto error;
938	}
939
940	trailingIndex =
941	parseTagString(
942	localeID,
943	lang,
944	&langLength,
945	script,
946	&scriptLength,
947	region,
948	&regionLength,
949	err);
950	if(U_FAILURE(*err)) {
951
952	/ Overflow indicates an illegal argument error /
953	if (*err == U_BUFFER_OVERFLOW_ERROR) {
954	*err = U_ILLEGAL_ARGUMENT_ERROR;
955	}
956
957	goto error;
958	}
959
960	/ Find the spot where the variants or the keywords begin, if any. /
961	while (_isIDSeparator(localeID[trailingIndex])) {
962	trailingIndex++;
963	}
964	trailing = &localeID[trailingIndex];
965	trailingLength = (int32_t)uprv_strlen(trailing);
966
967	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
968
969	{
970	icu::CharString base;
971	{
972	icu::CharStringByteSink baseSink(&base);
973	createTagString(
974	lang,
975	langLength,
976	script,
977	scriptLength,
978	region,
979	regionLength,
980	NULL,
981	`0`,
982	baseSink,
983	err);
984	}
985
986	/**
987	* First, we need to first get the maximization
988	* from AddLikelySubtags.
989	**/
990	{
991	icu::CharStringByteSink maxSink(&maximizedTagBuffer);
992	successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
993	}
994	}
995
996	if(U_FAILURE(*err)) {
997	goto error;
998	}
999
1000	if (!successGetMax) {
1001	/**
1002	* If we got here, return the locale ID parameter unchanged.
1003	**/
1004	const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1005	sink.Append(localeID, localeIDLength);
1006	return;
1007	}
1008
1009	// In the following, the lang, script, region are referring to those in
1010	// the maximizedTagBuffer, not the one in the localeID.
1011	langLength = sizeof(lang);
1012	scriptLength = sizeof(script);
1013	regionLength = sizeof(region);
1014	parseTagString(
1015	maximizedTagBuffer.data(),
1016	lang,
1017	&langLength,
1018	script,
1019	&scriptLength,
1020	region,
1021	&regionLength,
1022	err);
1023	if(U_FAILURE(*err)) {
1024	goto error;
1025	}
1026
1027	/**
1028	* Start first with just the language.
1029	**/
1030	{
1031	icu::CharString tagBuffer;
1032	{
1033	icu::CharStringByteSink tagSink(&tagBuffer);
1034	createLikelySubtagsString(
1035	lang,
1036	langLength,
1037	NULL,
1038	`0`,
1039	NULL,
1040	`0`,
1041	NULL,
1042	`0`,
1043	tagSink,
1044	err);
1045	}
1046
1047	if(U_FAILURE(*err)) {
1048	goto error;
1049	}
1050	else if (!tagBuffer.isEmpty() &&
1051	uprv_strnicmp(
1052	maximizedTagBuffer.data(),
1053	tagBuffer.data(),
1054	tagBuffer.length()) == `0`) {
1055
1056	createTagString(
1057	lang,
1058	langLength,
1059	NULL,
1060	`0`,
1061	NULL,
1062	`0`,
1063	trailing,
1064	trailingLength,
1065	sink,
1066	err);
1067	return;
1068	}
1069	}
1070
1071	/**
1072	* Next, try the language and region.
1073	**/
1074	if (regionLength > `0`) {
1075
1076	icu::CharString tagBuffer;
1077	{
1078	icu::CharStringByteSink tagSink(&tagBuffer);
1079	createLikelySubtagsString(
1080	lang,
1081	langLength,
1082	NULL,
1083	`0`,
1084	region,
1085	regionLength,
1086	NULL,
1087	`0`,
1088	tagSink,
1089	err);
1090	}
1091
1092	if(U_FAILURE(*err)) {
1093	goto error;
1094	}
1095	else if (!tagBuffer.isEmpty() &&
1096	uprv_strnicmp(
1097	maximizedTagBuffer.data(),
1098	tagBuffer.data(),
1099	tagBuffer.length()) == `0`) {
1100
1101	createTagString(
1102	lang,
1103	langLength,
1104	NULL,
1105	`0`,
1106	region,
1107	regionLength,
1108	trailing,
1109	trailingLength,
1110	sink,
1111	err);
1112	return;
1113	}
1114	}
1115
1116	/**
1117	* Finally, try the language and script. This is our last chance,
1118	* since trying with all three subtags would only yield the
1119	* maximal version that we already have.
1120	**/
1121	if (scriptLength > `0`) {
1122	icu::CharString tagBuffer;
1123	{
1124	icu::CharStringByteSink tagSink(&tagBuffer);
1125	createLikelySubtagsString(
1126	lang,
1127	langLength,
1128	script,
1129	scriptLength,
1130	NULL,
1131	`0`,
1132	NULL,
1133	`0`,
1134	tagSink,
1135	err);
1136	}
1137
1138	if(U_FAILURE(*err)) {
1139	goto error;
1140	}
1141	else if (!tagBuffer.isEmpty() &&
1142	uprv_strnicmp(
1143	maximizedTagBuffer.data(),
1144	tagBuffer.data(),
1145	tagBuffer.length()) == `0`) {
1146
1147	createTagString(
1148	lang,
1149	langLength,
1150	script,
1151	scriptLength,
1152	NULL,
1153	`0`,
1154	trailing,
1155	trailingLength,
1156	sink,
1157	err);
1158	return;
1159	}
1160	}
1161
1162	{
1163	/**
1164	* If we got here, return the max + trail.
1165	**/
1166	createTagString(
1167	lang,
1168	langLength,
1169	script,
1170	scriptLength,
1171	region,
1172	regionLength,
1173	trailing,
1174	trailingLength,
1175	sink,
1176	err);
1177	return;
1178	}
1179
1180	error:
1181
1182	if (!U_FAILURE(*err)) {
1183	*err = U_ILLEGAL_ARGUMENT_ERROR;
1184	}
1185	}
1186
1187	static UBool
1188	do_canonicalize(const char* localeID,
1189	char* buffer,
1190	int32_t bufferCapacity,
1191	UErrorCode* err)
1192	{
1193	uloc_canonicalize(
1194	localeID,
1195	buffer,
1196	bufferCapacity,
1197	err);
1198
1199	if (*err == U_STRING_NOT_TERMINATED_WARNING \|\|
1200	*err == U_BUFFER_OVERFLOW_ERROR) {
1201	*err = U_ILLEGAL_ARGUMENT_ERROR;
1202
1203	return FALSE;
1204	}
1205	else if (U_FAILURE(*err)) {
1206
1207	return FALSE;
1208	}
1209	else {
1210	return TRUE;
1211	}
1212	}
1213
1214	U_CAPI int32_t U_EXPORT2
1215	uloc_addLikelySubtags(const char* localeID,
1216	char* maximizedLocaleID,
1217	int32_t maximizedLocaleIDCapacity,
1218	UErrorCode* status) {
1219	if (U_FAILURE(*status)) {
1220	return `0`;
1221	}
1222
1223	icu::CheckedArrayByteSink sink(
1224	maximizedLocaleID, maximizedLocaleIDCapacity);
1225
1226	ulocimp_addLikelySubtags(localeID, sink, status);
1227	int32_t reslen = sink.NumberOfBytesAppended();
1228
1229	if (U_FAILURE(*status)) {
1230	return sink.Overflowed() ? reslen : -`1`;
1231	}
1232
1233	if (sink.Overflowed()) {
1234	*status = U_BUFFER_OVERFLOW_ERROR;
1235	} else {
1236	u_terminateChars(
1237	maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
1238	}
1239
1240	return reslen;
1241	}
1242
1243	static UBool
1244	_ulocimp_addLikelySubtags(const char* localeID,
1245	icu::ByteSink& sink,
1246	UErrorCode* status) {
1247	char localeBuffer[ULOC_FULLNAME_CAPACITY];
1248
1249	if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1250	return _uloc_addLikelySubtags(localeBuffer, sink, status);
1251	}
1252	return FALSE;
1253	}
1254
1255	U_CAPI void U_EXPORT2
1256	ulocimp_addLikelySubtags(const char* localeID,
1257	icu::ByteSink& sink,
1258	UErrorCode* status) {
1259	_ulocimp_addLikelySubtags(localeID, sink, status);
1260	}
1261
1262	U_CAPI int32_t U_EXPORT2
1263	uloc_minimizeSubtags(const char* localeID,
1264	char* minimizedLocaleID,
1265	int32_t minimizedLocaleIDCapacity,
1266	UErrorCode* status) {
1267	if (U_FAILURE(*status)) {
1268	return `0`;
1269	}
1270
1271	icu::CheckedArrayByteSink sink(
1272	minimizedLocaleID, minimizedLocaleIDCapacity);
1273
1274	ulocimp_minimizeSubtags(localeID, sink, status);
1275	int32_t reslen = sink.NumberOfBytesAppended();
1276
1277	if (U_FAILURE(*status)) {
1278	return sink.Overflowed() ? reslen : -`1`;
1279	}
1280
1281	if (sink.Overflowed()) {
1282	*status = U_BUFFER_OVERFLOW_ERROR;
1283	} else {
1284	u_terminateChars(
1285	minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
1286	}
1287
1288	return reslen;
1289	}
1290
1291	U_CAPI void U_EXPORT2
1292	ulocimp_minimizeSubtags(const char* localeID,
1293	icu::ByteSink& sink,
1294	UErrorCode* status) {
1295	char localeBuffer[ULOC_FULLNAME_CAPACITY];
1296
1297	if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1298	_uloc_minimizeSubtags(localeBuffer, sink, status);
1299	}
1300	}
1301
1302	// Pairs of (language subtag, + or -) for finding out fast if common languages
1303	// are LTR (minus) or RTL (plus).
1304	static const char LANG_DIR_STRING[] =
1305	"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1306
1307	// Implemented here because this calls ulocimp_addLikelySubtags().
1308	U_CAPI UBool U_EXPORT2
1309	uloc_isRightToLeft(const char *locale) {
1310	UErrorCode errorCode = U_ZERO_ERROR;
1311	char script[`8`];
1312	int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1313	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
1314	scriptLength == `0`) {
1315	// Fastpath: We know the likely scripts and their writing direction
1316	// for some common languages.
1317	errorCode = U_ZERO_ERROR;
1318	char lang[`8`];
1319	int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1320	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1321	return FALSE;
1322	}
1323	if (langLength > `0`) {
1324	const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1325	if (langPtr != NULL) {
1326	switch (langPtr[langLength]) {
1327	case `'-'`: return FALSE;
1328	case `'+'`: return TRUE;
1329	default: break; // partial match of a longer code
1330	}
1331	}
1332	}
1333	// Otherwise, find the likely script.
1334	errorCode = U_ZERO_ERROR;
1335	icu::CharString likely;
1336	{
1337	icu::CharStringByteSink sink(&likely);
1338	ulocimp_addLikelySubtags(locale, sink, &errorCode);
1339	}
1340	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1341	return FALSE;
1342	}
1343	scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
1344	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
1345	scriptLength == `0`) {
1346	return FALSE;
1347	}
1348	}
1349	UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1350	return uscript_isRightToLeft(scriptCode);
1351	}
1352
1353	U_NAMESPACE_BEGIN
1354
1355	UBool
1356	Locale::isRightToLeft() const {
1357	return uloc_isRightToLeft(getBaseName());
1358	}
1359
1360	U_NAMESPACE_END
1361
1362	// The following must at least allow for rg key value (6) plus terminator (1).
1363	#define ULOC_RG_BUFLEN 8
1364
1365	U_CAPI int32_t U_EXPORT2
1366	ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1367	char region, int32_t regionCapacity, UErrorCode status) {
1368	if (U_FAILURE(*status)) {
1369	return `0`;
1370	}
1371	char rgBuf[ULOC_RG_BUFLEN];
1372	UErrorCode rgStatus = U_ZERO_ERROR;
1373
1374	// First check for rg keyword value
1375	int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1376	if (U_FAILURE(rgStatus) \|\| rgLen != `6`) {
1377	rgLen = `0`;
1378	} else {
1379	// rgBuf guaranteed to be zero terminated here, with text len 6
1380	char *rgPtr = rgBuf;
1381	for (; *rgPtr!= `0`; rgPtr++) {
1382	rgPtr = uprv_toupper(rgPtr);
1383	}
1384	rgLen = (uprv_strcmp(rgBuf+`2`, "ZZZZ") == `0`)? `2`: `0`;
1385	}
1386
1387	if (rgLen == `0`) {
1388	// No valid rg keyword value, try for unicode_region_subtag
1389	rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1390	if (U_FAILURE(*status)) {
1391	rgLen = `0`;
1392	} else if (rgLen == `0` && inferRegion) {
1393	// no unicode_region_subtag but inferRegion TRUE, try likely subtags
1394	rgStatus = U_ZERO_ERROR;
1395	icu::CharString locBuf;
1396	{
1397	icu::CharStringByteSink sink(&locBuf);
1398	ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1399	}
1400	if (U_SUCCESS(rgStatus)) {
1401	rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
1402	if (U_FAILURE(*status)) {
1403	rgLen = `0`;
1404	}
1405	}
1406	}
1407	}
1408
1409	rgBuf[rgLen] = `0`;
1410	uprv_strncpy(region, rgBuf, regionCapacity);
1411	return u_terminateChars(region, regionCapacity, rgLen, status);
1412	}
1413
1414

Browse the source code of engine/third_party/icu/source/common/loclikely.cpp