loclikely.cpp source code [Godot/thirdparty/icu4c/common/loclikely.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 1997-2016, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: loclikely.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2010feb25
16	* created by: Markus W. Scherer
17	*
18	* Code for likely and minimized locale subtags, separated out from other .cpp files
19	* that then do not depend on resource bundle code and likely-subtags data.
20	*/
21
22	#include "unicode/bytestream.h"
23	#include "unicode/utypes.h"
24	#include "unicode/locid.h"
25	#include "unicode/putil.h"
26	#include "unicode/uchar.h"
27	#include "unicode/uloc.h"
28	#include "unicode/ures.h"
29	#include "unicode/uscript.h"
30	#include "bytesinkutil.h"
31	#include "charstr.h"
32	#include "cmemory.h"
33	#include "cstring.h"
34	#include "ulocimp.h"
35	#include "ustr_imp.h"
36
/**
 * Canonical spellings of the subtags that stand for an unknown language,
 * an unknown script and an unknown region, in that order.
 */
static const char* const unknownLanguage = "und";
static const char* const unknownScript   = "Zzzz";
static const char* const unknownRegion   = "ZZ";
43
44	/**
45	* This function looks for the localeID in the likelySubtags resource.
46	*
47	* @param localeID The tag to find.
48	* @param buffer A buffer to hold the matching entry
49	* @param bufferLength The length of the output buffer
50	* @return A pointer to "buffer" if found, or a null pointer if not.
51	*/
52	static const char* U_CALLCONV
53	findLikelySubtags(const char* localeID,
54	char* buffer,
55	int32_t bufferLength,
56	UErrorCode* err) {
57	const char* result = nullptr;
58
59	if (!U_FAILURE(*err)) {
60	int32_t resLen = `0`;
61	const char16_t* s = nullptr;
62	UErrorCode tmpErr = U_ZERO_ERROR;
63	icu::LocalUResourceBundlePointer subtags(ures_openDirect(nullptr, "likelySubtags", &tmpErr));
64	if (U_SUCCESS(tmpErr)) {
65	icu::CharString und;
66	if (localeID != nullptr) {
67	if (*localeID == `'\0'`) {
68	localeID = unknownLanguage;
69	} else if (*localeID == `'_'`) {
70	und.append(unknownLanguage, *err);
71	und.append(localeID, *err);
72	if (U_FAILURE(*err)) {
73	return nullptr;
74	}
75	localeID = und.data();
76	}
77	}
78	s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
79
80	if (U_FAILURE(tmpErr)) {
81	/*
82	* If a resource is missing, it's not really an error, it's
83	* just that we don't have any data for that particular locale ID.
84	*/
85	if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86	*err = tmpErr;
87	}
88	}
89	else if (resLen >= bufferLength) {
90	/ The buffer should never overflow. /
91	*err = U_INTERNAL_PROGRAM_ERROR;
92	}
93	else {
94	u_UCharsToChars(s, buffer, resLen + `1`);
95	if (resLen >= `3` &&
96	uprv_strnicmp(buffer, unknownLanguage, `3`) == `0` &&
97	(resLen == `3` \|\| buffer[`3`] == `'_'`)) {
98	uprv_memmove(buffer, buffer + `3`, resLen - `3` + `1`);
99	}
100	result = buffer;
101	}
102	} else {
103	*err = tmpErr;
104	}
105	}
106
107	return result;
108	}
109
110	/**
111	* Append a tag to a buffer, adding the separator if necessary. The buffer
112	* must be large enough to contain the resulting tag plus any separator
113	* necessary. The tag must not be a zero-length string.
114	*
115	* @param tag The tag to add.
116	* @param tagLength The length of the tag.
117	* @param buffer The output buffer.
118	* @param bufferLength The length of the output buffer. This is an input/output parameter.
119	**/
120	static void U_CALLCONV
121	appendTag(
122	const char* tag,
123	int32_t tagLength,
124	char* buffer,
125	int32_t* bufferLength,
126	UBool withSeparator) {
127
128	if (withSeparator) {
129	buffer[*bufferLength] = `'_'`;
130	++(*bufferLength);
131	}
132
133	uprv_memmove(
134	&buffer[*bufferLength],
135	tag,
136	tagLength);
137
138	*bufferLength += tagLength;
139	}
140
141	/**
142	* Create a tag string from the supplied parameters. The lang, script and region
143	* parameters may be nullptr pointers. If they are, their corresponding length parameters
144	* must be less than or equal to 0.
145	*
146	* If any of the language, script or region parameters are empty, and the alternateTags
147	* parameter is not nullptr, it will be parsed for potential language, script and region tags
148	* to be used when constructing the new tag. If the alternateTags parameter is nullptr, or
149	* it contains no language tag, the default tag for the unknown language is used.
150	*
151	* If the length of the new string exceeds the capacity of the output buffer,
152	* the function copies as many bytes to the output buffer as it can, and returns
153	* the error U_BUFFER_OVERFLOW_ERROR.
154	*
155	* If an illegal argument is provided, the function returns the error
156	* U_ILLEGAL_ARGUMENT_ERROR.
157	*
158	* Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159	* the tag string fits in the output buffer, but the null terminator doesn't.
160	*
161	* @param lang The language tag to use.
162	* @param langLength The length of the language tag.
163	* @param script The script tag to use.
164	* @param scriptLength The length of the script tag.
165	* @param region The region tag to use.
166	* @param regionLength The length of the region tag.
167	* @param trailing Any trailing data to append to the new tag.
168	* @param trailingLength The length of the trailing data.
169	* @param alternateTags A string containing any alternate tags.
170	* @param sink The output sink receiving the tag string.
171	* @param err A pointer to a UErrorCode for error reporting.
172	**/
173	static void U_CALLCONV
174	createTagStringWithAlternates(
175	const char* lang,
176	int32_t langLength,
177	const char* script,
178	int32_t scriptLength,
179	const char* region,
180	int32_t regionLength,
181	const char* trailing,
182	int32_t trailingLength,
183	const char* alternateTags,
184	icu::ByteSink& sink,
185	UErrorCode* err) {
186
187	if (U_FAILURE(*err)) {
188	goto error;
189	}
190	else if (langLength >= ULOC_LANG_CAPACITY \|\|
191	scriptLength >= ULOC_SCRIPT_CAPACITY \|\|
192	regionLength >= ULOC_COUNTRY_CAPACITY) {
193	goto error;
194	}
195	else {
196	/**
197	* ULOC_FULLNAME_CAPACITY will provide enough capacity
198	* that we can build a string that contains the language,
199	* script and region code without worrying about overrunning
200	* the user-supplied buffer.
201	**/
202	char tagBuffer[ULOC_FULLNAME_CAPACITY];
203	int32_t tagLength = `0`;
204	UBool regionAppended = false;
205
206	if (langLength > `0`) {
207	appendTag(
208	lang,
209	langLength,
210	tagBuffer,
211	&tagLength,
212	/withSeparator=/false);
213	}
214	else if (alternateTags == nullptr) {
215	/*
216	* Use the empty string for an unknown language, if
217	* we found no language.
218	*/
219	}
220	else {
221	/*
222	* Parse the alternateTags string for the language.
223	*/
224	char alternateLang[ULOC_LANG_CAPACITY];
225	int32_t alternateLangLength = sizeof(alternateLang);
226
227	alternateLangLength =
228	uloc_getLanguage(
229	alternateTags,
230	alternateLang,
231	alternateLangLength,
232	err);
233	if(U_FAILURE(*err) \|\|
234	alternateLangLength >= ULOC_LANG_CAPACITY) {
235	goto error;
236	}
237	else if (alternateLangLength == `0`) {
238	/*
239	* Use the empty string for an unknown language, if
240	* we found no language.
241	*/
242	}
243	else {
244	appendTag(
245	alternateLang,
246	alternateLangLength,
247	tagBuffer,
248	&tagLength,
249	/withSeparator=/false);
250	}
251	}
252
253	if (scriptLength > `0`) {
254	appendTag(
255	script,
256	scriptLength,
257	tagBuffer,
258	&tagLength,
259	/withSeparator=/true);
260	}
261	else if (alternateTags != nullptr) {
262	/*
263	* Parse the alternateTags string for the script.
264	*/
265	char alternateScript[ULOC_SCRIPT_CAPACITY];
266
267	const int32_t alternateScriptLength =
268	uloc_getScript(
269	alternateTags,
270	alternateScript,
271	sizeof(alternateScript),
272	err);
273
274	if (U_FAILURE(*err) \|\|
275	alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276	goto error;
277	}
278	else if (alternateScriptLength > `0`) {
279	appendTag(
280	alternateScript,
281	alternateScriptLength,
282	tagBuffer,
283	&tagLength,
284	/withSeparator=/true);
285	}
286	}
287
288	if (regionLength > `0`) {
289	appendTag(
290	region,
291	regionLength,
292	tagBuffer,
293	&tagLength,
294	/withSeparator=/true);
295
296	regionAppended = true;
297	}
298	else if (alternateTags != nullptr) {
299	/*
300	* Parse the alternateTags string for the region.
301	*/
302	char alternateRegion[ULOC_COUNTRY_CAPACITY];
303
304	const int32_t alternateRegionLength =
305	uloc_getCountry(
306	alternateTags,
307	alternateRegion,
308	sizeof(alternateRegion),
309	err);
310	if (U_FAILURE(*err) \|\|
311	alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312	goto error;
313	}
314	else if (alternateRegionLength > `0`) {
315	appendTag(
316	alternateRegion,
317	alternateRegionLength,
318	tagBuffer,
319	&tagLength,
320	/withSeparator=/true);
321
322	regionAppended = true;
323	}
324	}
325
326	/**
327	* Copy the partial tag from our internal buffer to the supplied
328	* target.
329	**/
330	sink.Append(tagBuffer, tagLength);
331
332	if (trailingLength > `0`) {
333	if (*trailing != `'@'`) {
334	sink.Append("_", `1`);
335	if (!regionAppended) {
336	/ extra separator is required /
337	sink.Append("_", `1`);
338	}
339	}
340
341	/*
342	* Copy the trailing data into the supplied buffer.
343	*/
344	sink.Append(trailing, trailingLength);
345	}
346
347	return;
348	}
349
350	error:
351
352	/**
353	* An overflow indicates the locale ID passed in
354	* is ill-formed. If we got here, and there was
355	* no previous error, it's an implicit overflow.
356	**/
357	if (*err == U_BUFFER_OVERFLOW_ERROR \|\|
358	U_SUCCESS(*err)) {
359	*err = U_ILLEGAL_ARGUMENT_ERROR;
360	}
361	}
362
363	/**
364	* Create a tag string from the supplied parameters. The lang, script and region
365	* parameters may be nullptr pointers. If they are, their corresponding length parameters
366	* must be less than or equal to 0. If the lang parameter is an empty string, the
367	* default value for an unknown language is written to the output buffer.
368	*
369	* If the length of the new string exceeds the capacity of the output buffer,
370	* the function copies as many bytes to the output buffer as it can, and returns
371	* the error U_BUFFER_OVERFLOW_ERROR.
372	*
373	* If an illegal argument is provided, the function returns the error
374	* U_ILLEGAL_ARGUMENT_ERROR.
375	*
376	* @param lang The language tag to use.
377	* @param langLength The length of the language tag.
378	* @param script The script tag to use.
379	* @param scriptLength The length of the script tag.
380	* @param region The region tag to use.
381	* @param regionLength The length of the region tag.
382	* @param trailing Any trailing data to append to the new tag.
383	* @param trailingLength The length of the trailing data.
384	* @param sink The output sink receiving the tag string.
385	* @param err A pointer to a UErrorCode for error reporting.
386	**/
387	static void U_CALLCONV
388	createTagString(
389	const char* lang,
390	int32_t langLength,
391	const char* script,
392	int32_t scriptLength,
393	const char* region,
394	int32_t regionLength,
395	const char* trailing,
396	int32_t trailingLength,
397	icu::ByteSink& sink,
398	UErrorCode* err)
399	{
400	createTagStringWithAlternates(
401	lang,
402	langLength,
403	script,
404	scriptLength,
405	region,
406	regionLength,
407	trailing,
408	trailingLength,
409	nullptr,
410	sink,
411	err);
412	}
413
414	/**
415	* Parse the language, script, and region subtags from a tag string, and copy the
416	* results into the corresponding output parameters. The buffers are null-terminated,
417	* unless overflow occurs.
418	*
419	* The langLength, scriptLength, and regionLength parameters are input/output
420	* parameters, and must contain the capacity of their corresponding buffers on
421	* input. On output, they will contain the actual length of the buffers, not
422	* including the null terminator.
423	*
424	* If the length of any of the output subtags exceeds the capacity of the corresponding
425	* buffer, the function copies as many bytes to the output buffer as it can, and returns
426	* the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
427	* occurs.
428	*
429	* If an illegal argument is provided, the function returns the error
430	* U_ILLEGAL_ARGUMENT_ERROR.
431	*
432	* @param localeID The locale ID to parse.
433	* @param lang The language tag buffer.
434	* @param langLength The length of the language tag.
435	* @param script The script tag buffer.
436	* @param scriptLength The length of the script tag.
437	* @param region The region tag buffer.
438	* @param regionLength The length of the region tag.
439	* @param err A pointer to a UErrorCode for error reporting.
440	* @return The number of chars of the localeID parameter consumed.
441	**/
442	static int32_t U_CALLCONV
443	parseTagString(
444	const char* localeID,
445	char* lang,
446	int32_t* langLength,
447	char* script,
448	int32_t* scriptLength,
449	char* region,
450	int32_t* regionLength,
451	UErrorCode* err)
452	{
453	const char* position = localeID;
454	int32_t subtagLength = `0`;
455
456	if(U_FAILURE(*err) \|\|
457	localeID == nullptr \|\|
458	lang == nullptr \|\|
459	langLength == nullptr \|\|
460	script == nullptr \|\|
461	scriptLength == nullptr \|\|
462	region == nullptr \|\|
463	regionLength == nullptr) {
464	goto error;
465	}
466
467	subtagLength = ulocimp_getLanguage(position, &position, err).extract(lang, langLength, *err);
468
469	/*
470	* Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
471	* to be an error, because it indicates the user-supplied tag is
472	* not well-formed.
473	*/
474	if(U_FAILURE(*err)) {
475	goto error;
476	}
477
478	*langLength = subtagLength;
479
480	/*
481	* If no language was present, use the empty string instead.
482	* Otherwise, move past any separator.
483	*/
484	if (_isIDSeparator(*position)) {
485	++position;
486	}
487
488	subtagLength = ulocimp_getScript(position, &position, err).extract(script, scriptLength, *err);
489
490	if(U_FAILURE(*err)) {
491	goto error;
492	}
493
494	*scriptLength = subtagLength;
495
496	if (*scriptLength > `0`) {
497	if (uprv_strnicmp(script, unknownScript, *scriptLength) == `0`) {
498	/**
499	* If the script part is the "unknown" script, then don't return it.
500	**/
501	*scriptLength = `0`;
502	}
503
504	/*
505	* Move past any separator.
506	*/
507	if (_isIDSeparator(*position)) {
508	++position;
509	}
510	}
511
512	subtagLength = ulocimp_getCountry(position, &position, err).extract(region, regionLength, *err);
513
514	if(U_FAILURE(*err)) {
515	goto error;
516	}
517
518	*regionLength = subtagLength;
519
520	if (*regionLength > `0`) {
521	if (uprv_strnicmp(region, unknownRegion, *regionLength) == `0`) {
522	/**
523	* If the region part is the "unknown" region, then don't return it.
524	**/
525	*regionLength = `0`;
526	}
527	} else if (position != `0` && position != `'@'`) {
528	/ back up over consumed trailing separator /
529	--position;
530	}
531
532	exit:
533
534	return (int32_t)(position - localeID);
535
536	error:
537
538	/**
539	* If we get here, we have no explicit error, it's the result of an
540	* illegal argument.
541	**/
542	if (!U_FAILURE(*err)) {
543	*err = U_ILLEGAL_ARGUMENT_ERROR;
544	}
545
546	goto exit;
547	}
548
549	static UBool U_CALLCONV
550	createLikelySubtagsString(
551	const char* lang,
552	int32_t langLength,
553	const char* script,
554	int32_t scriptLength,
555	const char* region,
556	int32_t regionLength,
557	const char* variants,
558	int32_t variantsLength,
559	icu::ByteSink& sink,
560	UErrorCode* err) {
561	/**
562	* ULOC_FULLNAME_CAPACITY will provide enough capacity
563	* that we can build a string that contains the language,
564	* script and region code without worrying about overrunning
565	* the user-supplied buffer.
566	**/
567	char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
568
569	if(U_FAILURE(*err)) {
570	goto error;
571	}
572
573	/**
574	* Try the language with the script and region first.
575	**/
576	if (scriptLength > `0` && regionLength > `0`) {
577
578	const char* likelySubtags = nullptr;
579
580	icu::CharString tagBuffer;
581	{
582	icu::CharStringByteSink sink(&tagBuffer);
583	createTagString(
584	lang,
585	langLength,
586	script,
587	scriptLength,
588	region,
589	regionLength,
590	nullptr,
591	`0`,
592	sink,
593	err);
594	}
595	if(U_FAILURE(*err)) {
596	goto error;
597	}
598
599	likelySubtags =
600	findLikelySubtags(
601	tagBuffer.data(),
602	likelySubtagsBuffer,
603	sizeof(likelySubtagsBuffer),
604	err);
605	if(U_FAILURE(*err)) {
606	goto error;
607	}
608
609	if (likelySubtags != nullptr) {
610	/ Always use the language tag from the*
611	maximal string, since it may be more
612	specific than the one provided. /*
613	createTagStringWithAlternates(
614	nullptr,
615	`0`,
616	nullptr,
617	`0`,
618	nullptr,
619	`0`,
620	variants,
621	variantsLength,
622	likelySubtags,
623	sink,
624	err);
625	return true;
626	}
627	}
628
629	/**
630	* Try the language with just the script.
631	**/
632	if (scriptLength > `0`) {
633
634	const char* likelySubtags = nullptr;
635
636	icu::CharString tagBuffer;
637	{
638	icu::CharStringByteSink sink(&tagBuffer);
639	createTagString(
640	lang,
641	langLength,
642	script,
643	scriptLength,
644	nullptr,
645	`0`,
646	nullptr,
647	`0`,
648	sink,
649	err);
650	}
651	if(U_FAILURE(*err)) {
652	goto error;
653	}
654
655	likelySubtags =
656	findLikelySubtags(
657	tagBuffer.data(),
658	likelySubtagsBuffer,
659	sizeof(likelySubtagsBuffer),
660	err);
661	if(U_FAILURE(*err)) {
662	goto error;
663	}
664
665	if (likelySubtags != nullptr) {
666	/ Always use the language tag from the*
667	maximal string, since it may be more
668	specific than the one provided. /*
669	createTagStringWithAlternates(
670	nullptr,
671	`0`,
672	nullptr,
673	`0`,
674	region,
675	regionLength,
676	variants,
677	variantsLength,
678	likelySubtags,
679	sink,
680	err);
681	return true;
682	}
683	}
684
685	/**
686	* Try the language with just the region.
687	**/
688	if (regionLength > `0`) {
689
690	const char* likelySubtags = nullptr;
691
692	icu::CharString tagBuffer;
693	{
694	icu::CharStringByteSink sink(&tagBuffer);
695	createTagString(
696	lang,
697	langLength,
698	nullptr,
699	`0`,
700	region,
701	regionLength,
702	nullptr,
703	`0`,
704	sink,
705	err);
706	}
707	if(U_FAILURE(*err)) {
708	goto error;
709	}
710
711	likelySubtags =
712	findLikelySubtags(
713	tagBuffer.data(),
714	likelySubtagsBuffer,
715	sizeof(likelySubtagsBuffer),
716	err);
717	if(U_FAILURE(*err)) {
718	goto error;
719	}
720
721	if (likelySubtags != nullptr) {
722	/ Always use the language tag from the*
723	maximal string, since it may be more
724	specific than the one provided. /*
725	createTagStringWithAlternates(
726	nullptr,
727	`0`,
728	script,
729	scriptLength,
730	nullptr,
731	`0`,
732	variants,
733	variantsLength,
734	likelySubtags,
735	sink,
736	err);
737	return true;
738	}
739	}
740
741	/**
742	* Finally, try just the language.
743	**/
744	{
745	const char* likelySubtags = nullptr;
746
747	icu::CharString tagBuffer;
748	{
749	icu::CharStringByteSink sink(&tagBuffer);
750	createTagString(
751	lang,
752	langLength,
753	nullptr,
754	`0`,
755	nullptr,
756	`0`,
757	nullptr,
758	`0`,
759	sink,
760	err);
761	}
762	if(U_FAILURE(*err)) {
763	goto error;
764	}
765
766	likelySubtags =
767	findLikelySubtags(
768	tagBuffer.data(),
769	likelySubtagsBuffer,
770	sizeof(likelySubtagsBuffer),
771	err);
772	if(U_FAILURE(*err)) {
773	goto error;
774	}
775
776	if (likelySubtags != nullptr) {
777	/ Always use the language tag from the*
778	maximal string, since it may be more
779	specific than the one provided. /*
780	createTagStringWithAlternates(
781	nullptr,
782	`0`,
783	script,
784	scriptLength,
785	region,
786	regionLength,
787	variants,
788	variantsLength,
789	likelySubtags,
790	sink,
791	err);
792	return true;
793	}
794	}
795
796	return false;
797
798	error:
799
800	if (!U_FAILURE(*err)) {
801	*err = U_ILLEGAL_ARGUMENT_ERROR;
802	}
803
804	return false;
805	}
806
/*
 * Scans the trailing part of a locale ID up to (not including) the first
 * '@' and jumps to the enclosing function's `error` label when a run of
 * characters between '-'/'_' separators is too long. The length counter is
 * tested before it is incremented, so a run of up to 9 characters passes and
 * the 10th character of a run triggers the jump.
 *
 * Requires a label named `error` in the function that uses the macro.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
825
826	static UBool
827	_uloc_addLikelySubtags(const char* localeID,
828	icu::ByteSink& sink,
829	UErrorCode* err) {
830	char lang[ULOC_LANG_CAPACITY];
831	int32_t langLength = sizeof(lang);
832	char script[ULOC_SCRIPT_CAPACITY];
833	int32_t scriptLength = sizeof(script);
834	char region[ULOC_COUNTRY_CAPACITY];
835	int32_t regionLength = sizeof(region);
836	const char* trailing = "";
837	int32_t trailingLength = `0`;
838	int32_t trailingIndex = `0`;
839	UBool success = false;
840
841	if(U_FAILURE(*err)) {
842	goto error;
843	}
844	if (localeID == nullptr) {
845	goto error;
846	}
847
848	trailingIndex = parseTagString(
849	localeID,
850	lang,
851	&langLength,
852	script,
853	&scriptLength,
854	region,
855	&regionLength,
856	err);
857	if(U_FAILURE(*err)) {
858	/ Overflow indicates an illegal argument error /
859	if (*err == U_BUFFER_OVERFLOW_ERROR) {
860	*err = U_ILLEGAL_ARGUMENT_ERROR;
861	}
862
863	goto error;
864	}
865
866	/ Find the length of the trailing portion. /
867	while (_isIDSeparator(localeID[trailingIndex])) {
868	trailingIndex++;
869	}
870	trailing = &localeID[trailingIndex];
871	trailingLength = (int32_t)uprv_strlen(trailing);
872
873	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
874
875	success =
876	createLikelySubtagsString(
877	lang,
878	langLength,
879	script,
880	scriptLength,
881	region,
882	regionLength,
883	trailing,
884	trailingLength,
885	sink,
886	err);
887
888	if (!success) {
889	const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
890
891	/*
892	* If we get here, we need to return localeID.
893	*/
894	sink.Append(localeID, localIDLength);
895	}
896
897	return success;
898
899	error:
900
901	if (!U_FAILURE(*err)) {
902	*err = U_ILLEGAL_ARGUMENT_ERROR;
903	}
904	return false;
905	}
906
907	// Add likely subtags to the sink
908	// return true if the value in the sink is produced by a match during the lookup
909	// return false if the value in the sink is the same as input because there are
910	// no match after the lookup.
911	static UBool _ulocimp_addLikelySubtags(const char, icu::ByteSink&, UErrorCode);
912
913	static void
914	_uloc_minimizeSubtags(const char* localeID,
915	icu::ByteSink& sink,
916	UErrorCode* err) {
917	icu::CharString maximizedTagBuffer;
918
919	char lang[ULOC_LANG_CAPACITY];
920	int32_t langLength = sizeof(lang);
921	char script[ULOC_SCRIPT_CAPACITY];
922	int32_t scriptLength = sizeof(script);
923	char region[ULOC_COUNTRY_CAPACITY];
924	int32_t regionLength = sizeof(region);
925	const char* trailing = "";
926	int32_t trailingLength = `0`;
927	int32_t trailingIndex = `0`;
928	UBool successGetMax = false;
929
930	if(U_FAILURE(*err)) {
931	goto error;
932	}
933	else if (localeID == nullptr) {
934	goto error;
935	}
936
937	trailingIndex =
938	parseTagString(
939	localeID,
940	lang,
941	&langLength,
942	script,
943	&scriptLength,
944	region,
945	&regionLength,
946	err);
947	if(U_FAILURE(*err)) {
948
949	/ Overflow indicates an illegal argument error /
950	if (*err == U_BUFFER_OVERFLOW_ERROR) {
951	*err = U_ILLEGAL_ARGUMENT_ERROR;
952	}
953
954	goto error;
955	}
956
957	/ Find the spot where the variants or the keywords begin, if any. /
958	while (_isIDSeparator(localeID[trailingIndex])) {
959	trailingIndex++;
960	}
961	trailing = &localeID[trailingIndex];
962	trailingLength = (int32_t)uprv_strlen(trailing);
963
964	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
965
966	{
967	icu::CharString base;
968	{
969	icu::CharStringByteSink baseSink(&base);
970	createTagString(
971	lang,
972	langLength,
973	script,
974	scriptLength,
975	region,
976	regionLength,
977	nullptr,
978	`0`,
979	baseSink,
980	err);
981	}
982
983	/**
984	* First, we need to first get the maximization
985	* from AddLikelySubtags.
986	**/
987	{
988	icu::CharStringByteSink maxSink(&maximizedTagBuffer);
989	successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
990	}
991	}
992
993	if(U_FAILURE(*err)) {
994	goto error;
995	}
996
997	if (!successGetMax) {
998	/**
999	* If we got here, return the locale ID parameter unchanged.
1000	**/
1001	const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1002	sink.Append(localeID, localeIDLength);
1003	return;
1004	}
1005
1006	// In the following, the lang, script, region are referring to those in
1007	// the maximizedTagBuffer, not the one in the localeID.
1008	langLength = sizeof(lang);
1009	scriptLength = sizeof(script);
1010	regionLength = sizeof(region);
1011	parseTagString(
1012	maximizedTagBuffer.data(),
1013	lang,
1014	&langLength,
1015	script,
1016	&scriptLength,
1017	region,
1018	&regionLength,
1019	err);
1020	if(U_FAILURE(*err)) {
1021	goto error;
1022	}
1023
1024	/**
1025	* Start first with just the language.
1026	**/
1027	{
1028	icu::CharString tagBuffer;
1029	{
1030	icu::CharStringByteSink tagSink(&tagBuffer);
1031	createLikelySubtagsString(
1032	lang,
1033	langLength,
1034	nullptr,
1035	`0`,
1036	nullptr,
1037	`0`,
1038	nullptr,
1039	`0`,
1040	tagSink,
1041	err);
1042	}
1043
1044	if(U_FAILURE(*err)) {
1045	goto error;
1046	}
1047	else if (!tagBuffer.isEmpty() &&
1048	uprv_strnicmp(
1049	maximizedTagBuffer.data(),
1050	tagBuffer.data(),
1051	tagBuffer.length()) == `0`) {
1052
1053	createTagString(
1054	lang,
1055	langLength,
1056	nullptr,
1057	`0`,
1058	nullptr,
1059	`0`,
1060	trailing,
1061	trailingLength,
1062	sink,
1063	err);
1064	return;
1065	}
1066	}
1067
1068	/**
1069	* Next, try the language and region.
1070	**/
1071	if (regionLength > `0`) {
1072
1073	icu::CharString tagBuffer;
1074	{
1075	icu::CharStringByteSink tagSink(&tagBuffer);
1076	createLikelySubtagsString(
1077	lang,
1078	langLength,
1079	nullptr,
1080	`0`,
1081	region,
1082	regionLength,
1083	nullptr,
1084	`0`,
1085	tagSink,
1086	err);
1087	}
1088
1089	if(U_FAILURE(*err)) {
1090	goto error;
1091	}
1092	else if (!tagBuffer.isEmpty() &&
1093	uprv_strnicmp(
1094	maximizedTagBuffer.data(),
1095	tagBuffer.data(),
1096	tagBuffer.length()) == `0`) {
1097
1098	createTagString(
1099	lang,
1100	langLength,
1101	nullptr,
1102	`0`,
1103	region,
1104	regionLength,
1105	trailing,
1106	trailingLength,
1107	sink,
1108	err);
1109	return;
1110	}
1111	}
1112
1113	/**
1114	* Finally, try the language and script. This is our last chance,
1115	* since trying with all three subtags would only yield the
1116	* maximal version that we already have.
1117	**/
1118	if (scriptLength > `0`) {
1119	icu::CharString tagBuffer;
1120	{
1121	icu::CharStringByteSink tagSink(&tagBuffer);
1122	createLikelySubtagsString(
1123	lang,
1124	langLength,
1125	script,
1126	scriptLength,
1127	nullptr,
1128	`0`,
1129	nullptr,
1130	`0`,
1131	tagSink,
1132	err);
1133	}
1134
1135	if(U_FAILURE(*err)) {
1136	goto error;
1137	}
1138	else if (!tagBuffer.isEmpty() &&
1139	uprv_strnicmp(
1140	maximizedTagBuffer.data(),
1141	tagBuffer.data(),
1142	tagBuffer.length()) == `0`) {
1143
1144	createTagString(
1145	lang,
1146	langLength,
1147	script,
1148	scriptLength,
1149	nullptr,
1150	`0`,
1151	trailing,
1152	trailingLength,
1153	sink,
1154	err);
1155	return;
1156	}
1157	}
1158
1159	{
1160	/**
1161	* If we got here, return the max + trail.
1162	**/
1163	createTagString(
1164	lang,
1165	langLength,
1166	script,
1167	scriptLength,
1168	region,
1169	regionLength,
1170	trailing,
1171	trailingLength,
1172	sink,
1173	err);
1174	return;
1175	}
1176
1177	error:
1178
1179	if (!U_FAILURE(*err)) {
1180	*err = U_ILLEGAL_ARGUMENT_ERROR;
1181	}
1182	}
1183
1184	static int32_t
1185	do_canonicalize(const char* localeID,
1186	char* buffer,
1187	int32_t bufferCapacity,
1188	UErrorCode* err)
1189	{
1190	int32_t canonicalizedSize = uloc_canonicalize(
1191	localeID,
1192	buffer,
1193	bufferCapacity,
1194	err);
1195
1196	if (*err == U_STRING_NOT_TERMINATED_WARNING \|\|
1197	*err == U_BUFFER_OVERFLOW_ERROR) {
1198	return canonicalizedSize;
1199	}
1200	else if (U_FAILURE(*err)) {
1201
1202	return -`1`;
1203	}
1204	else {
1205	return canonicalizedSize;
1206	}
1207	}
1208
1209	U_CAPI int32_t U_EXPORT2
1210	uloc_addLikelySubtags(const char* localeID,
1211	char* maximizedLocaleID,
1212	int32_t maximizedLocaleIDCapacity,
1213	UErrorCode* status) {
1214	if (U_FAILURE(*status)) {
1215	return `0`;
1216	}
1217
1218	icu::CheckedArrayByteSink sink(
1219	maximizedLocaleID, maximizedLocaleIDCapacity);
1220
1221	ulocimp_addLikelySubtags(localeID, sink, status);
1222	int32_t reslen = sink.NumberOfBytesAppended();
1223
1224	if (U_FAILURE(*status)) {
1225	return sink.Overflowed() ? reslen : -`1`;
1226	}
1227
1228	if (sink.Overflowed()) {
1229	*status = U_BUFFER_OVERFLOW_ERROR;
1230	} else {
1231	u_terminateChars(
1232	maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
1233	}
1234
1235	return reslen;
1236	}
1237
1238	static UBool
1239	_ulocimp_addLikelySubtags(const char* localeID,
1240	icu::ByteSink& sink,
1241	UErrorCode* status) {
1242	PreflightingLocaleIDBuffer localeBuffer;
1243	do {
1244	localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
1245	localeBuffer.getCapacity(), status);
1246	} while (localeBuffer.needToTryAgain(status));
1247
1248	if (U_SUCCESS(*status)) {
1249	return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
1250	} else {
1251	return false;
1252	}
1253	}
1254
1255	U_CAPI void U_EXPORT2
1256	ulocimp_addLikelySubtags(const char* localeID,
1257	icu::ByteSink& sink,
1258	UErrorCode* status) {
1259	_ulocimp_addLikelySubtags(localeID, sink, status);
1260	}
1261
1262	U_CAPI int32_t U_EXPORT2
1263	uloc_minimizeSubtags(const char* localeID,
1264	char* minimizedLocaleID,
1265	int32_t minimizedLocaleIDCapacity,
1266	UErrorCode* status) {
1267	if (U_FAILURE(*status)) {
1268	return `0`;
1269	}
1270
1271	icu::CheckedArrayByteSink sink(
1272	minimizedLocaleID, minimizedLocaleIDCapacity);
1273
1274	ulocimp_minimizeSubtags(localeID, sink, status);
1275	int32_t reslen = sink.NumberOfBytesAppended();
1276
1277	if (U_FAILURE(*status)) {
1278	return sink.Overflowed() ? reslen : -`1`;
1279	}
1280
1281	if (sink.Overflowed()) {
1282	*status = U_BUFFER_OVERFLOW_ERROR;
1283	} else {
1284	u_terminateChars(
1285	minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
1286	}
1287
1288	return reslen;
1289	}
1290
1291	U_CAPI void U_EXPORT2
1292	ulocimp_minimizeSubtags(const char* localeID,
1293	icu::ByteSink& sink,
1294	UErrorCode* status) {
1295	PreflightingLocaleIDBuffer localeBuffer;
1296	do {
1297	localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
1298	localeBuffer.getCapacity(), status);
1299	} while (localeBuffer.needToTryAgain(status));
1300
1301	_uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
1302	}
1303
1304	// Pairs of (language subtag, + or -) for finding out fast if common languages
1305	// are LTR (minus) or RTL (plus).
1306	static const char LANG_DIR_STRING[] =
1307	"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1308
1309	// Implemented here because this calls ulocimp_addLikelySubtags().
1310	U_CAPI UBool U_EXPORT2
1311	uloc_isRightToLeft(const char *locale) {
1312	UErrorCode errorCode = U_ZERO_ERROR;
1313	char script[`8`];
1314	int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1315	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
1316	scriptLength == `0`) {
1317	// Fastpath: We know the likely scripts and their writing direction
1318	// for some common languages.
1319	errorCode = U_ZERO_ERROR;
1320	char lang[`8`];
1321	int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1322	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1323	return false;
1324	}
1325	if (langLength > `0`) {
1326	const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1327	if (langPtr != nullptr) {
1328	switch (langPtr[langLength]) {
1329	case `'-'`: return false;
1330	case `'+'`: return true;
1331	default: break; // partial match of a longer code
1332	}
1333	}
1334	}
1335	// Otherwise, find the likely script.
1336	errorCode = U_ZERO_ERROR;
1337	icu::CharString likely;
1338	{
1339	icu::CharStringByteSink sink(&likely);
1340	ulocimp_addLikelySubtags(locale, sink, &errorCode);
1341	}
1342	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1343	return false;
1344	}
1345	scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
1346	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
1347	scriptLength == `0`) {
1348	return false;
1349	}
1350	}
1351	UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1352	return uscript_isRightToLeft(scriptCode);
1353	}
1354
1355	U_NAMESPACE_BEGIN
1356
1357	UBool
1358	Locale::isRightToLeft() const {
1359	return uloc_isRightToLeft(getBaseName());
1360	}
1361
1362	U_NAMESPACE_END
1363
1364	// The following must at least allow for rg key value (6) plus terminator (1).
1365	#define ULOC_RG_BUFLEN 8
1366
1367	U_CAPI int32_t U_EXPORT2
1368	ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1369	char region, int32_t regionCapacity, UErrorCode status) {
1370	if (U_FAILURE(*status)) {
1371	return `0`;
1372	}
1373	char rgBuf[ULOC_RG_BUFLEN];
1374	UErrorCode rgStatus = U_ZERO_ERROR;
1375
1376	// First check for rg keyword value
1377	int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1378	if (U_FAILURE(rgStatus) \|\| rgLen != `6`) {
1379	rgLen = `0`;
1380	} else {
1381	// rgBuf guaranteed to be zero terminated here, with text len 6
1382	char *rgPtr = rgBuf;
1383	for (; *rgPtr!= `0`; rgPtr++) {
1384	rgPtr = uprv_toupper(rgPtr);
1385	}
1386	rgLen = (uprv_strcmp(rgBuf+`2`, "ZZZZ") == `0`)? `2`: `0`;
1387	}
1388
1389	if (rgLen == `0`) {
1390	// No valid rg keyword value, try for unicode_region_subtag
1391	rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1392	if (U_FAILURE(*status)) {
1393	rgLen = `0`;
1394	} else if (rgLen == `0` && inferRegion) {
1395	// no unicode_region_subtag but inferRegion true, try likely subtags
1396	rgStatus = U_ZERO_ERROR;
1397	icu::CharString locBuf;
1398	{
1399	icu::CharStringByteSink sink(&locBuf);
1400	ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1401	}
1402	if (U_SUCCESS(rgStatus)) {
1403	rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
1404	if (U_FAILURE(*status)) {
1405	rgLen = `0`;
1406	}
1407	}
1408	}
1409	}
1410
1411	rgBuf[rgLen] = `0`;
1412	uprv_strncpy(region, rgBuf, regionCapacity);
1413	return u_terminateChars(region, regionCapacity, rgLen, status);
1414	}
1415
1416

Browse the source code of Godot/thirdparty/icu4c/common/loclikely.cpp