loclikely.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/loclikely.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 1997-2016, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: loclikely.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2010feb25
16	* created by: Markus W. Scherer
17	*
18	* Code for likely and minimized locale subtags, separated out from other .cpp files
19	* that then do not depend on resource bundle code and likely-subtags data.
20	*/
21
22	#include "unicode/bytestream.h"
23	#include "unicode/utypes.h"
24	#include "unicode/locid.h"
25	#include "unicode/putil.h"
26	#include "unicode/uchar.h"
27	#include "unicode/uloc.h"
28	#include "unicode/ures.h"
29	#include "unicode/uscript.h"
30	#include "bytesinkutil.h"
31	#include "charstr.h"
32	#include "cmemory.h"
33	#include "cstring.h"
34	#include "ulocimp.h"
35	#include "ustr_imp.h"
36
/*
 * Canonical spellings of the "unknown" placeholder subtags: one each for
 * language, script and region.
 */
static const char* const unknownLanguage = "und";
static const char* const unknownScript   = "Zzzz";
static const char* const unknownRegion   = "ZZ";
43
44	/**
45	* This function looks for the localeID in the likelySubtags resource.
46	*
47	* @param localeID The tag to find.
48	* @param buffer A buffer to hold the matching entry
49	* @param bufferLength The length of the output buffer
50	* @return A pointer to "buffer" if found, or a null pointer if not.
51	*/
52	static const char* U_CALLCONV
53	findLikelySubtags(const char* localeID,
54	char* buffer,
55	int32_t bufferLength,
56	UErrorCode* err) {
57	const char* result = NULL;
58
59	if (!U_FAILURE(*err)) {
60	int32_t resLen = `0`;
61	const UChar* s = NULL;
62	UErrorCode tmpErr = U_ZERO_ERROR;
63	icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
64	if (U_SUCCESS(tmpErr)) {
65	icu::CharString und;
66	if (localeID != NULL) {
67	if (*localeID == `'\0'`) {
68	localeID = unknownLanguage;
69	} else if (*localeID == `'_'`) {
70	und.append(unknownLanguage, *err);
71	und.append(localeID, *err);
72	if (U_FAILURE(*err)) {
73	return NULL;
74	}
75	localeID = und.data();
76	}
77	}
78	s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
79
80	if (U_FAILURE(tmpErr)) {
81	/*
82	* If a resource is missing, it's not really an error, it's
83	* just that we don't have any data for that particular locale ID.
84	*/
85	if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86	*err = tmpErr;
87	}
88	}
89	else if (resLen >= bufferLength) {
90	/ The buffer should never overflow. /
91	*err = U_INTERNAL_PROGRAM_ERROR;
92	}
93	else {
94	u_UCharsToChars(s, buffer, resLen + `1`);
95	if (resLen >= `3` &&
96	uprv_strnicmp(buffer, unknownLanguage, `3`) == `0` &&
97	(resLen == `3` \|\| buffer[`3`] == `'_'`)) {
98	uprv_memmove(buffer, buffer + `3`, resLen - `3` + `1`);
99	}
100	result = buffer;
101	}
102	} else {
103	*err = tmpErr;
104	}
105	}
106
107	return result;
108	}
109
110	/**
111	* Append a tag to a buffer, adding the separator if necessary. The buffer
112	* must be large enough to contain the resulting tag plus any separator
113	* necessary. The tag must not be a zero-length string.
114	*
115	* @param tag The tag to add.
116	* @param tagLength The length of the tag.
117	* @param buffer The output buffer.
118	* @param bufferLength The length of the output buffer. This is an input/ouput parameter.
119	**/
120	static void U_CALLCONV
121	appendTag(
122	const char* tag,
123	int32_t tagLength,
124	char* buffer,
125	int32_t* bufferLength,
126	UBool withSeparator) {
127
128	if (withSeparator) {
129	buffer[*bufferLength] = `'_'`;
130	++(*bufferLength);
131	}
132
133	uprv_memmove(
134	&buffer[*bufferLength],
135	tag,
136	tagLength);
137
138	*bufferLength += tagLength;
139	}
140
141	/**
142	* Create a tag string from the supplied parameters. The lang, script and region
143	* parameters may be NULL pointers. If they are, their corresponding length parameters
144	* must be less than or equal to 0.
145	*
146	* If any of the language, script or region parameters are empty, and the alternateTags
147	* parameter is not NULL, it will be parsed for potential language, script and region tags
148	* to be used when constructing the new tag. If the alternateTags parameter is NULL, or
149	* it contains no language tag, the default tag for the unknown language is used.
150	*
151	* If the length of the new string exceeds the capacity of the output buffer,
152	* the function copies as many bytes to the output buffer as it can, and returns
153	* the error U_BUFFER_OVERFLOW_ERROR.
154	*
155	* If an illegal argument is provided, the function returns the error
156	* U_ILLEGAL_ARGUMENT_ERROR.
157	*
158	* Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159	* the tag string fits in the output buffer, but the null terminator doesn't.
160	*
161	* @param lang The language tag to use.
162	* @param langLength The length of the language tag.
163	* @param script The script tag to use.
164	* @param scriptLength The length of the script tag.
165	* @param region The region tag to use.
166	* @param regionLength The length of the region tag.
167	* @param trailing Any trailing data to append to the new tag.
168	* @param trailingLength The length of the trailing data.
169	* @param alternateTags A string containing any alternate tags.
170	* @param sink The output sink receiving the tag string.
171	* @param err A pointer to a UErrorCode for error reporting.
172	**/
173	static void U_CALLCONV
174	createTagStringWithAlternates(
175	const char* lang,
176	int32_t langLength,
177	const char* script,
178	int32_t scriptLength,
179	const char* region,
180	int32_t regionLength,
181	const char* trailing,
182	int32_t trailingLength,
183	const char* alternateTags,
184	icu::ByteSink& sink,
185	UErrorCode* err) {
186
187	if (U_FAILURE(*err)) {
188	goto error;
189	}
190	else if (langLength >= ULOC_LANG_CAPACITY \|\|
191	scriptLength >= ULOC_SCRIPT_CAPACITY \|\|
192	regionLength >= ULOC_COUNTRY_CAPACITY) {
193	goto error;
194	}
195	else {
196	/**
197	* ULOC_FULLNAME_CAPACITY will provide enough capacity
198	* that we can build a string that contains the language,
199	* script and region code without worrying about overrunning
200	* the user-supplied buffer.
201	**/
202	char tagBuffer[ULOC_FULLNAME_CAPACITY];
203	int32_t tagLength = `0`;
204	UBool regionAppended = FALSE;
205
206	if (langLength > `0`) {
207	appendTag(
208	lang,
209	langLength,
210	tagBuffer,
211	&tagLength,
212	/withSeparator=/FALSE);
213	}
214	else if (alternateTags == NULL) {
215	/*
216	* Use the empty string for an unknown language, if
217	* we found no language.
218	*/
219	}
220	else {
221	/*
222	* Parse the alternateTags string for the language.
223	*/
224	char alternateLang[ULOC_LANG_CAPACITY];
225	int32_t alternateLangLength = sizeof(alternateLang);
226
227	alternateLangLength =
228	uloc_getLanguage(
229	alternateTags,
230	alternateLang,
231	alternateLangLength,
232	err);
233	if(U_FAILURE(*err) \|\|
234	alternateLangLength >= ULOC_LANG_CAPACITY) {
235	goto error;
236	}
237	else if (alternateLangLength == `0`) {
238	/*
239	* Use the empty string for an unknown language, if
240	* we found no language.
241	*/
242	}
243	else {
244	appendTag(
245	alternateLang,
246	alternateLangLength,
247	tagBuffer,
248	&tagLength,
249	/withSeparator=/FALSE);
250	}
251	}
252
253	if (scriptLength > `0`) {
254	appendTag(
255	script,
256	scriptLength,
257	tagBuffer,
258	&tagLength,
259	/withSeparator=/TRUE);
260	}
261	else if (alternateTags != NULL) {
262	/*
263	* Parse the alternateTags string for the script.
264	*/
265	char alternateScript[ULOC_SCRIPT_CAPACITY];
266
267	const int32_t alternateScriptLength =
268	uloc_getScript(
269	alternateTags,
270	alternateScript,
271	sizeof(alternateScript),
272	err);
273
274	if (U_FAILURE(*err) \|\|
275	alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276	goto error;
277	}
278	else if (alternateScriptLength > `0`) {
279	appendTag(
280	alternateScript,
281	alternateScriptLength,
282	tagBuffer,
283	&tagLength,
284	/withSeparator=/TRUE);
285	}
286	}
287
288	if (regionLength > `0`) {
289	appendTag(
290	region,
291	regionLength,
292	tagBuffer,
293	&tagLength,
294	/withSeparator=/TRUE);
295
296	regionAppended = TRUE;
297	}
298	else if (alternateTags != NULL) {
299	/*
300	* Parse the alternateTags string for the region.
301	*/
302	char alternateRegion[ULOC_COUNTRY_CAPACITY];
303
304	const int32_t alternateRegionLength =
305	uloc_getCountry(
306	alternateTags,
307	alternateRegion,
308	sizeof(alternateRegion),
309	err);
310	if (U_FAILURE(*err) \|\|
311	alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312	goto error;
313	}
314	else if (alternateRegionLength > `0`) {
315	appendTag(
316	alternateRegion,
317	alternateRegionLength,
318	tagBuffer,
319	&tagLength,
320	/withSeparator=/TRUE);
321
322	regionAppended = TRUE;
323	}
324	}
325
326	/**
327	* Copy the partial tag from our internal buffer to the supplied
328	* target.
329	**/
330	sink.Append(tagBuffer, tagLength);
331
332	if (trailingLength > `0`) {
333	if (*trailing != `'@'`) {
334	sink.Append("_", `1`);
335	if (!regionAppended) {
336	/ extra separator is required /
337	sink.Append("_", `1`);
338	}
339	}
340
341	/*
342	* Copy the trailing data into the supplied buffer.
343	*/
344	sink.Append(trailing, trailingLength);
345	}
346
347	return;
348	}
349
350	error:
351
352	/**
353	* An overflow indicates the locale ID passed in
354	* is ill-formed. If we got here, and there was
355	* no previous error, it's an implicit overflow.
356	**/
357	if (*err == U_BUFFER_OVERFLOW_ERROR \|\|
358	U_SUCCESS(*err)) {
359	*err = U_ILLEGAL_ARGUMENT_ERROR;
360	}
361	}
362
363	/**
364	* Create a tag string from the supplied parameters. The lang, script and region
365	* parameters may be NULL pointers. If they are, their corresponding length parameters
366	* must be less than or equal to 0. If the lang parameter is an empty string, the
367	* default value for an unknown language is written to the output buffer.
368	*
369	* If the length of the new string exceeds the capacity of the output buffer,
370	* the function copies as many bytes to the output buffer as it can, and returns
371	* the error U_BUFFER_OVERFLOW_ERROR.
372	*
373	* If an illegal argument is provided, the function returns the error
374	* U_ILLEGAL_ARGUMENT_ERROR.
375	*
376	* @param lang The language tag to use.
377	* @param langLength The length of the language tag.
378	* @param script The script tag to use.
379	* @param scriptLength The length of the script tag.
380	* @param region The region tag to use.
381	* @param regionLength The length of the region tag.
382	* @param trailing Any trailing data to append to the new tag.
383	* @param trailingLength The length of the trailing data.
384	* @param sink The output sink receiving the tag string.
385	* @param err A pointer to a UErrorCode for error reporting.
386	**/
387	static void U_CALLCONV
388	createTagString(
389	const char* lang,
390	int32_t langLength,
391	const char* script,
392	int32_t scriptLength,
393	const char* region,
394	int32_t regionLength,
395	const char* trailing,
396	int32_t trailingLength,
397	icu::ByteSink& sink,
398	UErrorCode* err)
399	{
400	createTagStringWithAlternates(
401	lang,
402	langLength,
403	script,
404	scriptLength,
405	region,
406	regionLength,
407	trailing,
408	trailingLength,
409	NULL,
410	sink,
411	err);
412	}
413
414	/**
415	* Parse the language, script, and region subtags from a tag string, and copy the
416	* results into the corresponding output parameters. The buffers are null-terminated,
417	* unless overflow occurs.
418	*
419	* The langLength, scriptLength, and regionLength parameters are input/output
420	* parameters, and must contain the capacity of their corresponding buffers on
421	* input. On output, they will contain the actual length of the buffers, not
422	* including the null terminator.
423	*
424	* If the length of any of the output subtags exceeds the capacity of the corresponding
425	* buffer, the function copies as many bytes to the output buffer as it can, and returns
426	* the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
427	* occurs.
428	*
429	* If an illegal argument is provided, the function returns the error
430	* U_ILLEGAL_ARGUMENT_ERROR.
431	*
432	* @param localeID The locale ID to parse.
433	* @param lang The language tag buffer.
434	* @param langLength The length of the language tag.
435	* @param script The script tag buffer.
436	* @param scriptLength The length of the script tag.
437	* @param region The region tag buffer.
438	* @param regionLength The length of the region tag.
439	* @param err A pointer to a UErrorCode for error reporting.
440	* @return The number of chars of the localeID parameter consumed.
441	**/
442	static int32_t U_CALLCONV
443	parseTagString(
444	const char* localeID,
445	char* lang,
446	int32_t* langLength,
447	char* script,
448	int32_t* scriptLength,
449	char* region,
450	int32_t* regionLength,
451	UErrorCode* err)
452	{
453	const char* position = localeID;
454	int32_t subtagLength = `0`;
455
456	if(U_FAILURE(*err) \|\|
457	localeID == NULL \|\|
458	lang == NULL \|\|
459	langLength == NULL \|\|
460	script == NULL \|\|
461	scriptLength == NULL \|\|
462	region == NULL \|\|
463	regionLength == NULL) {
464	goto error;
465	}
466
467	subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
468	u_terminateChars(lang, *langLength, subtagLength, err);
469
470	/*
471	* Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
472	* to be an error, because it indicates the user-supplied tag is
473	* not well-formed.
474	*/
475	if(U_FAILURE(*err)) {
476	goto error;
477	}
478
479	*langLength = subtagLength;
480
481	/*
482	* If no language was present, use the empty string instead.
483	* Otherwise, move past any separator.
484	*/
485	if (_isIDSeparator(*position)) {
486	++position;
487	}
488
489	subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
490	u_terminateChars(script, *scriptLength, subtagLength, err);
491
492	if(U_FAILURE(*err)) {
493	goto error;
494	}
495
496	*scriptLength = subtagLength;
497
498	if (*scriptLength > `0`) {
499	if (uprv_strnicmp(script, unknownScript, *scriptLength) == `0`) {
500	/**
501	* If the script part is the "unknown" script, then don't return it.
502	**/
503	*scriptLength = `0`;
504	}
505
506	/*
507	* Move past any separator.
508	*/
509	if (_isIDSeparator(*position)) {
510	++position;
511	}
512	}
513
514	subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
515	u_terminateChars(region, *regionLength, subtagLength, err);
516
517	if(U_FAILURE(*err)) {
518	goto error;
519	}
520
521	*regionLength = subtagLength;
522
523	if (*regionLength > `0`) {
524	if (uprv_strnicmp(region, unknownRegion, *regionLength) == `0`) {
525	/**
526	* If the region part is the "unknown" region, then don't return it.
527	**/
528	*regionLength = `0`;
529	}
530	} else if (position != `0` && position != `'@'`) {
531	/ back up over consumed trailing separator /
532	--position;
533	}
534
535	exit:
536
537	return (int32_t)(position - localeID);
538
539	error:
540
541	/**
542	* If we get here, we have no explicit error, it's the result of an
543	* illegal argument.
544	**/
545	if (!U_FAILURE(*err)) {
546	*err = U_ILLEGAL_ARGUMENT_ERROR;
547	}
548
549	goto exit;
550	}
551
552	static UBool U_CALLCONV
553	createLikelySubtagsString(
554	const char* lang,
555	int32_t langLength,
556	const char* script,
557	int32_t scriptLength,
558	const char* region,
559	int32_t regionLength,
560	const char* variants,
561	int32_t variantsLength,
562	icu::ByteSink& sink,
563	UErrorCode* err) {
564	/**
565	* ULOC_FULLNAME_CAPACITY will provide enough capacity
566	* that we can build a string that contains the language,
567	* script and region code without worrying about overrunning
568	* the user-supplied buffer.
569	**/
570	char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
571
572	if(U_FAILURE(*err)) {
573	goto error;
574	}
575
576	/**
577	* Try the language with the script and region first.
578	**/
579	if (scriptLength > `0` && regionLength > `0`) {
580
581	const char* likelySubtags = NULL;
582
583	icu::CharString tagBuffer;
584	{
585	icu::CharStringByteSink sink(&tagBuffer);
586	createTagString(
587	lang,
588	langLength,
589	script,
590	scriptLength,
591	region,
592	regionLength,
593	NULL,
594	`0`,
595	sink,
596	err);
597	}
598	if(U_FAILURE(*err)) {
599	goto error;
600	}
601
602	likelySubtags =
603	findLikelySubtags(
604	tagBuffer.data(),
605	likelySubtagsBuffer,
606	sizeof(likelySubtagsBuffer),
607	err);
608	if(U_FAILURE(*err)) {
609	goto error;
610	}
611
612	if (likelySubtags != NULL) {
613	/ Always use the language tag from the*
614	maximal string, since it may be more
615	specific than the one provided. /*
616	createTagStringWithAlternates(
617	NULL,
618	`0`,
619	NULL,
620	`0`,
621	NULL,
622	`0`,
623	variants,
624	variantsLength,
625	likelySubtags,
626	sink,
627	err);
628	return TRUE;
629	}
630	}
631
632	/**
633	* Try the language with just the script.
634	**/
635	if (scriptLength > `0`) {
636
637	const char* likelySubtags = NULL;
638
639	icu::CharString tagBuffer;
640	{
641	icu::CharStringByteSink sink(&tagBuffer);
642	createTagString(
643	lang,
644	langLength,
645	script,
646	scriptLength,
647	NULL,
648	`0`,
649	NULL,
650	`0`,
651	sink,
652	err);
653	}
654	if(U_FAILURE(*err)) {
655	goto error;
656	}
657
658	likelySubtags =
659	findLikelySubtags(
660	tagBuffer.data(),
661	likelySubtagsBuffer,
662	sizeof(likelySubtagsBuffer),
663	err);
664	if(U_FAILURE(*err)) {
665	goto error;
666	}
667
668	if (likelySubtags != NULL) {
669	/ Always use the language tag from the*
670	maximal string, since it may be more
671	specific than the one provided. /*
672	createTagStringWithAlternates(
673	NULL,
674	`0`,
675	NULL,
676	`0`,
677	region,
678	regionLength,
679	variants,
680	variantsLength,
681	likelySubtags,
682	sink,
683	err);
684	return TRUE;
685	}
686	}
687
688	/**
689	* Try the language with just the region.
690	**/
691	if (regionLength > `0`) {
692
693	const char* likelySubtags = NULL;
694
695	icu::CharString tagBuffer;
696	{
697	icu::CharStringByteSink sink(&tagBuffer);
698	createTagString(
699	lang,
700	langLength,
701	NULL,
702	`0`,
703	region,
704	regionLength,
705	NULL,
706	`0`,
707	sink,
708	err);
709	}
710	if(U_FAILURE(*err)) {
711	goto error;
712	}
713
714	likelySubtags =
715	findLikelySubtags(
716	tagBuffer.data(),
717	likelySubtagsBuffer,
718	sizeof(likelySubtagsBuffer),
719	err);
720	if(U_FAILURE(*err)) {
721	goto error;
722	}
723
724	if (likelySubtags != NULL) {
725	/ Always use the language tag from the*
726	maximal string, since it may be more
727	specific than the one provided. /*
728	createTagStringWithAlternates(
729	NULL,
730	`0`,
731	script,
732	scriptLength,
733	NULL,
734	`0`,
735	variants,
736	variantsLength,
737	likelySubtags,
738	sink,
739	err);
740	return TRUE;
741	}
742	}
743
744	/**
745	* Finally, try just the language.
746	**/
747	{
748	const char* likelySubtags = NULL;
749
750	icu::CharString tagBuffer;
751	{
752	icu::CharStringByteSink sink(&tagBuffer);
753	createTagString(
754	lang,
755	langLength,
756	NULL,
757	`0`,
758	NULL,
759	`0`,
760	NULL,
761	`0`,
762	sink,
763	err);
764	}
765	if(U_FAILURE(*err)) {
766	goto error;
767	}
768
769	likelySubtags =
770	findLikelySubtags(
771	tagBuffer.data(),
772	likelySubtagsBuffer,
773	sizeof(likelySubtagsBuffer),
774	err);
775	if(U_FAILURE(*err)) {
776	goto error;
777	}
778
779	if (likelySubtags != NULL) {
780	/ Always use the language tag from the*
781	maximal string, since it may be more
782	specific than the one provided. /*
783	createTagStringWithAlternates(
784	NULL,
785	`0`,
786	script,
787	scriptLength,
788	region,
789	regionLength,
790	variants,
791	variantsLength,
792	likelySubtags,
793	sink,
794	err);
795	return TRUE;
796	}
797	}
798
799	return FALSE;
800
801	error:
802
803	if (!U_FAILURE(*err)) {
804	*err = U_ILLEGAL_ARGUMENT_ERROR;
805	}
806
807	return FALSE;
808	}
809
/*
 * Scan the trailing part of a locale ID up to the first '@' (or its end)
 * and jump to the enclosing function's `error` label if a run of
 * characters between '-' / '_' separators gets too long. The counter is
 * tested before it is incremented, so the jump happens on the 10th
 * character of a run.
 *
 * The expansion site must provide an `error:` label.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
828
829	static void
830	_uloc_addLikelySubtags(const char* localeID,
831	icu::ByteSink& sink,
832	UErrorCode* err) {
833	char lang[ULOC_LANG_CAPACITY];
834	int32_t langLength = sizeof(lang);
835	char script[ULOC_SCRIPT_CAPACITY];
836	int32_t scriptLength = sizeof(script);
837	char region[ULOC_COUNTRY_CAPACITY];
838	int32_t regionLength = sizeof(region);
839	const char* trailing = "";
840	int32_t trailingLength = `0`;
841	int32_t trailingIndex = `0`;
842	UBool success = FALSE;
843
844	if(U_FAILURE(*err)) {
845	goto error;
846	}
847	if (localeID == NULL) {
848	goto error;
849	}
850
851	trailingIndex = parseTagString(
852	localeID,
853	lang,
854	&langLength,
855	script,
856	&scriptLength,
857	region,
858	&regionLength,
859	err);
860	if(U_FAILURE(*err)) {
861	/ Overflow indicates an illegal argument error /
862	if (*err == U_BUFFER_OVERFLOW_ERROR) {
863	*err = U_ILLEGAL_ARGUMENT_ERROR;
864	}
865
866	goto error;
867	}
868
869	/ Find the length of the trailing portion. /
870	while (_isIDSeparator(localeID[trailingIndex])) {
871	trailingIndex++;
872	}
873	trailing = &localeID[trailingIndex];
874	trailingLength = (int32_t)uprv_strlen(trailing);
875
876	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
877
878	success =
879	createLikelySubtagsString(
880	lang,
881	langLength,
882	script,
883	scriptLength,
884	region,
885	regionLength,
886	trailing,
887	trailingLength,
888	sink,
889	err);
890
891	if (!success) {
892	const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
893
894	/*
895	* If we get here, we need to return localeID.
896	*/
897	sink.Append(localeID, localIDLength);
898	}
899
900	return;
901
902	error:
903
904	if (!U_FAILURE(*err)) {
905	*err = U_ILLEGAL_ARGUMENT_ERROR;
906	}
907	}
908
909	static void
910	_uloc_minimizeSubtags(const char* localeID,
911	icu::ByteSink& sink,
912	UErrorCode* err) {
913	icu::CharString maximizedTagBuffer;
914
915	char lang[ULOC_LANG_CAPACITY];
916	int32_t langLength = sizeof(lang);
917	char script[ULOC_SCRIPT_CAPACITY];
918	int32_t scriptLength = sizeof(script);
919	char region[ULOC_COUNTRY_CAPACITY];
920	int32_t regionLength = sizeof(region);
921	const char* trailing = "";
922	int32_t trailingLength = `0`;
923	int32_t trailingIndex = `0`;
924
925	if(U_FAILURE(*err)) {
926	goto error;
927	}
928	else if (localeID == NULL) {
929	goto error;
930	}
931
932	trailingIndex =
933	parseTagString(
934	localeID,
935	lang,
936	&langLength,
937	script,
938	&scriptLength,
939	region,
940	&regionLength,
941	err);
942	if(U_FAILURE(*err)) {
943
944	/ Overflow indicates an illegal argument error /
945	if (*err == U_BUFFER_OVERFLOW_ERROR) {
946	*err = U_ILLEGAL_ARGUMENT_ERROR;
947	}
948
949	goto error;
950	}
951
952	/ Find the spot where the variants or the keywords begin, if any. /
953	while (_isIDSeparator(localeID[trailingIndex])) {
954	trailingIndex++;
955	}
956	trailing = &localeID[trailingIndex];
957	trailingLength = (int32_t)uprv_strlen(trailing);
958
959	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
960
961	{
962	icu::CharString base;
963	{
964	icu::CharStringByteSink sink(&base);
965	createTagString(
966	lang,
967	langLength,
968	script,
969	scriptLength,
970	region,
971	regionLength,
972	NULL,
973	`0`,
974	sink,
975	err);
976	}
977
978	/**
979	* First, we need to first get the maximization
980	* from AddLikelySubtags.
981	**/
982	{
983	icu::CharStringByteSink sink(&maximizedTagBuffer);
984	ulocimp_addLikelySubtags(base.data(), sink, err);
985	}
986	}
987
988	if(U_FAILURE(*err)) {
989	goto error;
990	}
991
992	/**
993	* Start first with just the language.
994	**/
995	{
996	icu::CharString tagBuffer;
997	{
998	icu::CharStringByteSink sink(&tagBuffer);
999	createLikelySubtagsString(
1000	lang,
1001	langLength,
1002	NULL,
1003	`0`,
1004	NULL,
1005	`0`,
1006	NULL,
1007	`0`,
1008	sink,
1009	err);
1010	}
1011
1012	if(U_FAILURE(*err)) {
1013	goto error;
1014	}
1015	else if (!tagBuffer.isEmpty() && uprv_strnicmp(
1016	maximizedTagBuffer.data(),
1017	tagBuffer.data(),
1018	tagBuffer.length()) == `0`) {
1019
1020	createTagString(
1021	lang,
1022	langLength,
1023	NULL,
1024	`0`,
1025	NULL,
1026	`0`,
1027	trailing,
1028	trailingLength,
1029	sink,
1030	err);
1031	return;
1032	}
1033	}
1034
1035	/**
1036	* Next, try the language and region.
1037	**/
1038	if (regionLength > `0`) {
1039
1040	icu::CharString tagBuffer;
1041	{
1042	icu::CharStringByteSink sink(&tagBuffer);
1043	createLikelySubtagsString(
1044	lang,
1045	langLength,
1046	NULL,
1047	`0`,
1048	region,
1049	regionLength,
1050	NULL,
1051	`0`,
1052	sink,
1053	err);
1054	}
1055
1056	if(U_FAILURE(*err)) {
1057	goto error;
1058	}
1059	else if (uprv_strnicmp(
1060	maximizedTagBuffer.data(),
1061	tagBuffer.data(),
1062	tagBuffer.length()) == `0`) {
1063
1064	createTagString(
1065	lang,
1066	langLength,
1067	NULL,
1068	`0`,
1069	region,
1070	regionLength,
1071	trailing,
1072	trailingLength,
1073	sink,
1074	err);
1075	return;
1076	}
1077	}
1078
1079	/**
1080	* Finally, try the language and script. This is our last chance,
1081	* since trying with all three subtags would only yield the
1082	* maximal version that we already have.
1083	**/
1084	if (scriptLength > `0` && regionLength > `0`) {
1085	icu::CharString tagBuffer;
1086	{
1087	icu::CharStringByteSink sink(&tagBuffer);
1088	createLikelySubtagsString(
1089	lang,
1090	langLength,
1091	script,
1092	scriptLength,
1093	NULL,
1094	`0`,
1095	NULL,
1096	`0`,
1097	sink,
1098	err);
1099	}
1100
1101	if(U_FAILURE(*err)) {
1102	goto error;
1103	}
1104	else if (uprv_strnicmp(
1105	maximizedTagBuffer.data(),
1106	tagBuffer.data(),
1107	tagBuffer.length()) == `0`) {
1108
1109	createTagString(
1110	lang,
1111	langLength,
1112	script,
1113	scriptLength,
1114	NULL,
1115	`0`,
1116	trailing,
1117	trailingLength,
1118	sink,
1119	err);
1120	return;
1121	}
1122	}
1123
1124	{
1125	/**
1126	* If we got here, return the locale ID parameter.
1127	**/
1128	const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1129	sink.Append(localeID, localeIDLength);
1130	return;
1131	}
1132
1133	error:
1134
1135	if (!U_FAILURE(*err)) {
1136	*err = U_ILLEGAL_ARGUMENT_ERROR;
1137	}
1138	}
1139
1140	static UBool
1141	do_canonicalize(const char* localeID,
1142	char* buffer,
1143	int32_t bufferCapacity,
1144	UErrorCode* err)
1145	{
1146	uloc_canonicalize(
1147	localeID,
1148	buffer,
1149	bufferCapacity,
1150	err);
1151
1152	if (*err == U_STRING_NOT_TERMINATED_WARNING \|\|
1153	*err == U_BUFFER_OVERFLOW_ERROR) {
1154	*err = U_ILLEGAL_ARGUMENT_ERROR;
1155
1156	return FALSE;
1157	}
1158	else if (U_FAILURE(*err)) {
1159
1160	return FALSE;
1161	}
1162	else {
1163	return TRUE;
1164	}
1165	}
1166
1167	U_CAPI int32_t U_EXPORT2
1168	uloc_addLikelySubtags(const char* localeID,
1169	char* maximizedLocaleID,
1170	int32_t maximizedLocaleIDCapacity,
1171	UErrorCode* status) {
1172	if (U_FAILURE(*status)) {
1173	return `0`;
1174	}
1175
1176	icu::CheckedArrayByteSink sink(
1177	maximizedLocaleID, maximizedLocaleIDCapacity);
1178
1179	ulocimp_addLikelySubtags(localeID, sink, status);
1180	int32_t reslen = sink.NumberOfBytesAppended();
1181
1182	if (U_FAILURE(*status)) {
1183	return sink.Overflowed() ? reslen : -`1`;
1184	}
1185
1186	if (sink.Overflowed()) {
1187	*status = U_BUFFER_OVERFLOW_ERROR;
1188	} else {
1189	u_terminateChars(
1190	maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
1191	}
1192
1193	return reslen;
1194	}
1195
1196	U_CAPI void U_EXPORT2
1197	ulocimp_addLikelySubtags(const char* localeID,
1198	icu::ByteSink& sink,
1199	UErrorCode* status) {
1200	char localeBuffer[ULOC_FULLNAME_CAPACITY];
1201
1202	if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1203	_uloc_addLikelySubtags(localeBuffer, sink, status);
1204	}
1205	}
1206
1207	U_CAPI int32_t U_EXPORT2
1208	uloc_minimizeSubtags(const char* localeID,
1209	char* minimizedLocaleID,
1210	int32_t minimizedLocaleIDCapacity,
1211	UErrorCode* status) {
1212	if (U_FAILURE(*status)) {
1213	return `0`;
1214	}
1215
1216	icu::CheckedArrayByteSink sink(
1217	minimizedLocaleID, minimizedLocaleIDCapacity);
1218
1219	ulocimp_minimizeSubtags(localeID, sink, status);
1220	int32_t reslen = sink.NumberOfBytesAppended();
1221
1222	if (U_FAILURE(*status)) {
1223	return sink.Overflowed() ? reslen : -`1`;
1224	}
1225
1226	if (sink.Overflowed()) {
1227	*status = U_BUFFER_OVERFLOW_ERROR;
1228	} else {
1229	u_terminateChars(
1230	minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
1231	}
1232
1233	return reslen;
1234	}
1235
1236	U_CAPI void U_EXPORT2
1237	ulocimp_minimizeSubtags(const char* localeID,
1238	icu::ByteSink& sink,
1239	UErrorCode* status) {
1240	char localeBuffer[ULOC_FULLNAME_CAPACITY];
1241
1242	if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1243	_uloc_minimizeSubtags(localeBuffer, sink, status);
1244	}
1245	}
1246
1247	// Pairs of (language subtag, + or -) for finding out fast if common languages
1248	// are LTR (minus) or RTL (plus).
1249	static const char LANG_DIR_STRING[] =
1250	"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1251
1252	// Implemented here because this calls ulocimp_addLikelySubtags().
1253	U_CAPI UBool U_EXPORT2
1254	uloc_isRightToLeft(const char *locale) {
1255	UErrorCode errorCode = U_ZERO_ERROR;
1256	char script[`8`];
1257	int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1258	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
1259	scriptLength == `0`) {
1260	// Fastpath: We know the likely scripts and their writing direction
1261	// for some common languages.
1262	errorCode = U_ZERO_ERROR;
1263	char lang[`8`];
1264	int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1265	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1266	return FALSE;
1267	}
1268	if (langLength > `0`) {
1269	const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1270	if (langPtr != NULL) {
1271	switch (langPtr[langLength]) {
1272	case `'-'`: return FALSE;
1273	case `'+'`: return TRUE;
1274	default: break; // partial match of a longer code
1275	}
1276	}
1277	}
1278	// Otherwise, find the likely script.
1279	errorCode = U_ZERO_ERROR;
1280	icu::CharString likely;
1281	{
1282	icu::CharStringByteSink sink(&likely);
1283	ulocimp_addLikelySubtags(locale, sink, &errorCode);
1284	}
1285	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1286	return FALSE;
1287	}
1288	scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
1289	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
1290	scriptLength == `0`) {
1291	return FALSE;
1292	}
1293	}
1294	UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1295	return uscript_isRightToLeft(scriptCode);
1296	}
1297
1298	U_NAMESPACE_BEGIN
1299
1300	UBool
1301	Locale::isRightToLeft() const {
1302	return uloc_isRightToLeft(getBaseName());
1303	}
1304
1305	U_NAMESPACE_END
1306
1307	// The following must at least allow for rg key value (6) plus terminator (1).
1308	#define ULOC_RG_BUFLEN 8
1309
1310	U_CAPI int32_t U_EXPORT2
1311	ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1312	char region, int32_t regionCapacity, UErrorCode status) {
1313	if (U_FAILURE(*status)) {
1314	return `0`;
1315	}
1316	char rgBuf[ULOC_RG_BUFLEN];
1317	UErrorCode rgStatus = U_ZERO_ERROR;
1318
1319	// First check for rg keyword value
1320	int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1321	if (U_FAILURE(rgStatus) \|\| rgLen != `6`) {
1322	rgLen = `0`;
1323	} else {
1324	// rgBuf guaranteed to be zero terminated here, with text len 6
1325	char *rgPtr = rgBuf;
1326	for (; *rgPtr!= `0`; rgPtr++) {
1327	rgPtr = uprv_toupper(rgPtr);
1328	}
1329	rgLen = (uprv_strcmp(rgBuf+`2`, "ZZZZ") == `0`)? `2`: `0`;
1330	}
1331
1332	if (rgLen == `0`) {
1333	// No valid rg keyword value, try for unicode_region_subtag
1334	rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1335	if (U_FAILURE(*status)) {
1336	rgLen = `0`;
1337	} else if (rgLen == `0` && inferRegion) {
1338	// no unicode_region_subtag but inferRegion TRUE, try likely subtags
1339	rgStatus = U_ZERO_ERROR;
1340	icu::CharString locBuf;
1341	{
1342	icu::CharStringByteSink sink(&locBuf);
1343	ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1344	}
1345	if (U_SUCCESS(rgStatus)) {
1346	rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
1347	if (U_FAILURE(*status)) {
1348	rgLen = `0`;
1349	}
1350	}
1351	}
1352	}
1353
1354	rgBuf[rgLen] = `0`;
1355	uprv_strncpy(region, rgBuf, regionCapacity);
1356	return u_terminateChars(region, regionCapacity, rgLen, status);
1357	}
1358
1359

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/loclikely.cpp