1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 1997-2016, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: loclikely.cpp
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2010feb25
16* created by: Markus W. Scherer
17*
18* Code for likely and minimized locale subtags, separated out from other .cpp files
19* that then do not depend on resource bundle code and likely-subtags data.
20*/
21
22#include "unicode/bytestream.h"
23#include "unicode/utypes.h"
24#include "unicode/locid.h"
25#include "unicode/putil.h"
26#include "unicode/uchar.h"
27#include "unicode/uloc.h"
28#include "unicode/ures.h"
29#include "unicode/uscript.h"
30#include "bytesinkutil.h"
31#include "charstr.h"
32#include "cmemory.h"
33#include "cstring.h"
34#include "ulocimp.h"
35#include "ustr_imp.h"
36
/**
 * Canonical subtag spellings that stand for an unknown language, an
 * unknown script and an unknown region, respectively.
 **/
static constexpr const char* unknownLanguage = "und";
static constexpr const char* unknownScript = "Zzzz";
static constexpr const char* unknownRegion = "ZZ";
43
44/**
45 * This function looks for the localeID in the likelySubtags resource.
46 *
47 * @param localeID The tag to find.
48 * @param buffer A buffer to hold the matching entry
49 * @param bufferLength The length of the output buffer
50 * @return A pointer to "buffer" if found, or a null pointer if not.
51 */
52static const char* U_CALLCONV
53findLikelySubtags(const char* localeID,
54 char* buffer,
55 int32_t bufferLength,
56 UErrorCode* err) {
57 const char* result = NULL;
58
59 if (!U_FAILURE(*err)) {
60 int32_t resLen = 0;
61 const UChar* s = NULL;
62 UErrorCode tmpErr = U_ZERO_ERROR;
63 icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
64 if (U_SUCCESS(tmpErr)) {
65 icu::CharString und;
66 if (localeID != NULL) {
67 if (*localeID == '\0') {
68 localeID = unknownLanguage;
69 } else if (*localeID == '_') {
70 und.append(unknownLanguage, *err);
71 und.append(localeID, *err);
72 if (U_FAILURE(*err)) {
73 return NULL;
74 }
75 localeID = und.data();
76 }
77 }
78 s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
79
80 if (U_FAILURE(tmpErr)) {
81 /*
82 * If a resource is missing, it's not really an error, it's
83 * just that we don't have any data for that particular locale ID.
84 */
85 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86 *err = tmpErr;
87 }
88 }
89 else if (resLen >= bufferLength) {
90 /* The buffer should never overflow. */
91 *err = U_INTERNAL_PROGRAM_ERROR;
92 }
93 else {
94 u_UCharsToChars(s, buffer, resLen + 1);
95 if (resLen >= 3 &&
96 uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
97 (resLen == 3 || buffer[3] == '_')) {
98 uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
99 }
100 result = buffer;
101 }
102 } else {
103 *err = tmpErr;
104 }
105 }
106
107 return result;
108}
109
110/**
111 * Append a tag to a buffer, adding the separator if necessary. The buffer
112 * must be large enough to contain the resulting tag plus any separator
113 * necessary. The tag must not be a zero-length string.
114 *
115 * @param tag The tag to add.
116 * @param tagLength The length of the tag.
117 * @param buffer The output buffer.
118 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
119 **/
120static void U_CALLCONV
121appendTag(
122 const char* tag,
123 int32_t tagLength,
124 char* buffer,
125 int32_t* bufferLength,
126 UBool withSeparator) {
127
128 if (withSeparator) {
129 buffer[*bufferLength] = '_';
130 ++(*bufferLength);
131 }
132
133 uprv_memmove(
134 &buffer[*bufferLength],
135 tag,
136 tagLength);
137
138 *bufferLength += tagLength;
139}
140
141/**
142 * Create a tag string from the supplied parameters. The lang, script and region
143 * parameters may be NULL pointers. If they are, their corresponding length parameters
144 * must be less than or equal to 0.
145 *
146 * If any of the language, script or region parameters are empty, and the alternateTags
147 * parameter is not NULL, it will be parsed for potential language, script and region tags
148 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
149 * it contains no language tag, the default tag for the unknown language is used.
150 *
151 * If the length of the new string exceeds the capacity of the output buffer,
152 * the function copies as many bytes to the output buffer as it can, and returns
153 * the error U_BUFFER_OVERFLOW_ERROR.
154 *
155 * If an illegal argument is provided, the function returns the error
156 * U_ILLEGAL_ARGUMENT_ERROR.
157 *
158 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159 * the tag string fits in the output buffer, but the null terminator doesn't.
160 *
161 * @param lang The language tag to use.
162 * @param langLength The length of the language tag.
163 * @param script The script tag to use.
164 * @param scriptLength The length of the script tag.
165 * @param region The region tag to use.
166 * @param regionLength The length of the region tag.
167 * @param trailing Any trailing data to append to the new tag.
168 * @param trailingLength The length of the trailing data.
169 * @param alternateTags A string containing any alternate tags.
170 * @param sink The output sink receiving the tag string.
171 * @param err A pointer to a UErrorCode for error reporting.
172 **/
173static void U_CALLCONV
174createTagStringWithAlternates(
175 const char* lang,
176 int32_t langLength,
177 const char* script,
178 int32_t scriptLength,
179 const char* region,
180 int32_t regionLength,
181 const char* trailing,
182 int32_t trailingLength,
183 const char* alternateTags,
184 icu::ByteSink& sink,
185 UErrorCode* err) {
186
187 if (U_FAILURE(*err)) {
188 goto error;
189 }
190 else if (langLength >= ULOC_LANG_CAPACITY ||
191 scriptLength >= ULOC_SCRIPT_CAPACITY ||
192 regionLength >= ULOC_COUNTRY_CAPACITY) {
193 goto error;
194 }
195 else {
196 /**
197 * ULOC_FULLNAME_CAPACITY will provide enough capacity
198 * that we can build a string that contains the language,
199 * script and region code without worrying about overrunning
200 * the user-supplied buffer.
201 **/
202 char tagBuffer[ULOC_FULLNAME_CAPACITY];
203 int32_t tagLength = 0;
204 UBool regionAppended = FALSE;
205
206 if (langLength > 0) {
207 appendTag(
208 lang,
209 langLength,
210 tagBuffer,
211 &tagLength,
212 /*withSeparator=*/FALSE);
213 }
214 else if (alternateTags == NULL) {
215 /*
216 * Use the empty string for an unknown language, if
217 * we found no language.
218 */
219 }
220 else {
221 /*
222 * Parse the alternateTags string for the language.
223 */
224 char alternateLang[ULOC_LANG_CAPACITY];
225 int32_t alternateLangLength = sizeof(alternateLang);
226
227 alternateLangLength =
228 uloc_getLanguage(
229 alternateTags,
230 alternateLang,
231 alternateLangLength,
232 err);
233 if(U_FAILURE(*err) ||
234 alternateLangLength >= ULOC_LANG_CAPACITY) {
235 goto error;
236 }
237 else if (alternateLangLength == 0) {
238 /*
239 * Use the empty string for an unknown language, if
240 * we found no language.
241 */
242 }
243 else {
244 appendTag(
245 alternateLang,
246 alternateLangLength,
247 tagBuffer,
248 &tagLength,
249 /*withSeparator=*/FALSE);
250 }
251 }
252
253 if (scriptLength > 0) {
254 appendTag(
255 script,
256 scriptLength,
257 tagBuffer,
258 &tagLength,
259 /*withSeparator=*/TRUE);
260 }
261 else if (alternateTags != NULL) {
262 /*
263 * Parse the alternateTags string for the script.
264 */
265 char alternateScript[ULOC_SCRIPT_CAPACITY];
266
267 const int32_t alternateScriptLength =
268 uloc_getScript(
269 alternateTags,
270 alternateScript,
271 sizeof(alternateScript),
272 err);
273
274 if (U_FAILURE(*err) ||
275 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276 goto error;
277 }
278 else if (alternateScriptLength > 0) {
279 appendTag(
280 alternateScript,
281 alternateScriptLength,
282 tagBuffer,
283 &tagLength,
284 /*withSeparator=*/TRUE);
285 }
286 }
287
288 if (regionLength > 0) {
289 appendTag(
290 region,
291 regionLength,
292 tagBuffer,
293 &tagLength,
294 /*withSeparator=*/TRUE);
295
296 regionAppended = TRUE;
297 }
298 else if (alternateTags != NULL) {
299 /*
300 * Parse the alternateTags string for the region.
301 */
302 char alternateRegion[ULOC_COUNTRY_CAPACITY];
303
304 const int32_t alternateRegionLength =
305 uloc_getCountry(
306 alternateTags,
307 alternateRegion,
308 sizeof(alternateRegion),
309 err);
310 if (U_FAILURE(*err) ||
311 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312 goto error;
313 }
314 else if (alternateRegionLength > 0) {
315 appendTag(
316 alternateRegion,
317 alternateRegionLength,
318 tagBuffer,
319 &tagLength,
320 /*withSeparator=*/TRUE);
321
322 regionAppended = TRUE;
323 }
324 }
325
326 /**
327 * Copy the partial tag from our internal buffer to the supplied
328 * target.
329 **/
330 sink.Append(tagBuffer, tagLength);
331
332 if (trailingLength > 0) {
333 if (*trailing != '@') {
334 sink.Append("_", 1);
335 if (!regionAppended) {
336 /* extra separator is required */
337 sink.Append("_", 1);
338 }
339 }
340
341 /*
342 * Copy the trailing data into the supplied buffer.
343 */
344 sink.Append(trailing, trailingLength);
345 }
346
347 return;
348 }
349
350error:
351
352 /**
353 * An overflow indicates the locale ID passed in
354 * is ill-formed. If we got here, and there was
355 * no previous error, it's an implicit overflow.
356 **/
357 if (*err == U_BUFFER_OVERFLOW_ERROR ||
358 U_SUCCESS(*err)) {
359 *err = U_ILLEGAL_ARGUMENT_ERROR;
360 }
361}
362
363/**
364 * Create a tag string from the supplied parameters. The lang, script and region
365 * parameters may be NULL pointers. If they are, their corresponding length parameters
366 * must be less than or equal to 0. If the lang parameter is an empty string, the
367 * default value for an unknown language is written to the output buffer.
368 *
369 * If the length of the new string exceeds the capacity of the output buffer,
370 * the function copies as many bytes to the output buffer as it can, and returns
371 * the error U_BUFFER_OVERFLOW_ERROR.
372 *
373 * If an illegal argument is provided, the function returns the error
374 * U_ILLEGAL_ARGUMENT_ERROR.
375 *
376 * @param lang The language tag to use.
377 * @param langLength The length of the language tag.
378 * @param script The script tag to use.
379 * @param scriptLength The length of the script tag.
380 * @param region The region tag to use.
381 * @param regionLength The length of the region tag.
382 * @param trailing Any trailing data to append to the new tag.
383 * @param trailingLength The length of the trailing data.
384 * @param sink The output sink receiving the tag string.
385 * @param err A pointer to a UErrorCode for error reporting.
386 **/
387static void U_CALLCONV
388createTagString(
389 const char* lang,
390 int32_t langLength,
391 const char* script,
392 int32_t scriptLength,
393 const char* region,
394 int32_t regionLength,
395 const char* trailing,
396 int32_t trailingLength,
397 icu::ByteSink& sink,
398 UErrorCode* err)
399{
400 createTagStringWithAlternates(
401 lang,
402 langLength,
403 script,
404 scriptLength,
405 region,
406 regionLength,
407 trailing,
408 trailingLength,
409 NULL,
410 sink,
411 err);
412}
413
414/**
415 * Parse the language, script, and region subtags from a tag string, and copy the
416 * results into the corresponding output parameters. The buffers are null-terminated,
417 * unless overflow occurs.
418 *
419 * The langLength, scriptLength, and regionLength parameters are input/output
420 * parameters, and must contain the capacity of their corresponding buffers on
421 * input. On output, they will contain the actual length of the buffers, not
422 * including the null terminator.
423 *
424 * If the length of any of the output subtags exceeds the capacity of the corresponding
425 * buffer, the function copies as many bytes to the output buffer as it can, and returns
426 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
427 * occurs.
428 *
429 * If an illegal argument is provided, the function returns the error
430 * U_ILLEGAL_ARGUMENT_ERROR.
431 *
432 * @param localeID The locale ID to parse.
433 * @param lang The language tag buffer.
434 * @param langLength The length of the language tag.
435 * @param script The script tag buffer.
436 * @param scriptLength The length of the script tag.
437 * @param region The region tag buffer.
438 * @param regionLength The length of the region tag.
439 * @param err A pointer to a UErrorCode for error reporting.
440 * @return The number of chars of the localeID parameter consumed.
441 **/
442static int32_t U_CALLCONV
443parseTagString(
444 const char* localeID,
445 char* lang,
446 int32_t* langLength,
447 char* script,
448 int32_t* scriptLength,
449 char* region,
450 int32_t* regionLength,
451 UErrorCode* err)
452{
453 const char* position = localeID;
454 int32_t subtagLength = 0;
455
456 if(U_FAILURE(*err) ||
457 localeID == NULL ||
458 lang == NULL ||
459 langLength == NULL ||
460 script == NULL ||
461 scriptLength == NULL ||
462 region == NULL ||
463 regionLength == NULL) {
464 goto error;
465 }
466
467 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
468 u_terminateChars(lang, *langLength, subtagLength, err);
469
470 /*
471 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
472 * to be an error, because it indicates the user-supplied tag is
473 * not well-formed.
474 */
475 if(U_FAILURE(*err)) {
476 goto error;
477 }
478
479 *langLength = subtagLength;
480
481 /*
482 * If no language was present, use the empty string instead.
483 * Otherwise, move past any separator.
484 */
485 if (_isIDSeparator(*position)) {
486 ++position;
487 }
488
489 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
490 u_terminateChars(script, *scriptLength, subtagLength, err);
491
492 if(U_FAILURE(*err)) {
493 goto error;
494 }
495
496 *scriptLength = subtagLength;
497
498 if (*scriptLength > 0) {
499 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
500 /**
501 * If the script part is the "unknown" script, then don't return it.
502 **/
503 *scriptLength = 0;
504 }
505
506 /*
507 * Move past any separator.
508 */
509 if (_isIDSeparator(*position)) {
510 ++position;
511 }
512 }
513
514 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
515 u_terminateChars(region, *regionLength, subtagLength, err);
516
517 if(U_FAILURE(*err)) {
518 goto error;
519 }
520
521 *regionLength = subtagLength;
522
523 if (*regionLength > 0) {
524 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
525 /**
526 * If the region part is the "unknown" region, then don't return it.
527 **/
528 *regionLength = 0;
529 }
530 } else if (*position != 0 && *position != '@') {
531 /* back up over consumed trailing separator */
532 --position;
533 }
534
535exit:
536
537 return (int32_t)(position - localeID);
538
539error:
540
541 /**
542 * If we get here, we have no explicit error, it's the result of an
543 * illegal argument.
544 **/
545 if (!U_FAILURE(*err)) {
546 *err = U_ILLEGAL_ARGUMENT_ERROR;
547 }
548
549 goto exit;
550}
551
552static UBool U_CALLCONV
553createLikelySubtagsString(
554 const char* lang,
555 int32_t langLength,
556 const char* script,
557 int32_t scriptLength,
558 const char* region,
559 int32_t regionLength,
560 const char* variants,
561 int32_t variantsLength,
562 icu::ByteSink& sink,
563 UErrorCode* err) {
564 /**
565 * ULOC_FULLNAME_CAPACITY will provide enough capacity
566 * that we can build a string that contains the language,
567 * script and region code without worrying about overrunning
568 * the user-supplied buffer.
569 **/
570 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
571
572 if(U_FAILURE(*err)) {
573 goto error;
574 }
575
576 /**
577 * Try the language with the script and region first.
578 **/
579 if (scriptLength > 0 && regionLength > 0) {
580
581 const char* likelySubtags = NULL;
582
583 icu::CharString tagBuffer;
584 {
585 icu::CharStringByteSink sink(&tagBuffer);
586 createTagString(
587 lang,
588 langLength,
589 script,
590 scriptLength,
591 region,
592 regionLength,
593 NULL,
594 0,
595 sink,
596 err);
597 }
598 if(U_FAILURE(*err)) {
599 goto error;
600 }
601
602 likelySubtags =
603 findLikelySubtags(
604 tagBuffer.data(),
605 likelySubtagsBuffer,
606 sizeof(likelySubtagsBuffer),
607 err);
608 if(U_FAILURE(*err)) {
609 goto error;
610 }
611
612 if (likelySubtags != NULL) {
613 /* Always use the language tag from the
614 maximal string, since it may be more
615 specific than the one provided. */
616 createTagStringWithAlternates(
617 NULL,
618 0,
619 NULL,
620 0,
621 NULL,
622 0,
623 variants,
624 variantsLength,
625 likelySubtags,
626 sink,
627 err);
628 return TRUE;
629 }
630 }
631
632 /**
633 * Try the language with just the script.
634 **/
635 if (scriptLength > 0) {
636
637 const char* likelySubtags = NULL;
638
639 icu::CharString tagBuffer;
640 {
641 icu::CharStringByteSink sink(&tagBuffer);
642 createTagString(
643 lang,
644 langLength,
645 script,
646 scriptLength,
647 NULL,
648 0,
649 NULL,
650 0,
651 sink,
652 err);
653 }
654 if(U_FAILURE(*err)) {
655 goto error;
656 }
657
658 likelySubtags =
659 findLikelySubtags(
660 tagBuffer.data(),
661 likelySubtagsBuffer,
662 sizeof(likelySubtagsBuffer),
663 err);
664 if(U_FAILURE(*err)) {
665 goto error;
666 }
667
668 if (likelySubtags != NULL) {
669 /* Always use the language tag from the
670 maximal string, since it may be more
671 specific than the one provided. */
672 createTagStringWithAlternates(
673 NULL,
674 0,
675 NULL,
676 0,
677 region,
678 regionLength,
679 variants,
680 variantsLength,
681 likelySubtags,
682 sink,
683 err);
684 return TRUE;
685 }
686 }
687
688 /**
689 * Try the language with just the region.
690 **/
691 if (regionLength > 0) {
692
693 const char* likelySubtags = NULL;
694
695 icu::CharString tagBuffer;
696 {
697 icu::CharStringByteSink sink(&tagBuffer);
698 createTagString(
699 lang,
700 langLength,
701 NULL,
702 0,
703 region,
704 regionLength,
705 NULL,
706 0,
707 sink,
708 err);
709 }
710 if(U_FAILURE(*err)) {
711 goto error;
712 }
713
714 likelySubtags =
715 findLikelySubtags(
716 tagBuffer.data(),
717 likelySubtagsBuffer,
718 sizeof(likelySubtagsBuffer),
719 err);
720 if(U_FAILURE(*err)) {
721 goto error;
722 }
723
724 if (likelySubtags != NULL) {
725 /* Always use the language tag from the
726 maximal string, since it may be more
727 specific than the one provided. */
728 createTagStringWithAlternates(
729 NULL,
730 0,
731 script,
732 scriptLength,
733 NULL,
734 0,
735 variants,
736 variantsLength,
737 likelySubtags,
738 sink,
739 err);
740 return TRUE;
741 }
742 }
743
744 /**
745 * Finally, try just the language.
746 **/
747 {
748 const char* likelySubtags = NULL;
749
750 icu::CharString tagBuffer;
751 {
752 icu::CharStringByteSink sink(&tagBuffer);
753 createTagString(
754 lang,
755 langLength,
756 NULL,
757 0,
758 NULL,
759 0,
760 NULL,
761 0,
762 sink,
763 err);
764 }
765 if(U_FAILURE(*err)) {
766 goto error;
767 }
768
769 likelySubtags =
770 findLikelySubtags(
771 tagBuffer.data(),
772 likelySubtagsBuffer,
773 sizeof(likelySubtagsBuffer),
774 err);
775 if(U_FAILURE(*err)) {
776 goto error;
777 }
778
779 if (likelySubtags != NULL) {
780 /* Always use the language tag from the
781 maximal string, since it may be more
782 specific than the one provided. */
783 createTagStringWithAlternates(
784 NULL,
785 0,
786 script,
787 scriptLength,
788 region,
789 regionLength,
790 variants,
791 variantsLength,
792 likelySubtags,
793 sink,
794 err);
795 return TRUE;
796 }
797 }
798
799 return FALSE;
800
801error:
802
803 if (!U_FAILURE(*err)) {
804 *err = U_ILLEGAL_ARGUMENT_ERROR;
805 }
806
807 return FALSE;
808}
809
/**
 * Validates the length of the subtags in the trailing part of a locale ID.
 *
 * Characters are counted per subtag: the count restarts at every '-' or
 * '_', and scanning stops at the first '@'. When a further character is
 * seen while more than 8 have already been counted for the current subtag
 * (i.e. the subtag has 10 or more characters), control jumps to the
 * `error` label, which the enclosing function must provide.
 *
 * @param trailing       Pointer to the trailing part.
 * @param trailingLength Number of chars to examine.
 **/
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
828
829static void
830_uloc_addLikelySubtags(const char* localeID,
831 icu::ByteSink& sink,
832 UErrorCode* err) {
833 char lang[ULOC_LANG_CAPACITY];
834 int32_t langLength = sizeof(lang);
835 char script[ULOC_SCRIPT_CAPACITY];
836 int32_t scriptLength = sizeof(script);
837 char region[ULOC_COUNTRY_CAPACITY];
838 int32_t regionLength = sizeof(region);
839 const char* trailing = "";
840 int32_t trailingLength = 0;
841 int32_t trailingIndex = 0;
842 UBool success = FALSE;
843
844 if(U_FAILURE(*err)) {
845 goto error;
846 }
847 if (localeID == NULL) {
848 goto error;
849 }
850
851 trailingIndex = parseTagString(
852 localeID,
853 lang,
854 &langLength,
855 script,
856 &scriptLength,
857 region,
858 &regionLength,
859 err);
860 if(U_FAILURE(*err)) {
861 /* Overflow indicates an illegal argument error */
862 if (*err == U_BUFFER_OVERFLOW_ERROR) {
863 *err = U_ILLEGAL_ARGUMENT_ERROR;
864 }
865
866 goto error;
867 }
868
869 /* Find the length of the trailing portion. */
870 while (_isIDSeparator(localeID[trailingIndex])) {
871 trailingIndex++;
872 }
873 trailing = &localeID[trailingIndex];
874 trailingLength = (int32_t)uprv_strlen(trailing);
875
876 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
877
878 success =
879 createLikelySubtagsString(
880 lang,
881 langLength,
882 script,
883 scriptLength,
884 region,
885 regionLength,
886 trailing,
887 trailingLength,
888 sink,
889 err);
890
891 if (!success) {
892 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
893
894 /*
895 * If we get here, we need to return localeID.
896 */
897 sink.Append(localeID, localIDLength);
898 }
899
900 return;
901
902error:
903
904 if (!U_FAILURE(*err)) {
905 *err = U_ILLEGAL_ARGUMENT_ERROR;
906 }
907}
908
909static void
910_uloc_minimizeSubtags(const char* localeID,
911 icu::ByteSink& sink,
912 UErrorCode* err) {
913 icu::CharString maximizedTagBuffer;
914
915 char lang[ULOC_LANG_CAPACITY];
916 int32_t langLength = sizeof(lang);
917 char script[ULOC_SCRIPT_CAPACITY];
918 int32_t scriptLength = sizeof(script);
919 char region[ULOC_COUNTRY_CAPACITY];
920 int32_t regionLength = sizeof(region);
921 const char* trailing = "";
922 int32_t trailingLength = 0;
923 int32_t trailingIndex = 0;
924
925 if(U_FAILURE(*err)) {
926 goto error;
927 }
928 else if (localeID == NULL) {
929 goto error;
930 }
931
932 trailingIndex =
933 parseTagString(
934 localeID,
935 lang,
936 &langLength,
937 script,
938 &scriptLength,
939 region,
940 &regionLength,
941 err);
942 if(U_FAILURE(*err)) {
943
944 /* Overflow indicates an illegal argument error */
945 if (*err == U_BUFFER_OVERFLOW_ERROR) {
946 *err = U_ILLEGAL_ARGUMENT_ERROR;
947 }
948
949 goto error;
950 }
951
952 /* Find the spot where the variants or the keywords begin, if any. */
953 while (_isIDSeparator(localeID[trailingIndex])) {
954 trailingIndex++;
955 }
956 trailing = &localeID[trailingIndex];
957 trailingLength = (int32_t)uprv_strlen(trailing);
958
959 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
960
961 {
962 icu::CharString base;
963 {
964 icu::CharStringByteSink sink(&base);
965 createTagString(
966 lang,
967 langLength,
968 script,
969 scriptLength,
970 region,
971 regionLength,
972 NULL,
973 0,
974 sink,
975 err);
976 }
977
978 /**
979 * First, we need to first get the maximization
980 * from AddLikelySubtags.
981 **/
982 {
983 icu::CharStringByteSink sink(&maximizedTagBuffer);
984 ulocimp_addLikelySubtags(base.data(), sink, err);
985 }
986 }
987
988 if(U_FAILURE(*err)) {
989 goto error;
990 }
991
992 /**
993 * Start first with just the language.
994 **/
995 {
996 icu::CharString tagBuffer;
997 {
998 icu::CharStringByteSink sink(&tagBuffer);
999 createLikelySubtagsString(
1000 lang,
1001 langLength,
1002 NULL,
1003 0,
1004 NULL,
1005 0,
1006 NULL,
1007 0,
1008 sink,
1009 err);
1010 }
1011
1012 if(U_FAILURE(*err)) {
1013 goto error;
1014 }
1015 else if (!tagBuffer.isEmpty() && uprv_strnicmp(
1016 maximizedTagBuffer.data(),
1017 tagBuffer.data(),
1018 tagBuffer.length()) == 0) {
1019
1020 createTagString(
1021 lang,
1022 langLength,
1023 NULL,
1024 0,
1025 NULL,
1026 0,
1027 trailing,
1028 trailingLength,
1029 sink,
1030 err);
1031 return;
1032 }
1033 }
1034
1035 /**
1036 * Next, try the language and region.
1037 **/
1038 if (regionLength > 0) {
1039
1040 icu::CharString tagBuffer;
1041 {
1042 icu::CharStringByteSink sink(&tagBuffer);
1043 createLikelySubtagsString(
1044 lang,
1045 langLength,
1046 NULL,
1047 0,
1048 region,
1049 regionLength,
1050 NULL,
1051 0,
1052 sink,
1053 err);
1054 }
1055
1056 if(U_FAILURE(*err)) {
1057 goto error;
1058 }
1059 else if (uprv_strnicmp(
1060 maximizedTagBuffer.data(),
1061 tagBuffer.data(),
1062 tagBuffer.length()) == 0) {
1063
1064 createTagString(
1065 lang,
1066 langLength,
1067 NULL,
1068 0,
1069 region,
1070 regionLength,
1071 trailing,
1072 trailingLength,
1073 sink,
1074 err);
1075 return;
1076 }
1077 }
1078
1079 /**
1080 * Finally, try the language and script. This is our last chance,
1081 * since trying with all three subtags would only yield the
1082 * maximal version that we already have.
1083 **/
1084 if (scriptLength > 0 && regionLength > 0) {
1085 icu::CharString tagBuffer;
1086 {
1087 icu::CharStringByteSink sink(&tagBuffer);
1088 createLikelySubtagsString(
1089 lang,
1090 langLength,
1091 script,
1092 scriptLength,
1093 NULL,
1094 0,
1095 NULL,
1096 0,
1097 sink,
1098 err);
1099 }
1100
1101 if(U_FAILURE(*err)) {
1102 goto error;
1103 }
1104 else if (uprv_strnicmp(
1105 maximizedTagBuffer.data(),
1106 tagBuffer.data(),
1107 tagBuffer.length()) == 0) {
1108
1109 createTagString(
1110 lang,
1111 langLength,
1112 script,
1113 scriptLength,
1114 NULL,
1115 0,
1116 trailing,
1117 trailingLength,
1118 sink,
1119 err);
1120 return;
1121 }
1122 }
1123
1124 {
1125 /**
1126 * If we got here, return the locale ID parameter.
1127 **/
1128 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1129 sink.Append(localeID, localeIDLength);
1130 return;
1131 }
1132
1133error:
1134
1135 if (!U_FAILURE(*err)) {
1136 *err = U_ILLEGAL_ARGUMENT_ERROR;
1137 }
1138}
1139
1140static UBool
1141do_canonicalize(const char* localeID,
1142 char* buffer,
1143 int32_t bufferCapacity,
1144 UErrorCode* err)
1145{
1146 uloc_canonicalize(
1147 localeID,
1148 buffer,
1149 bufferCapacity,
1150 err);
1151
1152 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1153 *err == U_BUFFER_OVERFLOW_ERROR) {
1154 *err = U_ILLEGAL_ARGUMENT_ERROR;
1155
1156 return FALSE;
1157 }
1158 else if (U_FAILURE(*err)) {
1159
1160 return FALSE;
1161 }
1162 else {
1163 return TRUE;
1164 }
1165}
1166
1167U_CAPI int32_t U_EXPORT2
1168uloc_addLikelySubtags(const char* localeID,
1169 char* maximizedLocaleID,
1170 int32_t maximizedLocaleIDCapacity,
1171 UErrorCode* status) {
1172 if (U_FAILURE(*status)) {
1173 return 0;
1174 }
1175
1176 icu::CheckedArrayByteSink sink(
1177 maximizedLocaleID, maximizedLocaleIDCapacity);
1178
1179 ulocimp_addLikelySubtags(localeID, sink, status);
1180 int32_t reslen = sink.NumberOfBytesAppended();
1181
1182 if (U_FAILURE(*status)) {
1183 return sink.Overflowed() ? reslen : -1;
1184 }
1185
1186 if (sink.Overflowed()) {
1187 *status = U_BUFFER_OVERFLOW_ERROR;
1188 } else {
1189 u_terminateChars(
1190 maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
1191 }
1192
1193 return reslen;
1194}
1195
1196U_CAPI void U_EXPORT2
1197ulocimp_addLikelySubtags(const char* localeID,
1198 icu::ByteSink& sink,
1199 UErrorCode* status) {
1200 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1201
1202 if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1203 _uloc_addLikelySubtags(localeBuffer, sink, status);
1204 }
1205}
1206
1207U_CAPI int32_t U_EXPORT2
1208uloc_minimizeSubtags(const char* localeID,
1209 char* minimizedLocaleID,
1210 int32_t minimizedLocaleIDCapacity,
1211 UErrorCode* status) {
1212 if (U_FAILURE(*status)) {
1213 return 0;
1214 }
1215
1216 icu::CheckedArrayByteSink sink(
1217 minimizedLocaleID, minimizedLocaleIDCapacity);
1218
1219 ulocimp_minimizeSubtags(localeID, sink, status);
1220 int32_t reslen = sink.NumberOfBytesAppended();
1221
1222 if (U_FAILURE(*status)) {
1223 return sink.Overflowed() ? reslen : -1;
1224 }
1225
1226 if (sink.Overflowed()) {
1227 *status = U_BUFFER_OVERFLOW_ERROR;
1228 } else {
1229 u_terminateChars(
1230 minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
1231 }
1232
1233 return reslen;
1234}
1235
1236U_CAPI void U_EXPORT2
1237ulocimp_minimizeSubtags(const char* localeID,
1238 icu::ByteSink& sink,
1239 UErrorCode* status) {
1240 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1241
1242 if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1243 _uloc_minimizeSubtags(localeBuffer, sink, status);
1244 }
1245}
1246
1247// Pairs of (language subtag, + or -) for finding out fast if common languages
1248// are LTR (minus) or RTL (plus).
1249static const char LANG_DIR_STRING[] =
1250 "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1251
1252// Implemented here because this calls ulocimp_addLikelySubtags().
1253U_CAPI UBool U_EXPORT2
1254uloc_isRightToLeft(const char *locale) {
1255 UErrorCode errorCode = U_ZERO_ERROR;
1256 char script[8];
1257 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1258 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1259 scriptLength == 0) {
1260 // Fastpath: We know the likely scripts and their writing direction
1261 // for some common languages.
1262 errorCode = U_ZERO_ERROR;
1263 char lang[8];
1264 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1265 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1266 return FALSE;
1267 }
1268 if (langLength > 0) {
1269 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1270 if (langPtr != NULL) {
1271 switch (langPtr[langLength]) {
1272 case '-': return FALSE;
1273 case '+': return TRUE;
1274 default: break; // partial match of a longer code
1275 }
1276 }
1277 }
1278 // Otherwise, find the likely script.
1279 errorCode = U_ZERO_ERROR;
1280 icu::CharString likely;
1281 {
1282 icu::CharStringByteSink sink(&likely);
1283 ulocimp_addLikelySubtags(locale, sink, &errorCode);
1284 }
1285 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1286 return FALSE;
1287 }
1288 scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
1289 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1290 scriptLength == 0) {
1291 return FALSE;
1292 }
1293 }
1294 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1295 return uscript_isRightToLeft(scriptCode);
1296}
1297
1298U_NAMESPACE_BEGIN
1299
1300UBool
1301Locale::isRightToLeft() const {
1302 return uloc_isRightToLeft(getBaseName());
1303}
1304
1305U_NAMESPACE_END
1306
1307// The following must at least allow for rg key value (6) plus terminator (1).
1308#define ULOC_RG_BUFLEN 8
1309
1310U_CAPI int32_t U_EXPORT2
1311ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1312 char *region, int32_t regionCapacity, UErrorCode* status) {
1313 if (U_FAILURE(*status)) {
1314 return 0;
1315 }
1316 char rgBuf[ULOC_RG_BUFLEN];
1317 UErrorCode rgStatus = U_ZERO_ERROR;
1318
1319 // First check for rg keyword value
1320 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1321 if (U_FAILURE(rgStatus) || rgLen != 6) {
1322 rgLen = 0;
1323 } else {
1324 // rgBuf guaranteed to be zero terminated here, with text len 6
1325 char *rgPtr = rgBuf;
1326 for (; *rgPtr!= 0; rgPtr++) {
1327 *rgPtr = uprv_toupper(*rgPtr);
1328 }
1329 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1330 }
1331
1332 if (rgLen == 0) {
1333 // No valid rg keyword value, try for unicode_region_subtag
1334 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1335 if (U_FAILURE(*status)) {
1336 rgLen = 0;
1337 } else if (rgLen == 0 && inferRegion) {
1338 // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1339 rgStatus = U_ZERO_ERROR;
1340 icu::CharString locBuf;
1341 {
1342 icu::CharStringByteSink sink(&locBuf);
1343 ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1344 }
1345 if (U_SUCCESS(rgStatus)) {
1346 rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
1347 if (U_FAILURE(*status)) {
1348 rgLen = 0;
1349 }
1350 }
1351 }
1352 }
1353
1354 rgBuf[rgLen] = 0;
1355 uprv_strncpy(region, rgBuf, regionCapacity);
1356 return u_terminateChars(region, regionCapacity, rgLen, status);
1357}
1358
1359