1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (C) 1997-2014, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | * |
9 | * File USCRIPT.C |
10 | * |
11 | * Modification History: |
12 | * |
13 | * Date Name Description |
14 | * 07/06/2001 Ram Creation. |
15 | ****************************************************************************** |
16 | */ |
17 | |
18 | #include "unicode/uchar.h" |
19 | #include "unicode/uscript.h" |
20 | #include "unicode/uloc.h" |
21 | #include "bytesinkutil.h" |
22 | #include "charstr.h" |
23 | #include "cmemory.h" |
24 | #include "cstring.h" |
25 | #include "ulocimp.h" |
26 | |
27 | static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; |
28 | static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; |
29 | static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; |
30 | |
31 | static int32_t |
32 | setCodes(const UScriptCode *src, int32_t length, |
33 | UScriptCode *dest, int32_t capacity, UErrorCode *err) { |
34 | int32_t i; |
35 | if(U_FAILURE(*err)) { return 0; } |
36 | if(length > capacity) { |
37 | *err = U_BUFFER_OVERFLOW_ERROR; |
38 | return length; |
39 | } |
40 | for(i = 0; i < length; ++i) { |
41 | dest[i] = src[i]; |
42 | } |
43 | return length; |
44 | } |
45 | |
46 | static int32_t |
47 | setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) { |
48 | if(U_FAILURE(*err)) { return 0; } |
49 | if(1 > capacity) { |
50 | *err = U_BUFFER_OVERFLOW_ERROR; |
51 | return 1; |
52 | } |
53 | scripts[0] = script; |
54 | return 1; |
55 | } |
56 | |
57 | static int32_t |
58 | getCodesFromLocale(const char *locale, |
59 | UScriptCode *scripts, int32_t capacity, UErrorCode *err) { |
60 | UErrorCode internalErrorCode = U_ZERO_ERROR; |
61 | char lang[8] = {0}; |
62 | char script[8] = {0}; |
63 | int32_t scriptLength; |
64 | if(U_FAILURE(*err)) { return 0; } |
65 | // Multi-script languages, equivalent to the LocaleScript data |
66 | // that we used to load from locale resource bundles. |
67 | /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode); |
68 | if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { |
69 | return 0; |
70 | } |
71 | if(0 == uprv_strcmp(lang, "ja" )) { |
72 | return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err); |
73 | } |
74 | if(0 == uprv_strcmp(lang, "ko" )) { |
75 | return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err); |
76 | } |
77 | scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode); |
78 | if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { |
79 | return 0; |
80 | } |
81 | if(0 == uprv_strcmp(lang, "zh" ) && 0 == uprv_strcmp(script, "Hant" )) { |
82 | return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err); |
83 | } |
84 | // Explicit script code. |
85 | if(scriptLength != 0) { |
86 | UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); |
87 | if(scriptCode != USCRIPT_INVALID_CODE) { |
88 | if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) { |
89 | scriptCode = USCRIPT_HAN; |
90 | } |
91 | return setOneCode(scriptCode, scripts, capacity, err); |
92 | } |
93 | } |
94 | return 0; |
95 | } |
96 | |
97 | /* TODO: this is a bad API and should be deprecated, ticket #11141 */ |
98 | U_CAPI int32_t U_EXPORT2 |
99 | uscript_getCode(const char* nameOrAbbrOrLocale, |
100 | UScriptCode* fillIn, |
101 | int32_t capacity, |
102 | UErrorCode* err){ |
103 | UBool triedCode; |
104 | UErrorCode internalErrorCode; |
105 | int32_t length; |
106 | |
107 | if(U_FAILURE(*err)) { |
108 | return 0; |
109 | } |
110 | if(nameOrAbbrOrLocale==nullptr || |
111 | (fillIn == nullptr ? capacity != 0 : capacity < 0)) { |
112 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
113 | return 0; |
114 | } |
115 | |
116 | triedCode = false; |
117 | const char* lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '-'); |
118 | if (lastSepPtr==nullptr) { |
119 | lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '_'); |
120 | } |
121 | // Favor interpretation of nameOrAbbrOrLocale as a script alias if either |
122 | // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc. |
123 | // 2. The last instance of -/_ is at offset 3, and the portion after that is |
124 | // longer than 4 characters (i.e. not a script or region code). This handles |
125 | // Old_Hungarian, Old_Italic, etc. ("old" is a valid language code) |
126 | // 3. The last instance of -/_ is at offset 7, and the portion after that is |
127 | // 3 characters. This handles New_Tai_Lue ("new" is a valid language code). |
128 | if (lastSepPtr==nullptr |
129 | || (lastSepPtr-nameOrAbbrOrLocale == 3 && uprv_strlen(nameOrAbbrOrLocale) > 8) |
130 | || (lastSepPtr-nameOrAbbrOrLocale == 7 && uprv_strlen(nameOrAbbrOrLocale) == 11) ) { |
131 | /* try long and abbreviated script names first */ |
132 | UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); |
133 | if(code!=USCRIPT_INVALID_CODE) { |
134 | return setOneCode(code, fillIn, capacity, err); |
135 | } |
136 | triedCode = true; |
137 | } |
138 | internalErrorCode = U_ZERO_ERROR; |
139 | length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err); |
140 | if(U_FAILURE(*err) || length != 0) { |
141 | return length; |
142 | } |
143 | icu::CharString likely; |
144 | { |
145 | icu::CharStringByteSink sink(&likely); |
146 | ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode); |
147 | } |
148 | if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { |
149 | length = getCodesFromLocale(likely.data(), fillIn, capacity, err); |
150 | if(U_FAILURE(*err) || length != 0) { |
151 | return length; |
152 | } |
153 | } |
154 | if(!triedCode) { |
155 | /* still not found .. try long and abbreviated script names again */ |
156 | UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); |
157 | if(code!=USCRIPT_INVALID_CODE) { |
158 | return setOneCode(code, fillIn, capacity, err); |
159 | } |
160 | } |
161 | return 0; |
162 | } |
163 | |