1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2014, International Business Machines
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* loadednormalizer2impl.cpp
9*
10* created on: 2014sep03
11* created by: Markus W. Scherer
12*/
13
14#include "unicode/utypes.h"
15
16#if !UCONFIG_NO_NORMALIZATION
17
18#include "unicode/udata.h"
19#include "unicode/localpointer.h"
20#include "unicode/normalizer2.h"
21#include "unicode/ucptrie.h"
22#include "unicode/unistr.h"
23#include "unicode/unorm.h"
24#include "cstring.h"
25#include "mutex.h"
26#include "norm2allmodes.h"
27#include "normalizer2impl.h"
28#include "uassert.h"
29#include "ucln_cmn.h"
30#include "uhash.h"
31
32U_NAMESPACE_BEGIN
33
34class LoadedNormalizer2Impl : public Normalizer2Impl {
35public:
36 LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
37 virtual ~LoadedNormalizer2Impl();
38
39 void load(const char *packageName, const char *name, UErrorCode &errorCode);
40
41private:
42 static UBool U_CALLCONV
43 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
44
45 UDataMemory *memory;
46 UCPTrie *ownedTrie;
47};
48
49LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
50 udata_close(memory);
51 ucptrie_close(ownedTrie);
52}
53
54UBool U_CALLCONV
55LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
56 const char * /* type */, const char * /*name*/,
57 const UDataInfo *pInfo) {
58 if(
59 pInfo->size>=20 &&
60 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
61 pInfo->charsetFamily==U_CHARSET_FAMILY &&
62 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
63 pInfo->dataFormat[1]==0x72 &&
64 pInfo->dataFormat[2]==0x6d &&
65 pInfo->dataFormat[3]==0x32 &&
66 pInfo->formatVersion[0]==4
67 ) {
68 // Normalizer2Impl *me=(Normalizer2Impl *)context;
69 // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
70 return true;
71 } else {
72 return false;
73 }
74}
75
76void
77LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
78 if(U_FAILURE(errorCode)) {
79 return;
80 }
81 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
82 if(U_FAILURE(errorCode)) {
83 return;
84 }
85 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
86 const int32_t *inIndexes=(const int32_t *)inBytes;
87 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
88 if(indexesLength<=IX_MIN_LCCC_CP) {
89 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
90 return;
91 }
92
93 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
94 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
95 ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
96 inBytes+offset, nextOffset-offset, nullptr,
97 &errorCode);
98 if(U_FAILURE(errorCode)) {
99 return;
100 }
101
102 offset=nextOffset;
103 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
104 const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
105
106 // smallFCD: new in formatVersion 2
107 offset=nextOffset;
108 const uint8_t *inSmallFCD=inBytes+offset;
109
110 init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
111}
112
113// instance cache ---------------------------------------------------------- ***
114
115Norm2AllModes *
116Norm2AllModes::createInstance(const char *packageName,
117 const char *name,
118 UErrorCode &errorCode) {
119 if(U_FAILURE(errorCode)) {
120 return nullptr;
121 }
122 LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
123 if(impl==nullptr) {
124 errorCode=U_MEMORY_ALLOCATION_ERROR;
125 return nullptr;
126 }
127 impl->load(packageName, name, errorCode);
128 return createInstance(impl, errorCode);
129}
130
131U_CDECL_BEGIN
132static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
133U_CDECL_END
134
135#if !NORM2_HARDCODE_NFC_DATA
136static Norm2AllModes *nfcSingleton;
137static icu::UInitOnce nfcInitOnce {};
138#endif
139
140static Norm2AllModes *nfkcSingleton;
141static icu::UInitOnce nfkcInitOnce {};
142
143static Norm2AllModes *nfkc_cfSingleton;
144static icu::UInitOnce nfkc_cfInitOnce {};
145
146static UHashtable *cache=nullptr;
147
148// UInitOnce singleton initialization function
149static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
150#if !NORM2_HARDCODE_NFC_DATA
151 if (uprv_strcmp(what, "nfc") == 0) {
152 nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
153 } else
154#endif
155 if (uprv_strcmp(what, "nfkc") == 0) {
156 nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
157 } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
158 nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
159 } else {
160 UPRV_UNREACHABLE_EXIT; // Unknown singleton
161 }
162 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
163}
164
165U_CDECL_BEGIN
166
167static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
168 delete (Norm2AllModes *)allModes;
169}
170
171static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
172#if !NORM2_HARDCODE_NFC_DATA
173 delete nfcSingleton;
174 nfcSingleton = nullptr;
175 nfcInitOnce.reset();
176#endif
177
178 delete nfkcSingleton;
179 nfkcSingleton = nullptr;
180 nfkcInitOnce.reset();
181
182 delete nfkc_cfSingleton;
183 nfkc_cfSingleton = nullptr;
184 nfkc_cfInitOnce.reset();
185
186 uhash_close(cache);
187 cache=nullptr;
188 return true;
189}
190
191U_CDECL_END
192
193#if !NORM2_HARDCODE_NFC_DATA
194const Norm2AllModes *
195Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
196 if(U_FAILURE(errorCode)) { return nullptr; }
197 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
198 return nfcSingleton;
199}
200#endif
201
202const Norm2AllModes *
203Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
204 if(U_FAILURE(errorCode)) { return nullptr; }
205 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
206 return nfkcSingleton;
207}
208
209const Norm2AllModes *
210Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
211 if(U_FAILURE(errorCode)) { return nullptr; }
212 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
213 return nfkc_cfSingleton;
214}
215
216#if !NORM2_HARDCODE_NFC_DATA
217const Normalizer2 *
218Normalizer2::getNFCInstance(UErrorCode &errorCode) {
219 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
220 return allModes!=nullptr ? &allModes->comp : nullptr;
221}
222
223const Normalizer2 *
224Normalizer2::getNFDInstance(UErrorCode &errorCode) {
225 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
226 return allModes!=nullptr ? &allModes->decomp : nullptr;
227}
228
229const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
230 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
231 return allModes!=nullptr ? &allModes->fcd : nullptr;
232}
233
234const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
235 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
236 return allModes!=nullptr ? &allModes->fcc : nullptr;
237}
238
239const Normalizer2Impl *
240Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
241 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
242 return allModes!=nullptr ? allModes->impl : nullptr;
243}
244#endif
245
246const Normalizer2 *
247Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
248 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
249 return allModes!=nullptr ? &allModes->comp : nullptr;
250}
251
252const Normalizer2 *
253Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
254 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
255 return allModes!=nullptr ? &allModes->decomp : nullptr;
256}
257
258const Normalizer2 *
259Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
260 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
261 return allModes!=nullptr ? &allModes->comp : nullptr;
262}
263
264const Normalizer2 *
265Normalizer2::getInstance(const char *packageName,
266 const char *name,
267 UNormalization2Mode mode,
268 UErrorCode &errorCode) {
269 if(U_FAILURE(errorCode)) {
270 return nullptr;
271 }
272 if(name==nullptr || *name==0) {
273 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
274 return nullptr;
275 }
276 const Norm2AllModes *allModes=nullptr;
277 if(packageName==nullptr) {
278 if(0==uprv_strcmp(name, "nfc")) {
279 allModes=Norm2AllModes::getNFCInstance(errorCode);
280 } else if(0==uprv_strcmp(name, "nfkc")) {
281 allModes=Norm2AllModes::getNFKCInstance(errorCode);
282 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
283 allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
284 }
285 }
286 if(allModes==nullptr && U_SUCCESS(errorCode)) {
287 {
288 Mutex lock;
289 if(cache!=nullptr) {
290 allModes=(Norm2AllModes *)uhash_get(cache, name);
291 }
292 }
293 if(allModes==nullptr) {
294 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
295 LocalPointer<Norm2AllModes> localAllModes(
296 Norm2AllModes::createInstance(packageName, name, errorCode));
297 if(U_SUCCESS(errorCode)) {
298 Mutex lock;
299 if(cache==nullptr) {
300 cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
301 if(U_FAILURE(errorCode)) {
302 return nullptr;
303 }
304 uhash_setKeyDeleter(cache, uprv_free);
305 uhash_setValueDeleter(cache, deleteNorm2AllModes);
306 }
307 void *temp=uhash_get(cache, name);
308 if(temp==nullptr) {
309 int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
310 char *nameCopy=(char *)uprv_malloc(keyLength);
311 if(nameCopy==nullptr) {
312 errorCode=U_MEMORY_ALLOCATION_ERROR;
313 return nullptr;
314 }
315 uprv_memcpy(nameCopy, name, keyLength);
316 allModes=localAllModes.getAlias();
317 uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
318 } else {
319 // race condition
320 allModes=(Norm2AllModes *)temp;
321 }
322 }
323 }
324 }
325 if(allModes!=nullptr && U_SUCCESS(errorCode)) {
326 switch(mode) {
327 case UNORM2_COMPOSE:
328 return &allModes->comp;
329 case UNORM2_DECOMPOSE:
330 return &allModes->decomp;
331 case UNORM2_FCD:
332 return &allModes->fcd;
333 case UNORM2_COMPOSE_CONTIGUOUS:
334 return &allModes->fcc;
335 default:
336 break; // do nothing
337 }
338 }
339 return nullptr;
340}
341
342const Normalizer2 *
343Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
344 if(U_FAILURE(errorCode)) {
345 return nullptr;
346 }
347 switch(mode) {
348 case UNORM_NFD:
349 return Normalizer2::getNFDInstance(errorCode);
350 case UNORM_NFKD:
351 return Normalizer2::getNFKDInstance(errorCode);
352 case UNORM_NFC:
353 return Normalizer2::getNFCInstance(errorCode);
354 case UNORM_NFKC:
355 return Normalizer2::getNFKCInstance(errorCode);
356 case UNORM_FCD:
357 return getFCDInstance(errorCode);
358 default: // UNORM_NONE
359 return getNoopInstance(errorCode);
360 }
361}
362
363const Normalizer2Impl *
364Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
365 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
366 return allModes!=nullptr ? allModes->impl : nullptr;
367}
368
369const Normalizer2Impl *
370Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
371 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
372 return allModes!=nullptr ? allModes->impl : nullptr;
373}
374
375U_NAMESPACE_END
376
377// C API ------------------------------------------------------------------- ***
378
379U_NAMESPACE_USE
380
381U_CAPI const UNormalizer2 * U_EXPORT2
382unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
383 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
384}
385
386U_CAPI const UNormalizer2 * U_EXPORT2
387unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
388 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
389}
390
391U_CAPI const UNormalizer2 * U_EXPORT2
392unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
393 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
394}
395
396U_CAPI const UNormalizer2 * U_EXPORT2
397unorm2_getInstance(const char *packageName,
398 const char *name,
399 UNormalization2Mode mode,
400 UErrorCode *pErrorCode) {
401 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
402}
403
404U_CFUNC UNormalizationCheckResult
405unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
406 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
407 return UNORM_YES;
408 }
409 UErrorCode errorCode=U_ZERO_ERROR;
410 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
411 if(U_SUCCESS(errorCode)) {
412 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
413 } else {
414 return UNORM_MAYBE;
415 }
416}
417
418#endif // !UCONFIG_NO_NORMALIZATION
419