transreg.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/transreg.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (c) 2001-2014, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* Date Name Description
9	* 08/10/2001 aliu Creation.
10	**********************************************************************
11	*/
12
13	#include "unicode/utypes.h"
14
15	#if !UCONFIG_NO_TRANSLITERATION
16
17	#include "unicode/translit.h"
18	#include "unicode/resbund.h"
19	#include "unicode/uniset.h"
20	#include "unicode/uscript.h"
21	#include "rbt.h"
22	#include "cpdtrans.h"
23	#include "nultrans.h"
24	#include "transreg.h"
25	#include "rbt_data.h"
26	#include "rbt_pars.h"
27	#include "tridpars.h"
28	#include "charstr.h"
29	#include "uassert.h"
30	#include "locutil.h"
31
32	// Enable the following symbol to add debugging code that tracks the
33	// allocation, deletion, and use of Entry objects. BoundsChecker has
34	// reported dangling pointer errors with these objects, but I have
35	// been unable to confirm them. I suspect BoundsChecker is getting
36	// confused with pointers going into and coming out of a UHashtable,
37	// despite the hinting code that is designed to help it.
38	// #define DEBUG_MEM
39	#ifdef DEBUG_MEM
40	#include <stdio.h>
41	#endif
42
43	// UChar constants
44	static const UChar LOCALE_SEP = `95`; // '_'
45	//static const UChar ID_SEP = 0x002D; /-/
46	//static const UChar VARIANT_SEP = 0x002F; // '/'
47
48	// String constants
49	static const UChar ANY[] = { `0x41`, `0x6E`, `0x79`, `0` }; // Any
50	static const UChar LAT[] = { `0x4C`, `0x61`, `0x74`, `0` }; // Lat
51
// Placeholder for "no variant": a default-constructed (empty) string.
#define NO_VARIANT UnicodeString()

// Starting capacity for specDAG, taken from
// ICU 60 Transliterator::countAvailableSources().
#define SPECDAG_INIT_SIZE 149

// Starting capacity of the variant-name list, and the hard limit on how
// many distinct variant names may be recorded.
#define VARIANT_LIST_INIT_SIZE 11
#define VARIANT_LIST_MAX_SIZE 31

// Starting capacity for availableIDs; the default estimate of 8 would
// force several reallocations.
// Taken from ICU 60 Transliterator::countAvailableIDs().
#define AVAILABLE_IDS_INIT_SIZE 641

// Starting capacities of the target tables for the sources "Any" and "Lat",
// taken from ICU 60 Transliterator::countAvailableTargets("Any")/("Latn").
#define ANY_TARGETS_INIT_SIZE 125
#define LAT_TARGETS_INIT_SIZE 23
71
72	/**
73	* Resource bundle key for the RuleBasedTransliterator rule.
74	*/
75	//static const char RB_RULE[] = "Rule";
76
77	U_NAMESPACE_BEGIN
78
79	//------------------------------------------------------------------
80	// Alias
81	//------------------------------------------------------------------
82
83	TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID,
84	const UnicodeSet* cpdFilter) :
85	ID (),
86	aliasesOrRules (theAliasID),
87	transes(`0`),
88	compoundFilter(cpdFilter),
89	direction(UTRANS_FORWARD),
90	type(TransliteratorAlias::SIMPLE) {
91	}
92
93	TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
94	const UnicodeString& idBlocks,
95	UVector* adoptedTransliterators,
96	const UnicodeSet* cpdFilter) :
97	ID (theID),
98	aliasesOrRules (idBlocks),
99	transes(adoptedTransliterators),
100	compoundFilter(cpdFilter),
101	direction(UTRANS_FORWARD),
102	type(TransliteratorAlias::COMPOUND) {
103	}
104
105	TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
106	const UnicodeString& rules,
107	UTransDirection dir) :
108	ID (theID),
109	aliasesOrRules (rules),
110	transes(`0`),
111	compoundFilter(`0`),
112	direction(dir),
113	type(TransliteratorAlias::RULES) {
114	}
115
116	TransliteratorAlias::~TransliteratorAlias() {
117	delete transes;
118	}
119
120
121	Transliterator* TransliteratorAlias::create(UParseError& pe,
122	UErrorCode& ec) {
123	if (U_FAILURE(ec)) {
124	return `0`;
125	}
126	Transliterator *t = NULL;
127	switch (type) {
128	case SIMPLE:
129	t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
130	if(U_FAILURE(ec)){
131	return `0`;
132	}
133	if (compoundFilter != `0`)
134	t->adoptFilter(compoundFilter->clone());
135	break;
136	case COMPOUND:
137	{
138	// the total number of transliterators in the compound is the total number of anonymous transliterators
139	// plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
140	// block and that each pair anonymous transliterators has an ID block between them. Then we go back
141	// to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
142	// marks the position where an anonymous transliterator goes) and adjust accordingly
143	int32_t anonymousRBTs = transes->size();
144	int32_t transCount = anonymousRBTs * `2` + `1`;
145	if (!aliasesOrRules.isEmpty() && aliasesOrRules [`0`] == (UChar)(`0xffff`))
146	--transCount;
147	if (aliasesOrRules.length() >= `2` && aliasesOrRules [aliasesOrRules.length() - `1`] == (UChar)(`0xffff`))
148	--transCount;
149	UnicodeString noIDBlock((UChar)(`0xffff`));
150	noIDBlock += ((UChar)(`0xffff`));
151	int32_t pos = aliasesOrRules.indexOf(noIDBlock);
152	while (pos >= `0`) {
153	--transCount;
154	pos = aliasesOrRules.indexOf(noIDBlock, pos + `1`);
155	}
156
157	UVector transliterators(ec);
158	UnicodeString idBlock;
159	int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(`0xffff`));
160	while (blockSeparatorPos >= `0`) {
161	aliasesOrRules.extract(`0`, blockSeparatorPos, idBlock);
162	aliasesOrRules.remove(`0`, blockSeparatorPos + `1`);
163	if (!idBlock.isEmpty())
164	transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
165	if (!transes->isEmpty())
166	transliterators.addElement(transes->orphanElementAt(`0`), ec);
167	blockSeparatorPos = aliasesOrRules.indexOf((UChar)(`0xffff`));
168	}
169	if (!aliasesOrRules.isEmpty())
170	transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
171	while (!transes->isEmpty())
172	transliterators.addElement(transes->orphanElementAt(`0`), ec);
173
174	if (U_SUCCESS(ec)) {
175	t = new CompoundTransliterator (ID, transliterators,
176	(compoundFilter ? compoundFilter->clone() : nullptr),
177	anonymousRBTs, pe, ec);
178	if (t == `0`) {
179	ec = U_MEMORY_ALLOCATION_ERROR;
180	return `0`;
181	}
182	} else {
183	for (int32_t i = `0`; i < transliterators.size(); i++)
184	delete (Transliterator*)(transliterators.elementAt(i));
185	}
186	}
187	break;
188	case RULES:
189	UPRV_UNREACHABLE; // don't call create() if isRuleBased() returns TRUE!
190	}
191	return t;
192	}
193
194	UBool TransliteratorAlias::isRuleBased() const {
195	return type == RULES;
196	}
197
198	void TransliteratorAlias::parse(TransliteratorParser& parser,
199	UParseError& pe, UErrorCode& ec) const {
200	U_ASSERT(type == RULES);
201	if (U_FAILURE(ec)) {
202	return;
203	}
204
205	parser.parse(aliasesOrRules, direction, pe, ec);
206	}
207
208	//----------------------------------------------------------------------
209	// class TransliteratorSpec
210	//----------------------------------------------------------------------
211
212	/**
213	* A TransliteratorSpec is a string specifying either a source or a target. In more
214	* general terms, it may also specify a variant, but we only use the
215	* Spec class for sources and targets.
216	*
217	* A Spec may be a locale or a script. If it is a locale, it has a
218	* fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
219	* ssss is the script mapping of xx_YY_ZZZ. The Spec API methods
220	* hasFallback(), next(), and reset() iterate over this fallback
221	* sequence.
222	*
223	* The Spec class canonicalizes itself, so the locale is put into
224	* canonical form, or the script is transformed from an abbreviation
225	* to a full name.
226	*/
227	class TransliteratorSpec : public UMemory {
228	public:
229	TransliteratorSpec(const UnicodeString& spec);
230	~TransliteratorSpec();
231
232	const UnicodeString& get() const;
233	UBool hasFallback() const;
234	const UnicodeString& next();
235	void reset();
236
237	UBool isLocale() const;
238	ResourceBundle& getBundle() const;
239
240	operator const UnicodeString&() const { return get(); }
241	const UnicodeString& getTop() const { return top; }
242
243	private:
244	void setupNext();
245
246	UnicodeString top;
247	UnicodeString spec;
248	UnicodeString nextSpec;
249	UnicodeString scriptName;
250	UBool isSpecLocale; // TRUE if spec is a locale
251	UBool isNextLocale; // TRUE if nextSpec is a locale
252	ResourceBundle* res;
253
254	TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class
255	TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class
256	};
257
258	TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
259	: top (theSpec),
260	res(`0`)
261	{
262	UErrorCode status = U_ZERO_ERROR;
263	Locale topLoc("");
264	LocaleUtility::initLocaleFromName(theSpec, topLoc);
265	if (!topLoc.isBogus()) {
266	res = new ResourceBundle (U_ICUDATA_TRANSLIT, topLoc, status);
267	/ test for NULL /
268	if (res == `0`) {
269	return;
270	}
271	if (U_FAILURE(status) \|\| status == U_USING_DEFAULT_WARNING) {
272	delete res;
273	res = `0`;
274	}
275	}
276
277	// Canonicalize script name -or- do locale->script mapping
278	status = U_ZERO_ERROR;
279	static const int32_t capacity = `10`;
280	UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
281	int32_t num = uscript_getCode(CharString ().appendInvariantChars(theSpec, status).data(),
282	script, capacity, &status);
283	if (num > `0` && script[`0`] != USCRIPT_INVALID_CODE) {
284	scriptName = UnicodeString (uscript_getName(script[`0`]), -`1`, US_INV);
285	}
286
287	// Canonicalize top
288	if (res != `0`) {
289	// Canonicalize locale name
290	UnicodeString locStr;
291	LocaleUtility::initNameFromLocale(topLoc, locStr);
292	if (!locStr.isBogus()) {
293	top = locStr;
294	}
295	} else if (scriptName.length() != `0`) {
296	// We are a script; use canonical name
297	top = scriptName;
298	}
299
300	// assert(spec != top);
301	reset();
302	}
303
304	TransliteratorSpec::~TransliteratorSpec() {
305	delete res;
306	}
307
308	UBool TransliteratorSpec::hasFallback() const {
309	return nextSpec.length() != `0`;
310	}
311
312	void TransliteratorSpec::reset() {
313	if (spec != top) {
314	spec = top;
315	isSpecLocale = (res != `0`);
316	setupNext();
317	}
318	}
319
320	void TransliteratorSpec::setupNext() {
321	isNextLocale = FALSE;
322	if (isSpecLocale) {
323	nextSpec = spec;
324	int32_t i = nextSpec.lastIndexOf(LOCALE_SEP);
325	// If i == 0 then we have _FOO, so we fall through
326	// to the scriptName.
327	if (i > `0`) {
328	nextSpec.truncate(i);
329	isNextLocale = TRUE;
330	} else {
331	nextSpec = scriptName; // scriptName may be empty
332	}
333	} else {
334	// spec is a script, so we are at the end
335	nextSpec.truncate(`0`);
336	}
337	}
338
339	// Protocol:
340	// for(const UnicodeString& s(spec.get());
341	// spec.hasFallback(); s(spec.next())) { ...
342
343	const UnicodeString& TransliteratorSpec::next() {
344	spec = nextSpec;
345	isSpecLocale = isNextLocale;
346	setupNext();
347	return spec;
348	}
349
350	const UnicodeString& TransliteratorSpec::get() const {
351	return spec;
352	}
353
354	UBool TransliteratorSpec::isLocale() const {
355	return isSpecLocale;
356	}
357
358	ResourceBundle& TransliteratorSpec::getBundle() const {
359	return *res;
360	}
361
362	//----------------------------------------------------------------------
363
#ifdef DEBUG_MEM

// Vector of Entry pointers currently in use
static UVector* DEBUG_entries = NULL;

// Lazily create the tracking vector.
static void DEBUG_setup() {
    if (DEBUG_entries == NULL) {
        UErrorCode ec = U_ZERO_ERROR;
        DEBUG_entries = new UVector(ec);
    }
}

// Caller must call DEBUG_setup first. Return index of given Entry,
// if it is in use (not deleted yet), or -1 if not found.
static int DEBUG_findEntry(TransliteratorEntry* e) {
    for (int i = 0; i < DEBUG_entries->size(); ++i) {
        if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
            return i;
        }
    }
    return -1;
}

// Track object creation
static void DEBUG_newEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    if (DEBUG_findEntry(e) >= 0) {
        // This should really never happen unless the heap is broken
        // %p with a void* is the portable way to print a pointer.
        printf("ERROR DEBUG_newEntry duplicate new pointer %p\n", (void*) e);
        return;
    }
    UErrorCode ec = U_ZERO_ERROR;
    DEBUG_entries->addElement(e, ec);
}

// Track object deletion
static void DEBUG_delEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_delEntry possible double deletion %p\n", (void*) e);
        return;
    }
    DEBUG_entries->removeElementAt(i);
}

// Track object usage
static void DEBUG_useEntry(TransliteratorEntry* e) {
    if (e == NULL) return;
    DEBUG_setup();
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_useEntry possible dangling pointer %p\n", (void*) e);
    }
}

#else
// If we're not debugging then make these macros into NOPs
#define DEBUG_newEntry(x)
#define DEBUG_delEntry(x)
#define DEBUG_useEntry(x)
#endif
426
427	//----------------------------------------------------------------------
428	// class Entry
429	//----------------------------------------------------------------------
430
431	/**
432	* The Entry object stores objects of different types and
433	* singleton objects as placeholders for rule-based transliterators to
434	* be built as needed. Instances of this struct can be placeholders,
435	* can represent prototype transliterators to be cloned, or can
436	* represent TransliteratorData objects. We don't support storing
437	* classes in the registry because we don't have the rtti infrastructure
438	* for it. We could easily add this if there is a need for it in the
439	* future.
440	*/
441	class TransliteratorEntry : public UMemory {
442	public:
443	enum Type {
444	RULES_FORWARD,
445	RULES_REVERSE,
446	LOCALE_RULES,
447	PROTOTYPE,
448	RBT_DATA,
449	COMPOUND_RBT,
450	ALIAS,
451	FACTORY,
452	NONE // Only used for uninitialized entries
453	} entryType;
454	// NOTE: stringArg cannot go inside the union because
455	// it has a copy constructor
456	UnicodeString stringArg; // For RULES_, ALIAS, COMPOUND_RBT*
457	int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES
458	UnicodeSet* compoundFilter; // For COMPOUND_RBT
459	union {
460	Transliterator* prototype; // For PROTOTYPE
461	TransliterationRuleData* data; // For RBT_DATA
462	UVector* dataVector; // For COMPOUND_RBT
463	struct {
464	Transliterator::Factory function;
465	Transliterator::Token context;
466	} factory; // For FACTORY
467	} u;
468	TransliteratorEntry();
469	~TransliteratorEntry();
470	void adoptPrototype(Transliterator* adopted);
471	void setFactory(Transliterator::Factory factory,
472	Transliterator::Token context);
473
474	private:
475
476	TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class
477	TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class
478	};
479
480	TransliteratorEntry::TransliteratorEntry() {
481	u.prototype = `0`;
482	compoundFilter = NULL;
483	entryType = NONE;
484	DEBUG_newEntry(this);
485	}
486
487	TransliteratorEntry::~TransliteratorEntry() {
488	DEBUG_delEntry(this);
489	if (entryType == PROTOTYPE) {
490	delete u.prototype;
491	} else if (entryType == RBT_DATA) {
492	// The data object is shared between instances of RBT. The
493	// entry object owns it. It should only be deleted when the
494	// transliterator component is being cleaned up. Doing so
495	// invalidates any RBTs that the user has instantiated.
496	delete u.data;
497	} else if (entryType == COMPOUND_RBT) {
498	while (u.dataVector != NULL && !u.dataVector->isEmpty())
499	delete (TransliterationRuleData*)u.dataVector->orphanElementAt(`0`);
500	delete u.dataVector;
501	}
502	delete compoundFilter;
503	}
504
505	void TransliteratorEntry::adoptPrototype(Transliterator* adopted) {
506	if (entryType == PROTOTYPE) {
507	delete u.prototype;
508	}
509	entryType = PROTOTYPE;
510	u.prototype = adopted;
511	}
512
513	void TransliteratorEntry::setFactory(Transliterator::Factory factory,
514	Transliterator::Token context) {
515	if (entryType == PROTOTYPE) {
516	delete u.prototype;
517	}
518	entryType = FACTORY;
519	u.factory.function = factory;
520	u.factory.context = context;
521	}
522
523	// UObjectDeleter for Hashtable::setValueDeleter
524	U_CDECL_BEGIN
525	static void U_CALLCONV
526	deleteEntry(void* obj) {
527	delete (TransliteratorEntry*) obj;
528	}
529	U_CDECL_END
530
531	//----------------------------------------------------------------------
532	// class TransliteratorRegistry: Basic public API
533	//----------------------------------------------------------------------
534
535	TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
536	registry (TRUE, status),
537	specDAG (TRUE, SPECDAG_INIT_SIZE, status),
538	variantList (VARIANT_LIST_INIT_SIZE, status),
539	availableIDs (AVAILABLE_IDS_INIT_SIZE, status)
540	{
541	registry.setValueDeleter(deleteEntry);
542	variantList.setDeleter(uprv_deleteUObject);
543	variantList.setComparer(uhash_compareCaselessUnicodeString);
544	UnicodeString emptyString = new* UnicodeString ();
545	if (emptyString != NULL) {
546	variantList.addElement(emptyString, status);
547	}
548	availableIDs.setDeleter(uprv_deleteUObject);
549	availableIDs.setComparer(uhash_compareCaselessUnicodeString);
550	specDAG.setValueDeleter(uhash_deleteHashtable);
551	}
552
553	TransliteratorRegistry::~TransliteratorRegistry() {
554	// Through the magic of C++, everything cleans itself up
555	}
556
557	Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
558	TransliteratorAlias*& aliasReturn,
559	UErrorCode& status) {
560	U_ASSERT(aliasReturn == NULL);
561	TransliteratorEntry *entry = find(ID);
562	return (entry == `0`) ? `0`
563	: instantiateEntry(ID, entry, aliasReturn, status);
564	}
565
566	Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
567	TransliteratorParser& parser,
568	TransliteratorAlias*& aliasReturn,
569	UErrorCode& status) {
570	U_ASSERT(aliasReturn == NULL);
571	TransliteratorEntry *entry = find(ID);
572
573	if (entry == `0`) {
574	// We get to this point if there are two threads, one of which
575	// is instantiating an ID, and another of which is removing
576	// the same ID from the registry, and the timing is just right.
577	return `0`;
578	}
579
580	// The usage model for the caller is that they will first call
581	// reg->get() inside the mutex, they'll get back an alias, they call
582	// alias->isRuleBased(), and if they get TRUE, they call alias->parse()
583	// outside the mutex, then reg->reget() inside the mutex again. A real
584	// mess, but it gets things working for ICU 3.0. [alan].
585
586	// Note: It's possible that in between the caller calling
587	// alias->parse() and reg->reget(), that another thread will have
588	// called reg->reget(), and the entry will already have been fixed up.
589	// We have to detect this so we don't stomp over existing entry
590	// data members and potentially leak memory (u.data and compoundFilter).
591
592	if (entry->entryType == TransliteratorEntry::RULES_FORWARD \|\|
593	entry->entryType == TransliteratorEntry::RULES_REVERSE \|\|
594	entry->entryType == TransliteratorEntry::LOCALE_RULES) {
595
596	if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
597	entry->u.data = `0`;
598	entry->entryType = TransliteratorEntry::ALIAS;
599	entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
600	}
601	else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == `1`) {
602	entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(`0`);
603	entry->entryType = TransliteratorEntry::RBT_DATA;
604	}
605	else if (parser.idBlockVector.size() == `1` && parser.dataVector.isEmpty()) {
606	entry->stringArg = (UnicodeString)(parser.idBlockVector.elementAt(`0`));
607	entry->compoundFilter = parser.orphanCompoundFilter();
608	entry->entryType = TransliteratorEntry::ALIAS;
609	}
610	else {
611	entry->entryType = TransliteratorEntry::COMPOUND_RBT;
612	entry->compoundFilter = parser.orphanCompoundFilter();
613	entry->u.dataVector = new UVector (status);
614	entry->stringArg.remove();
615
616	int32_t limit = parser.idBlockVector.size();
617	if (parser.dataVector.size() > limit)
618	limit = parser.dataVector.size();
619
620	for (int32_t i = `0`; i < limit; i++) {
621	if (i < parser.idBlockVector.size()) {
622	UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
623	if (!idBlock->isEmpty())
624	entry->stringArg += *idBlock;
625	}
626	if (!parser.dataVector.isEmpty()) {
627	TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(`0`);
628	entry->u.dataVector->addElement(data, status);
629	entry->stringArg += (UChar)`0xffff`; // use U+FFFF to mark position of RBTs in ID block
630	}
631	}
632	}
633	}
634
635	Transliterator *t =
636	instantiateEntry(ID, entry, aliasReturn, status);
637	return t;
638	}
639
640	void TransliteratorRegistry::put(Transliterator* adoptedProto,
641	UBool visible,
642	UErrorCode& ec)
643	{
644	TransliteratorEntry entry = new* TransliteratorEntry ();
645	if (entry == NULL) {
646	ec = U_MEMORY_ALLOCATION_ERROR;
647	return;
648	}
649	entry->adoptPrototype(adoptedProto);
650	registerEntry(adoptedProto->getID(), entry, visible);
651	}
652
653	void TransliteratorRegistry::put(const UnicodeString& ID,
654	Transliterator::Factory factory,
655	Transliterator::Token context,
656	UBool visible,
657	UErrorCode& ec) {
658	TransliteratorEntry entry = new* TransliteratorEntry ();
659	if (entry == NULL) {
660	ec = U_MEMORY_ALLOCATION_ERROR;
661	return;
662	}
663	entry->setFactory(factory, context);
664	registerEntry(ID, entry, visible);
665	}
666
667	void TransliteratorRegistry::put(const UnicodeString& ID,
668	const UnicodeString& resourceName,
669	UTransDirection dir,
670	UBool readonlyResourceAlias,
671	UBool visible,
672	UErrorCode& ec) {
673	TransliteratorEntry entry = new* TransliteratorEntry ();
674	if (entry == NULL) {
675	ec = U_MEMORY_ALLOCATION_ERROR;
676	return;
677	}
678	entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD
679	: TransliteratorEntry::RULES_REVERSE;
680	if (readonlyResourceAlias) {
681	entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -`1`);
682	}
683	else {
684	entry->stringArg = resourceName;
685	}
686	registerEntry(ID, entry, visible);
687	}
688
689	void TransliteratorRegistry::put(const UnicodeString& ID,
690	const UnicodeString& alias,
691	UBool readonlyAliasAlias,
692	UBool visible,
693	UErrorCode& /ec/) {
694	TransliteratorEntry entry = new* TransliteratorEntry ();
695	// Null pointer check
696	if (entry != NULL) {
697	entry->entryType = TransliteratorEntry::ALIAS;
698	if (readonlyAliasAlias) {
699	entry->stringArg.setTo(TRUE, alias.getBuffer(), -`1`);
700	}
701	else {
702	entry->stringArg = alias;
703	}
704	registerEntry(ID, entry, visible);
705	}
706	}
707
708	void TransliteratorRegistry::remove(const UnicodeString& ID) {
709	UnicodeString source, target, variant;
710	UBool sawSource;
711	TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
712	// Only need to do this if ID.indexOf('-') < 0
713	UnicodeString id;
714	TransliteratorIDParser::STVtoID(source, target, variant, id);
715	registry.remove(id);
716	removeSTV(source, target, variant);
717	availableIDs.removeElement((void*) &id);
718	}
719
720	//----------------------------------------------------------------------
721	// class TransliteratorRegistry: Public ID and spec management
722	//----------------------------------------------------------------------
723
724	/**
725	* == OBSOLETE - remove in ICU 3.4 ==
726	* Return the number of IDs currently registered with the system.
727	* To retrieve the actual IDs, call getAvailableID(i) with
728	* i from 0 to countAvailableIDs() - 1.
729	*/
730	int32_t TransliteratorRegistry::countAvailableIDs(void) const {
731	return availableIDs.size();
732	}
733
734	/**
735	* == OBSOLETE - remove in ICU 3.4 ==
736	* Return the index-th available ID. index must be between 0
737	* and countAvailableIDs() - 1, inclusive. If index is out of
738	* range, the result of getAvailableID(0) is returned.
739	*/
740	const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
741	if (index < `0` \|\| index >= availableIDs.size()) {
742	index = `0`;
743	}
744	return (const* UnicodeString*) availableIDs [index];
745	}
746
747	StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
748	return new Enumeration (*this);
749	}
750
751	int32_t TransliteratorRegistry::countAvailableSources(void) const {
752	return specDAG.count();
753	}
754
755	UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index,
756	UnicodeString& result) const {
757	int32_t pos = UHASH_FIRST;
758	const UHashElement *e = `0`;
759	while (index-- >= `0`) {
760	e = specDAG.nextElement(pos);
761	if (e == `0`) {
762	break;
763	}
764	}
765	if (e == `0`) {
766	result.truncate(`0`);
767	} else {
768	result = (UnicodeString) e->key.pointer;
769	}
770	return result;
771	}
772
773	int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const {
774	Hashtable targets = (Hashtable) specDAG.get(source);
775	return (targets == `0`) ? `0` : targets->count();
776	}
777
778	UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index,
779	const UnicodeString& source,
780	UnicodeString& result) const {
781	Hashtable targets = (Hashtable) specDAG.get(source);
782	if (targets == `0`) {
783	result.truncate(`0`); // invalid source
784	return result;
785	}
786	int32_t pos = UHASH_FIRST;
787	const UHashElement *e = `0`;
788	while (index-- >= `0`) {
789	e = targets->nextElement(pos);
790	if (e == `0`) {
791	break;
792	}
793	}
794	if (e == `0`) {
795	result.truncate(`0`); // invalid index
796	} else {
797	result = (UnicodeString) e->key.pointer;
798	}
799	return result;
800	}
801
802	int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source,
803	const UnicodeString& target) const {
804	Hashtable targets = (Hashtable) specDAG.get(source);
805	if (targets == `0`) {
806	return `0`;
807	}
808	uint32_t varMask = targets->geti(target);
809	int32_t varCount = `0`;
810	while (varMask > `0`) {
811	if (varMask & `1`) {
812	varCount++;
813	}
814	varMask >>= `1`;
815	}
816	return varCount;
817	}
818
819	UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
820	const UnicodeString& source,
821	const UnicodeString& target,
822	UnicodeString& result) const {
823	Hashtable targets = (Hashtable) specDAG.get(source);
824	if (targets == `0`) {
825	result.truncate(`0`); // invalid source
826	return result;
827	}
828	uint32_t varMask = targets->geti(target);
829	int32_t varCount = `0`;
830	int32_t varListIndex = `0`;
831	while (varMask > `0`) {
832	if (varMask & `1`) {
833	if (varCount == index) {
834	UnicodeString v = (UnicodeString) variantList.elementAt(varListIndex);
835	if (v != NULL) {
836	result = *v;
837	return result;
838	}
839	break;
840	}
841	varCount++;
842	}
843	varMask >>= `1`;
844	varListIndex++;
845	}
846	result.truncate(`0`); // invalid target or index
847	return result;
848	}
849
850	//----------------------------------------------------------------------
851	// class TransliteratorRegistry::Enumeration
852	//----------------------------------------------------------------------
853
854	TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) :
855	index(`0`), reg(_reg) {
856	}
857
858	TransliteratorRegistry::Enumeration::~Enumeration() {
859	}
860
861	int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /status/) const {
862	return reg.availableIDs.size();
863	}
864
865	const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
866	// This is sloppy but safe -- if we get out of sync with the underlying
867	// registry, we will still return legal strings, but they might not
868	// correspond to the snapshot at construction time. So there could be
869	// duplicate IDs or omitted IDs if insertions or deletions occur in one
870	// thread while another is iterating. To be more rigorous, add a timestamp,
871	// which is incremented with any modification, and validate this iterator
872	// against the timestamp at construction time. This probably isn't worth
873	// doing as long as there is some possibility of removing this code in favor
874	// of some new code based on Doug's service framework.
875	if (U_FAILURE(status)) {
876	return NULL;
877	}
878	int32_t n = reg.availableIDs.size();
879	if (index > n) {
880	status = U_ENUM_OUT_OF_SYNC_ERROR;
881	}
882	// index == n is okay -- this means we've reached the end
883	if (index < n) {
884	// Copy the string! This avoids lifetime problems.
885	unistr = (const* UnicodeString*)reg.availableIDs [index++];
886	return &unistr;
887	} else {
888	return NULL;
889	}
890	}
891
892	void TransliteratorRegistry::Enumeration::reset(UErrorCode& /status/) {
893	index = `0`;
894	}
895
896	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
897
898	//----------------------------------------------------------------------
899	// class TransliteratorRegistry: internal
900	//----------------------------------------------------------------------
901
902	/**
903	* Convenience method. Calls 6-arg registerEntry().
904	*/
905	void TransliteratorRegistry::registerEntry(const UnicodeString& source,
906	const UnicodeString& target,
907	const UnicodeString& variant,
908	TransliteratorEntry* adopted,
909	UBool visible) {
910	UnicodeString ID;
911	UnicodeString s(source);
912	if (s.length() == `0`) {
913	s.setTo(TRUE, ANY, `3`);
914	}
915	TransliteratorIDParser::STVtoID(source, target, variant, ID);
916	registerEntry(ID, s, target, variant, adopted, visible);
917	}
918
919	/**
920	* Convenience method. Calls 6-arg registerEntry().
921	*/
922	void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
923	TransliteratorEntry* adopted,
924	UBool visible) {
925	UnicodeString source, target, variant;
926	UBool sawSource;
927	TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
928	// Only need to do this if ID.indexOf('-') < 0
929	UnicodeString id;
930	TransliteratorIDParser::STVtoID(source, target, variant, id);
931	registerEntry(id, source, target, variant, adopted, visible);
932	}
933
934	/**
935	* Register an entry object (adopted) with the given ID, source,
936	* target, and variant strings.
937	*/
938	void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
939	const UnicodeString& source,
940	const UnicodeString& target,
941	const UnicodeString& variant,
942	TransliteratorEntry* adopted,
943	UBool visible) {
944	UErrorCode status = U_ZERO_ERROR;
945	registry.put(ID, adopted, status);
946	if (visible) {
947	registerSTV(source, target, variant);
948	if (!availableIDs.contains((void*) &ID)) {
949	UnicodeString *newID = ID.clone();
950	// Check to make sure newID was created.
951	if (newID != NULL) {
952	// NUL-terminate the ID string
953	newID->getTerminatedBuffer();
954	availableIDs.addElement(newID, status);
955	}
956	}
957	} else {
958	removeSTV(source, target, variant);
959	availableIDs.removeElement((void*) &ID);
960	}
961	}
962
963	/**
964	* Register a source-target/variant in the specDAG. Variant may be
965	* empty, but source and target must not be.
966	*/
967	void TransliteratorRegistry::registerSTV(const UnicodeString& source,
968	const UnicodeString& target,
969	const UnicodeString& variant) {
970	// assert(source.length() > 0);
971	// assert(target.length() > 0);
972	UErrorCode status = U_ZERO_ERROR;
973	Hashtable targets = (Hashtable) specDAG.get(source);
974	if (targets == `0`) {
975	int32_t size = `3`;
976	if (source.compare(ANY,`3`) == `0`) {
977	size = ANY_TARGETS_INIT_SIZE;
978	} else if (source.compare(LAT,`3`) == `0`) {
979	size = LAT_TARGETS_INIT_SIZE;
980	}
981	targets = new Hashtable (TRUE, size, status);
982	if (U_FAILURE(status) \|\| targets == NULL) {
983	return;
984	}
985	specDAG.put(source, targets, status);
986	}
987	int32_t variantListIndex = variantList.indexOf((void*) &variant, `0`);
988	if (variantListIndex < `0`) {
989	if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
990	// can't handle any more variants
991	return;
992	}
993	UnicodeString variantEntry = new* UnicodeString (variant);
994	if (variantEntry != NULL) {
995	variantList.addElement(variantEntry, status);
996	if (U_SUCCESS(status)) {
997	variantListIndex = variantList.size() - `1`;
998	}
999	}
1000	if (variantListIndex < `0`) {
1001	return;
1002	}
1003	}
1004	uint32_t addMask = `1` << variantListIndex;
1005	uint32_t varMask = targets->geti(target);
1006	targets->puti(target, varMask \| addMask, status);
1007	}
1008
1009	/**
1010	* Remove a source-target/variant from the specDAG.
1011	*/
1012	void TransliteratorRegistry::removeSTV(const UnicodeString& source,
1013	const UnicodeString& target,
1014	const UnicodeString& variant) {
1015	// assert(source.length() > 0);
1016	// assert(target.length() > 0);
1017	UErrorCode status = U_ZERO_ERROR;
1018	Hashtable targets = (Hashtable) specDAG.get(source);
1019	if (targets == NULL) {
1020	return; // should never happen for valid s-t/v
1021	}
1022	uint32_t varMask = targets->geti(target);
1023	if (varMask == `0`) {
1024	return; // should never happen for valid s-t/v
1025	}
1026	int32_t variantListIndex = variantList.indexOf((void*) &variant, `0`);
1027	if (variantListIndex < `0`) {
1028	return; // should never happen for valid s-t/v
1029	}
1030	int32_t remMask = `1` << variantListIndex;
1031	varMask &= (~remMask);
1032	if (varMask != `0`) {
1033	targets->puti(target, varMask, status);
1034	} else {
1035	targets->remove(target); // should delete variants
1036	if (targets->count() == `0`) {
1037	specDAG.remove(source); // should delete targets
1038	}
1039	}
1040	}
1041
1042	/**
1043	* Attempt to find a source-target/variant in the dynamic registry
1044	* store. Return 0 on failure.
1045	*
1046	* Caller does NOT own returned object.
1047	*/
1048	TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src,
1049	const TransliteratorSpec& trg,
1050	const UnicodeString& variant) const {
1051	UnicodeString ID;
1052	TransliteratorIDParser::STVtoID(src, trg, variant, ID);
1053	TransliteratorEntry e = (TransliteratorEntry) registry.get(ID);
1054	DEBUG_useEntry(e);
1055	return e;
1056	}
1057
1058	/**
1059	* Attempt to find a source-target/variant in the static locale
1060	* resource store. Do not perform fallback. Return 0 on failure.
1061	*
1062	* On success, create a new entry object, register it in the dynamic
1063	* store, and return a pointer to it, but do not make it public --
1064	* just because someone requested something, we do not expand the
1065	* available ID list (or spec DAG).
1066	*
1067	* Caller does NOT own returned object.
1068	*/
1069	TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src,
1070	const TransliteratorSpec& trg,
1071	const UnicodeString& variant) {
1072	TransliteratorEntry* entry = `0`;
1073	if (src.isLocale()) {
1074	entry = findInBundle(src, trg, variant, UTRANS_FORWARD);
1075	} else if (trg.isLocale()) {
1076	entry = findInBundle(trg, src, variant, UTRANS_REVERSE);
1077	}
1078
1079	// If we found an entry, store it in the Hashtable for next
1080	// time.
1081	if (entry != `0`) {
1082	registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE);
1083	}
1084
1085	return entry;
1086	}
1087
1088	// As of 2.0, resource bundle keys cannot contain '_'
1089	static const UChar TRANSLITERATE_TO[] = {`84`,`114`,`97`,`110`,`115`,`108`,`105`,`116`,`101`,`114`,`97`,`116`,`101`,`84`,`111`,`0`}; // "TransliterateTo"
1090
1091	static const UChar TRANSLITERATE_FROM[] = {`84`,`114`,`97`,`110`,`115`,`108`,`105`,`116`,`101`,`114`,`97`,`116`,`101`,`70`,`114`,`111`,`109`,`0`}; // "TransliterateFrom"
1092
1093	static const UChar TRANSLITERATE[] = {`84`,`114`,`97`,`110`,`115`,`108`,`105`,`116`,`101`,`114`,`97`,`116`,`101`,`0`}; // "Transliterate"
1094
1095	/**
1096	* Attempt to find an entry in a single resource bundle. This is
1097	* a one-sided lookup. findInStaticStore() performs up to two such
1098	* lookups, one for the source, and one for the target.
1099	*
1100	* Do not perform fallback. Return 0 on failure.
1101	*
1102	* On success, create a new Entry object, populate it, and return it.
1103	* The caller owns the returned object.
1104	*/
1105	TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
1106	const TransliteratorSpec& specToFind,
1107	const UnicodeString& variant,
1108	UTransDirection direction)
1109	{
1110	UnicodeString utag;
1111	UnicodeString resStr;
1112	int32_t pass;
1113
1114	for (pass=`0`; pass<`2`; ++pass) {
1115	utag.truncate(`0`);
1116	// First try either TransliteratorTo_xxx or
1117	// TransliterateFrom_xxx, then try the bidirectional
1118	// Transliterate_xxx. This precedence order is arbitrary
1119	// but must be consistent and documented.
1120	if (pass == `0`) {
1121	utag.append(direction == UTRANS_FORWARD ?
1122	TRANSLITERATE_TO : TRANSLITERATE_FROM, -`1`);
1123	} else {
1124	utag.append(TRANSLITERATE, -`1`);
1125	}
1126	UnicodeString s(specToFind.get());
1127	utag.append(s.toUpper(""));
1128	UErrorCode status = U_ZERO_ERROR;
1129	ResourceBundle subres(specToOpen.getBundle().get(
1130	CharString ().appendInvariantChars(utag, status).data(), status));
1131	if (U_FAILURE(status) \|\| status == U_USING_DEFAULT_WARNING) {
1132	continue;
1133	}
1134
1135	s.truncate(`0`);
1136	if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
1137	continue;
1138	}
1139
1140	if (variant.length() != `0`) {
1141	status = U_ZERO_ERROR;
1142	resStr = subres.getStringEx(
1143	CharString ().appendInvariantChars(variant, status).data(), status);
1144	if (U_SUCCESS(status)) {
1145	// Exit loop successfully
1146	break;
1147	}
1148	} else {
1149	// Variant is empty, which means match the first variant listed.
1150	status = U_ZERO_ERROR;
1151	resStr = subres.getStringEx(`1`, status);
1152	if (U_SUCCESS(status)) {
1153	// Exit loop successfully
1154	break;
1155	}
1156	}
1157	}
1158
1159	if (pass==`2`) {
1160	// Failed
1161	return NULL;
1162	}
1163
1164	// We have succeeded in loading a string from the locale
1165	// resources. Create a new registry entry to hold it and return it.
1166	TransliteratorEntry entry = new* TransliteratorEntry ();
1167	if (entry != `0`) {
1168	// The direction is always forward for the
1169	// TransliterateTo_xxx and TransliterateFrom_xxx
1170	// items; those are unidirectional forward rules.
1171	// For the bidirectional Transliterate_xxx items,
1172	// the direction is the value passed in to this
1173	// function.
1174	int32_t dir = (pass == `0`) ? UTRANS_FORWARD : direction;
1175	entry->entryType = TransliteratorEntry::LOCALE_RULES;
1176	entry->stringArg = resStr;
1177	entry->intArg = dir;
1178	}
1179
1180	return entry;
1181	}
1182
1183	/**
1184	* Convenience method. Calls 3-arg find().
1185	*/
1186	TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) {
1187	UnicodeString source, target, variant;
1188	UBool sawSource;
1189	TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
1190	return find(source, target, variant);
1191	}
1192
1193	/**
1194	* Top-level find method. Attempt to find a source-target/variant in
1195	* either the dynamic or the static (locale resource) store. Perform
1196	* fallback.
1197	*
1198	* Lookup sequence for ss_SS_SSS-tt_TT_TTT/v:
1199	*
1200	* ss_SS_SSS-tt_TT_TTT/v -- in hashtable
1201	* ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback)
1202	*
1203	* repeat with t = tt_TT_TTT, tt_TT, tt, and tscript
1204	*
1205	* ss_SS_SSS-t/ *
1206	* ss_SS-t/ *
1207	* ss-t/ *
1208	* sscript-t/ *
1209	*
1210	* Here * matches the first variant listed.
1211	*
1212	* Caller does NOT own returned object. Return 0 on failure.
1213	*/
1214	TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source,
1215	UnicodeString& target,
1216	UnicodeString& variant) {
1217
1218	TransliteratorSpec src(source);
1219	TransliteratorSpec trg(target);
1220	TransliteratorEntry* entry;
1221
1222	// Seek exact match in hashtable. Temporary fix for ICU 4.6.
1223	// TODO: The general logic for finding a matching transliterator needs to be reviewed.
1224	// ICU ticket #8089
1225	UnicodeString ID;
1226	TransliteratorIDParser::STVtoID(source, target, variant, ID);
1227	entry = (TransliteratorEntry*) registry.get(ID);
1228	if (entry != `0`) {
1229	// std::string ss;
1230	// std::cout << ID.toUTF8String(ss) << std::endl;
1231	return entry;
1232	}
1233
1234	if (variant.length() != `0`) {
1235
1236	// Seek exact match in hashtable
1237	entry = findInDynamicStore(src, trg, variant);
1238	if (entry != `0`) {
1239	return entry;
1240	}
1241
1242	// Seek exact match in locale resources
1243	entry = findInStaticStore(src, trg, variant);
1244	if (entry != `0`) {
1245	return entry;
1246	}
1247	}
1248
1249	for (;;) {
1250	src.reset();
1251	for (;;) {
1252	// Seek match in hashtable
1253	entry = findInDynamicStore(src, trg, NO_VARIANT);
1254	if (entry != `0`) {
1255	return entry;
1256	}
1257
1258	// Seek match in locale resources
1259	entry = findInStaticStore(src, trg, NO_VARIANT);
1260	if (entry != `0`) {
1261	return entry;
1262	}
1263	if (!src.hasFallback()) {
1264	break;
1265	}
1266	src.next();
1267	}
1268	if (!trg.hasFallback()) {
1269	break;
1270	}
1271	trg.next();
1272	}
1273
1274	return `0`;
1275	}
1276
1277	/**
1278	* Given an Entry object, instantiate it. Caller owns result. Return
1279	* 0 on failure.
1280	*
1281	* Return a non-empty aliasReturn value if the ID points to an alias.
1282	* We cannot instantiate it ourselves because the alias may contain
1283	* filters or compounds, which we do not understand. Caller should
1284	* make aliasReturn empty before calling.
1285	*
1286	* The entry object is assumed to reside in the dynamic store. It may be
1287	* modified.
1288	*/
1289	Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID,
1290	TransliteratorEntry *entry,
1291	TransliteratorAlias* &aliasReturn,
1292	UErrorCode& status) {
1293	Transliterator *t = `0`;
1294	U_ASSERT(aliasReturn == `0`);
1295
1296	switch (entry->entryType) {
1297	case TransliteratorEntry::RBT_DATA:
1298	t = new RuleBasedTransliterator (ID, entry->u.data);
1299	if (t == `0`) {
1300	status = U_MEMORY_ALLOCATION_ERROR;
1301	}
1302	return t;
1303	case TransliteratorEntry::PROTOTYPE:
1304	t = entry->u.prototype->clone();
1305	if (t == `0`) {
1306	status = U_MEMORY_ALLOCATION_ERROR;
1307	}
1308	return t;
1309	case TransliteratorEntry::ALIAS:
1310	aliasReturn = new TransliteratorAlias (entry->stringArg, entry->compoundFilter);
1311	if (aliasReturn == `0`) {
1312	status = U_MEMORY_ALLOCATION_ERROR;
1313	}
1314	return `0`;
1315	case TransliteratorEntry::FACTORY:
1316	t = entry->u.factory.function(ID, entry->u.factory.context);
1317	if (t == `0`) {
1318	status = U_MEMORY_ALLOCATION_ERROR;
1319	}
1320	return t;
1321	case TransliteratorEntry::COMPOUND_RBT:
1322	{
1323	UVector* rbts = new UVector (entry->u.dataVector->size(), status);
1324	// Check for null pointer
1325	if (rbts == NULL) {
1326	status = U_MEMORY_ALLOCATION_ERROR;
1327	return NULL;
1328	}
1329	int32_t passNumber = `1`;
1330	for (int32_t i = `0`; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
1331	// TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
1332	Transliterator* tl = new RuleBasedTransliterator (UnicodeString (CompoundTransliterator::PASS_STRING) + UnicodeString (passNumber++),
1333	(TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
1334	if (tl == `0`)
1335	status = U_MEMORY_ALLOCATION_ERROR;
1336	else
1337	rbts->addElement(tl, status);
1338	}
1339	if (U_FAILURE(status)) {
1340	delete rbts;
1341	return `0`;
1342	}
1343	aliasReturn = new TransliteratorAlias (ID, entry->stringArg, rbts, entry->compoundFilter);
1344	}
1345	if (aliasReturn == `0`) {
1346	status = U_MEMORY_ALLOCATION_ERROR;
1347	}
1348	return `0`;
1349	case TransliteratorEntry::LOCALE_RULES:
1350	aliasReturn = new TransliteratorAlias (ID, entry->stringArg,
1351	(UTransDirection) entry->intArg);
1352	if (aliasReturn == `0`) {
1353	status = U_MEMORY_ALLOCATION_ERROR;
1354	}
1355	return `0`;
1356	case TransliteratorEntry::RULES_FORWARD:
1357	case TransliteratorEntry::RULES_REVERSE:
1358	// Process the rule data into a TransliteratorRuleData object,
1359	// and possibly also into an ::id header and/or footer. Then
1360	// we modify the registry with the parsed data and retry.
1361	{
1362	TransliteratorParser parser(status);
1363
1364	// We use the file name, taken from another resource bundle
1365	// 2-d array at static init time, as a locale language. We're
1366	// just using the locale mechanism to map through to a file
1367	// name; this in no way represents an actual locale.
1368	//CharString ch(entry->stringArg);
1369	//UResourceBundle bundle = ures_openDirect(0, ch, &status);*
1370	UnicodeString rules = entry->stringArg;
1371	//ures_close(bundle);
1372
1373	//if (U_FAILURE(status)) {
1374	// We have a failure of some kind. Remove the ID from the
1375	// registry so we don't keep trying. NOTE: This will throw off
1376	// anyone who is, at the moment, trying to iterate over the
1377	// available IDs. That's acceptable since we should never
1378	// really get here except under installation, configuration,
1379	// or unrecoverable run time memory failures.
1380	// remove(ID);
1381	//} else {
1382
1383	// If the status indicates a failure, then we don't have any
1384	// rules -- there is probably an installation error. The list
1385	// in the root locale should correspond to all the installed
1386	// transliterators; if it lists something that's not
1387	// installed, we'll get an error from ResourceBundle.
1388	aliasReturn = new TransliteratorAlias (ID, rules,
1389	((entry->entryType == TransliteratorEntry::RULES_REVERSE) ?
1390	UTRANS_REVERSE : UTRANS_FORWARD));
1391	if (aliasReturn == `0`) {
1392	status = U_MEMORY_ALLOCATION_ERROR;
1393	}
1394	//}
1395	}
1396	return `0`;
1397	default:
1398	UPRV_UNREACHABLE; // can't get here
1399	}
1400	}
1401	U_NAMESPACE_END
1402
1403	#endif /* #if !UCONFIG_NO_TRANSLITERATION */
1404
1405	//eof
1406

// Source: ClickHouse/contrib/icu/icu4c/source/i18n/transreg.cpp