1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (c) 2001-2014, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | * Date Name Description |
9 | * 08/10/2001 aliu Creation. |
10 | ********************************************************************** |
11 | */ |
12 | #ifndef _TRANSREG_H |
13 | #define _TRANSREG_H |
14 | |
15 | #include "unicode/utypes.h" |
16 | |
17 | #if !UCONFIG_NO_TRANSLITERATION |
18 | |
19 | #include "unicode/uobject.h" |
20 | #include "unicode/translit.h" |
21 | #include "hash.h" |
22 | #include "uvector.h" |
23 | |
24 | U_NAMESPACE_BEGIN |
25 | |
26 | class TransliteratorEntry; |
27 | class TransliteratorSpec; |
28 | class UnicodeString; |
29 | |
30 | //------------------------------------------------------------------ |
31 | // TransliteratorAlias |
32 | //------------------------------------------------------------------ |
33 | |
34 | /** |
35 | * A TransliteratorAlias object is returned by get() if the given ID |
36 | * actually translates into something else. The caller then invokes |
37 | * the create() method on the alias to create the actual |
38 | * transliterator, and deletes the alias. |
39 | * |
40 | * Why all the shenanigans? To prevent circular calls between |
41 | * the registry code and the transliterator code that deadlocks. |
42 | */ |
43 | class TransliteratorAlias : public UMemory { |
44 | public: |
45 | /** |
46 | * Construct a simple alias (type == SIMPLE) |
47 | * @param aliasID the given id. |
48 | */ |
49 | TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); |
50 | |
51 | /** |
52 | * Construct a compound RBT alias (type == COMPOUND) |
53 | */ |
54 | TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, |
55 | UVector* adoptedTransliterators, |
56 | const UnicodeSet* compoundFilter); |
57 | |
58 | /** |
59 | * Construct a rules alias (type = RULES) |
60 | */ |
61 | TransliteratorAlias(const UnicodeString& theID, |
62 | const UnicodeString& rules, |
63 | UTransDirection dir); |
64 | |
65 | ~TransliteratorAlias(); |
66 | |
67 | /** |
68 | * The whole point of create() is that the caller must invoke |
69 | * it when the registry mutex is NOT held, to prevent deadlock. |
70 | * It may only be called once. |
71 | * |
72 | * Note: Only call create() if isRuleBased() returns FALSE. |
73 | * |
74 | * This method must be called *outside* of the TransliteratorRegistry |
75 | * mutex. |
76 | */ |
77 | Transliterator* create(UParseError&, UErrorCode&); |
78 | |
79 | /** |
80 | * Return TRUE if this alias is rule-based. If so, the caller |
81 | * must call parse() on it, then call TransliteratorRegistry::reget(). |
82 | */ |
83 | UBool isRuleBased() const; |
84 | |
85 | /** |
86 | * If isRuleBased() returns TRUE, then the caller must call this |
87 | * method, followed by TransliteratorRegistry::reget(). The latter |
88 | * method must be called inside the TransliteratorRegistry mutex. |
89 | * |
90 | * Note: Only call parse() if isRuleBased() returns TRUE. |
91 | * |
92 | * This method must be called *outside* of the TransliteratorRegistry |
93 | * mutex, because it can instantiate Transliterators embedded in |
94 | * the rules via the "&Latin-Arabic()" syntax. |
95 | */ |
96 | void parse(TransliteratorParser& parser, |
97 | UParseError& pe, UErrorCode& ec) const; |
98 | |
99 | private: |
100 | // We actually come in three flavors: |
101 | // 1. Simple alias |
102 | // Here aliasID is the alias string. Everything else is |
103 | // null, zero, empty. |
104 | // 2. CompoundRBT |
105 | // Here ID is the ID, aliasID is the idBlock, trans is the |
106 | // contained RBT, and idSplitPoint is the offet in aliasID |
107 | // where the contained RBT goes. compoundFilter is the |
108 | // compound filter, and it is _not_ owned. |
109 | // 3. Rules |
110 | // Here ID is the ID, aliasID is the rules string. |
111 | // idSplitPoint is the UTransDirection. |
112 | UnicodeString ID; |
113 | UnicodeString aliasesOrRules; |
114 | UVector* transes; // owned |
115 | const UnicodeSet* compoundFilter; // alias |
116 | UTransDirection direction; |
117 | enum { SIMPLE, COMPOUND, RULES } type; |
118 | |
119 | TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class |
120 | TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class |
121 | }; |
122 | |
123 | |
124 | /** |
125 | * A registry of system transliterators. This is the data structure |
126 | * that implements the mapping between transliterator IDs and the data |
127 | * or function pointers used to create the corresponding |
128 | * transliterators. There is one instance of the registry that is |
129 | * created statically. |
130 | * |
131 | * The registry consists of a dynamic component -- a hashtable -- and |
132 | * a static component -- locale resource bundles. The dynamic store |
133 | * is semantically overlaid on the static store, so the static mapping |
134 | * can be dynamically overridden. |
135 | * |
136 | * This is an internal class that is only used by Transliterator. |
137 | * Transliterator maintains one static instance of this class and |
138 | * delegates all registry-related operations to it. |
139 | * |
140 | * @author Alan Liu |
141 | */ |
142 | class TransliteratorRegistry : public UMemory { |
143 | |
144 | public: |
145 | |
146 | /** |
147 | * Contructor |
148 | * @param status Output param set to success/failure code. |
149 | */ |
150 | TransliteratorRegistry(UErrorCode& status); |
151 | |
152 | /** |
153 | * Nonvirtual destructor -- this class is not subclassable. |
154 | */ |
155 | ~TransliteratorRegistry(); |
156 | |
157 | //------------------------------------------------------------------ |
158 | // Basic public API |
159 | //------------------------------------------------------------------ |
160 | |
161 | /** |
162 | * Given a simple ID (forward direction, no inline filter, not |
163 | * compound) attempt to instantiate it from the registry. Return |
164 | * 0 on failure. |
165 | * |
166 | * Return a non-NULL aliasReturn value if the ID points to an alias. |
167 | * We cannot instantiate it ourselves because the alias may contain |
168 | * filters or compounds, which we do not understand. Caller should |
169 | * make aliasReturn NULL before calling. |
170 | * @param ID the given ID |
171 | * @param aliasReturn output param to receive TransliteratorAlias; |
172 | * should be NULL on entry |
173 | * @param parseError Struct to recieve information on position |
174 | * of error if an error is encountered |
175 | * @param status Output param set to success/failure code. |
176 | */ |
177 | Transliterator* get(const UnicodeString& ID, |
178 | TransliteratorAlias*& aliasReturn, |
179 | UErrorCode& status); |
180 | |
181 | /** |
182 | * The caller must call this after calling get(), if [a] calling get() |
183 | * returns an alias, and [b] the alias is rule based. In that |
184 | * situation the caller must call alias->parse() to do the parsing |
185 | * OUTSIDE THE REGISTRY MUTEX, then call this method to retry |
186 | * instantiating the transliterator. |
187 | * |
188 | * Note: Another alias might be returned by this method. |
189 | * |
190 | * This method (like all public methods of this class) must be called |
191 | * from within the TransliteratorRegistry mutex. |
192 | * |
193 | * @param aliasReturn output param to receive TransliteratorAlias; |
194 | * should be NULL on entry |
195 | */ |
196 | Transliterator* reget(const UnicodeString& ID, |
197 | TransliteratorParser& parser, |
198 | TransliteratorAlias*& aliasReturn, |
199 | UErrorCode& status); |
200 | |
201 | /** |
202 | * Register a prototype (adopted). This adds an entry to the |
203 | * dynamic store, or replaces an existing entry. Any entry in the |
204 | * underlying static locale resource store is masked. |
205 | */ |
206 | void put(Transliterator* adoptedProto, |
207 | UBool visible, |
208 | UErrorCode& ec); |
209 | |
210 | /** |
211 | * Register an ID and a factory function pointer. This adds an |
212 | * entry to the dynamic store, or replaces an existing entry. Any |
213 | * entry in the underlying static locale resource store is masked. |
214 | */ |
215 | void put(const UnicodeString& ID, |
216 | Transliterator::Factory factory, |
217 | Transliterator::Token context, |
218 | UBool visible, |
219 | UErrorCode& ec); |
220 | |
221 | /** |
222 | * Register an ID and a resource name. This adds an entry to the |
223 | * dynamic store, or replaces an existing entry. Any entry in the |
224 | * underlying static locale resource store is masked. |
225 | */ |
226 | void put(const UnicodeString& ID, |
227 | const UnicodeString& resourceName, |
228 | UTransDirection dir, |
229 | UBool readonlyResourceAlias, |
230 | UBool visible, |
231 | UErrorCode& ec); |
232 | |
233 | /** |
234 | * Register an ID and an alias ID. This adds an entry to the |
235 | * dynamic store, or replaces an existing entry. Any entry in the |
236 | * underlying static locale resource store is masked. |
237 | */ |
238 | void put(const UnicodeString& ID, |
239 | const UnicodeString& alias, |
240 | UBool readonlyAliasAlias, |
241 | UBool visible, |
242 | UErrorCode& ec); |
243 | |
244 | /** |
245 | * Unregister an ID. This removes an entry from the dynamic store |
246 | * if there is one. The static locale resource store is |
247 | * unaffected. |
248 | * @param ID the given ID. |
249 | */ |
250 | void remove(const UnicodeString& ID); |
251 | |
252 | //------------------------------------------------------------------ |
253 | // Public ID and spec management |
254 | //------------------------------------------------------------------ |
255 | |
256 | /** |
257 | * Return a StringEnumeration over the IDs currently registered |
258 | * with the system. |
259 | * @internal |
260 | */ |
261 | StringEnumeration* getAvailableIDs() const; |
262 | |
263 | /** |
264 | * == OBSOLETE - remove in ICU 3.4 == |
265 | * Return the number of IDs currently registered with the system. |
266 | * To retrieve the actual IDs, call getAvailableID(i) with |
267 | * i from 0 to countAvailableIDs() - 1. |
268 | * @return the number of IDs currently registered with the system. |
269 | * @internal |
270 | */ |
271 | int32_t countAvailableIDs(void) const; |
272 | |
273 | /** |
274 | * == OBSOLETE - remove in ICU 3.4 == |
275 | * Return the index-th available ID. index must be between 0 |
276 | * and countAvailableIDs() - 1, inclusive. If index is out of |
277 | * range, the result of getAvailableID(0) is returned. |
278 | * @param index the given index. |
279 | * @return the index-th available ID. index must be between 0 |
280 | * and countAvailableIDs() - 1, inclusive. If index is out of |
281 | * range, the result of getAvailableID(0) is returned. |
282 | * @internal |
283 | */ |
284 | const UnicodeString& getAvailableID(int32_t index) const; |
285 | |
286 | /** |
287 | * Return the number of registered source specifiers. |
288 | * @return the number of registered source specifiers. |
289 | */ |
290 | int32_t countAvailableSources(void) const; |
291 | |
292 | /** |
293 | * Return a registered source specifier. |
294 | * @param index which specifier to return, from 0 to n-1, where |
295 | * n = countAvailableSources() |
296 | * @param result fill-in paramter to receive the source specifier. |
297 | * If index is out of range, result will be empty. |
298 | * @return reference to result |
299 | */ |
300 | UnicodeString& getAvailableSource(int32_t index, |
301 | UnicodeString& result) const; |
302 | |
303 | /** |
304 | * Return the number of registered target specifiers for a given |
305 | * source specifier. |
306 | * @param source the given source specifier. |
307 | * @return the number of registered target specifiers for a given |
308 | * source specifier. |
309 | */ |
310 | int32_t countAvailableTargets(const UnicodeString& source) const; |
311 | |
312 | /** |
313 | * Return a registered target specifier for a given source. |
314 | * @param index which specifier to return, from 0 to n-1, where |
315 | * n = countAvailableTargets(source) |
316 | * @param source the source specifier |
317 | * @param result fill-in paramter to receive the target specifier. |
318 | * If source is invalid or if index is out of range, result will |
319 | * be empty. |
320 | * @return reference to result |
321 | */ |
322 | UnicodeString& getAvailableTarget(int32_t index, |
323 | const UnicodeString& source, |
324 | UnicodeString& result) const; |
325 | |
326 | /** |
327 | * Return the number of registered variant specifiers for a given |
328 | * source-target pair. There is always at least one variant: If |
329 | * just source-target is registered, then the single variant |
330 | * NO_VARIANT is returned. If source-target/variant is registered |
331 | * then that variant is returned. |
332 | * @param source the source specifiers |
333 | * @param target the target specifiers |
334 | * @return the number of registered variant specifiers for a given |
335 | * source-target pair. |
336 | */ |
337 | int32_t countAvailableVariants(const UnicodeString& source, |
338 | const UnicodeString& target) const; |
339 | |
340 | /** |
341 | * Return a registered variant specifier for a given source-target |
342 | * pair. If NO_VARIANT is one of the variants, then it will be |
343 | * at index 0. |
344 | * @param index which specifier to return, from 0 to n-1, where |
345 | * n = countAvailableVariants(source, target) |
346 | * @param source the source specifier |
347 | * @param target the target specifier |
348 | * @param result fill-in paramter to receive the variant |
349 | * specifier. If source is invalid or if target is invalid or if |
350 | * index is out of range, result will be empty. |
351 | * @return reference to result |
352 | */ |
353 | UnicodeString& getAvailableVariant(int32_t index, |
354 | const UnicodeString& source, |
355 | const UnicodeString& target, |
356 | UnicodeString& result) const; |
357 | |
358 | private: |
359 | |
360 | //---------------------------------------------------------------- |
361 | // Private implementation |
362 | //---------------------------------------------------------------- |
363 | |
364 | TransliteratorEntry* find(const UnicodeString& ID); |
365 | |
366 | TransliteratorEntry* find(UnicodeString& source, |
367 | UnicodeString& target, |
368 | UnicodeString& variant); |
369 | |
370 | TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, |
371 | const TransliteratorSpec& trg, |
372 | const UnicodeString& variant) const; |
373 | |
374 | TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, |
375 | const TransliteratorSpec& trg, |
376 | const UnicodeString& variant); |
377 | |
378 | static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, |
379 | const TransliteratorSpec& specToFind, |
380 | const UnicodeString& variant, |
381 | UTransDirection direction); |
382 | |
383 | void registerEntry(const UnicodeString& source, |
384 | const UnicodeString& target, |
385 | const UnicodeString& variant, |
386 | TransliteratorEntry* adopted, |
387 | UBool visible); |
388 | |
389 | void registerEntry(const UnicodeString& ID, |
390 | TransliteratorEntry* adopted, |
391 | UBool visible); |
392 | |
393 | void registerEntry(const UnicodeString& ID, |
394 | const UnicodeString& source, |
395 | const UnicodeString& target, |
396 | const UnicodeString& variant, |
397 | TransliteratorEntry* adopted, |
398 | UBool visible); |
399 | |
400 | void registerSTV(const UnicodeString& source, |
401 | const UnicodeString& target, |
402 | const UnicodeString& variant); |
403 | |
404 | void removeSTV(const UnicodeString& source, |
405 | const UnicodeString& target, |
406 | const UnicodeString& variant); |
407 | |
408 | Transliterator* instantiateEntry(const UnicodeString& ID, |
409 | TransliteratorEntry *entry, |
410 | TransliteratorAlias*& aliasReturn, |
411 | UErrorCode& status); |
412 | |
413 | /** |
414 | * A StringEnumeration over the registered IDs in this object. |
415 | */ |
416 | class Enumeration : public StringEnumeration { |
417 | public: |
418 | Enumeration(const TransliteratorRegistry& reg); |
419 | virtual ~Enumeration(); |
420 | virtual int32_t count(UErrorCode& status) const; |
421 | virtual const UnicodeString* snext(UErrorCode& status); |
422 | virtual void reset(UErrorCode& status); |
423 | static UClassID U_EXPORT2 getStaticClassID(); |
424 | virtual UClassID getDynamicClassID() const; |
425 | private: |
426 | int32_t index; |
427 | const TransliteratorRegistry& reg; |
428 | }; |
429 | friend class Enumeration; |
430 | |
431 | private: |
432 | |
433 | /** |
434 | * Dynamic registry mapping full IDs to Entry objects. This |
435 | * contains both public and internal entities. The visibility is |
436 | * controlled by whether an entry is listed in availableIDs and |
437 | * specDAG or not. |
438 | */ |
439 | Hashtable registry; |
440 | |
441 | /** |
442 | * DAG of visible IDs by spec. Hashtable: source => (Hashtable: |
443 | * target => variant bitmask) |
444 | */ |
445 | Hashtable specDAG; |
446 | |
447 | /** |
448 | * Vector of all variant names |
449 | */ |
450 | UVector variantList; |
451 | |
452 | /** |
453 | * Vector of public full IDs. |
454 | */ |
455 | UVector availableIDs; |
456 | |
457 | TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class |
458 | TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class |
459 | }; |
460 | |
461 | U_NAMESPACE_END |
462 | |
/**
 * Cleanup entry point for the transliterator module.  It is declared
 * with U_CFUNC, outside the ICU namespace block; the definition is not
 * part of this header.
 * NOTE(review): presumably called during library-wide cleanup to release
 * the static registry instance -- confirm against the implementation.
 * @return a UBool; its meaning is not visible from this header.
 */
U_CFUNC UBool utrans_transliterator_cleanup(void);
464 | |
465 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
466 | |
467 | #endif |
468 | //eof |
469 | |