1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 1997-2009,2014 International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * Date Name Description |
9 | * 06/21/00 aliu Creation. |
10 | ******************************************************************************* |
11 | */ |
12 | |
13 | #include "unicode/utypes.h" |
14 | |
15 | #if !UCONFIG_NO_TRANSLITERATION |
16 | |
17 | #include "unicode/utrans.h" |
18 | #include "unicode/putil.h" |
19 | #include "unicode/rep.h" |
20 | #include "unicode/translit.h" |
21 | #include "unicode/unifilt.h" |
22 | #include "unicode/uniset.h" |
23 | #include "unicode/ustring.h" |
24 | #include "unicode/uenum.h" |
25 | #include "unicode/uset.h" |
26 | #include "uenumimp.h" |
27 | #include "cpputils.h" |
28 | #include "rbt.h" |
29 | |
// Common entry guard for the C API functions in this file: when the
// UErrorCode pointer is NULL or already holds a failure code, return at once.
// The expansion ends in a bare `return`, so the macro invocation is to be
// followed by <return value>';' or just ';' (for functions returning void).
#define utrans_ENTRY(s) if ((s)==NULL || U_FAILURE(*(s))) return
32 | |
33 | /******************************************************************** |
34 | * Replaceable-UReplaceableCallbacks glue |
35 | ********************************************************************/ |
36 | |
/**
 * Adapter that presents a C-level UReplaceable handle, together with its
 * UReplaceableCallbacks function table, as a C++ Replaceable object.
 */
40 | U_NAMESPACE_BEGIN |
class ReplaceableGlue : public Replaceable {

    // Caller-supplied text handle; passed as the first argument to every callback.
    UReplaceable *rep;
    // Caller-supplied table of C function pointers that operate on `rep`.
    const UReplaceableCallbacks *func;

public:

    // Stores both pointers as given.
    ReplaceableGlue(UReplaceable *replaceable,
                    const UReplaceableCallbacks *funcCallback);

    virtual ~ReplaceableGlue();

    // Forwards to the `replace` callback with the UTF-16 contents of `text`.
    virtual void handleReplaceBetween(int32_t start,
                                      int32_t limit,
                                      const UnicodeString& text);

    // Fills `target` with the range [start, limit) via the `extract` callback.
    virtual void extractBetween(int32_t start,
                                int32_t limit,
                                UnicodeString& target) const;

    // Forwards to the `copy` callback.
    virtual void copy(int32_t start, int32_t limit, int32_t dest);

    // virtual Replaceable *clone() const { return NULL; } same as default

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @draft ICU 2.2
     */
    virtual UClassID getDynamicClassID() const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @draft ICU 2.2
     */
    static UClassID U_EXPORT2 getStaticClassID();

protected:

    // Forwards to the `length` callback.
    virtual int32_t getLength() const;

    // Forwards to the `charAt` callback.
    virtual UChar getCharAt(int32_t offset) const;

    // Forwards to the `char32At` callback.
    virtual UChar32 getChar32At(int32_t offset) const;
};

// Supplies the definitions of getStaticClassID() / getDynamicClassID()
// declared above.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue)
89 | |
90 | ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, |
91 | const UReplaceableCallbacks *funcCallback) |
92 | : Replaceable() |
93 | { |
94 | this->rep = replaceable; |
95 | this->func = funcCallback; |
96 | } |
97 | |
// Empty body: the destructor does not touch rep or func.
ReplaceableGlue::~ReplaceableGlue() {}
99 | |
100 | int32_t ReplaceableGlue::getLength() const { |
101 | return (*func->length)(rep); |
102 | } |
103 | |
104 | UChar ReplaceableGlue::getCharAt(int32_t offset) const { |
105 | return (*func->charAt)(rep, offset); |
106 | } |
107 | |
108 | UChar32 ReplaceableGlue::getChar32At(int32_t offset) const { |
109 | return (*func->char32At)(rep, offset); |
110 | } |
111 | |
112 | void ReplaceableGlue::handleReplaceBetween(int32_t start, |
113 | int32_t limit, |
114 | const UnicodeString& text) { |
115 | (*func->replace)(rep, start, limit, text.getBuffer(), text.length()); |
116 | } |
117 | |
118 | void ReplaceableGlue::(int32_t start, |
119 | int32_t limit, |
120 | UnicodeString& target) const { |
121 | (*func->extract)(rep, start, limit, target.getBuffer(limit-start)); |
122 | target.releaseBuffer(limit-start); |
123 | } |
124 | |
125 | void ReplaceableGlue::copy(int32_t start, int32_t limit, int32_t dest) { |
126 | (*func->copy)(rep, start, limit, dest); |
127 | } |
128 | U_NAMESPACE_END |
129 | /******************************************************************** |
130 | * General API |
131 | ********************************************************************/ |
132 | U_NAMESPACE_USE |
133 | |
134 | U_CAPI UTransliterator* U_EXPORT2 |
135 | utrans_openU(const UChar *id, |
136 | int32_t idLength, |
137 | UTransDirection dir, |
138 | const UChar *rules, |
139 | int32_t rulesLength, |
140 | UParseError *parseError, |
141 | UErrorCode *status) { |
142 | if(status==NULL || U_FAILURE(*status)) { |
143 | return NULL; |
144 | } |
145 | if (id == NULL) { |
146 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
147 | return NULL; |
148 | } |
149 | UParseError temp; |
150 | |
151 | if(parseError == NULL){ |
152 | parseError = &temp; |
153 | } |
154 | |
155 | UnicodeString ID(idLength<0, id, idLength); // r-o alias |
156 | |
157 | if(rules==NULL){ |
158 | |
159 | Transliterator *trans = NULL; |
160 | |
161 | trans = Transliterator::createInstance(ID, dir, *parseError, *status); |
162 | |
163 | if(U_FAILURE(*status)){ |
164 | return NULL; |
165 | } |
166 | return (UTransliterator*) trans; |
167 | }else{ |
168 | UnicodeString ruleStr(rulesLength < 0, |
169 | rules, |
170 | rulesLength); // r-o alias |
171 | |
172 | Transliterator *trans = NULL; |
173 | trans = Transliterator::createFromRules(ID, ruleStr, dir, *parseError, *status); |
174 | if(U_FAILURE(*status)) { |
175 | return NULL; |
176 | } |
177 | |
178 | return (UTransliterator*) trans; |
179 | } |
180 | } |
181 | |
182 | U_CAPI UTransliterator* U_EXPORT2 |
183 | utrans_open(const char* id, |
184 | UTransDirection dir, |
185 | const UChar* rules, /* may be Null */ |
186 | int32_t rulesLength, /* -1 if null-terminated */ |
187 | UParseError* parseError, /* may be Null */ |
188 | UErrorCode* status) { |
189 | UnicodeString ID(id, -1, US_INV); // use invariant converter |
190 | return utrans_openU(ID.getBuffer(), ID.length(), dir, |
191 | rules, rulesLength, |
192 | parseError, status); |
193 | } |
194 | |
195 | U_CAPI UTransliterator* U_EXPORT2 |
196 | utrans_openInverse(const UTransliterator* trans, |
197 | UErrorCode* status) { |
198 | |
199 | utrans_ENTRY(status) NULL; |
200 | |
201 | UTransliterator* result = |
202 | (UTransliterator*) ((Transliterator*) trans)->createInverse(*status); |
203 | |
204 | return result; |
205 | } |
206 | |
207 | U_CAPI UTransliterator* U_EXPORT2 |
208 | utrans_clone(const UTransliterator* trans, |
209 | UErrorCode* status) { |
210 | |
211 | utrans_ENTRY(status) NULL; |
212 | |
213 | if (trans == NULL) { |
214 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
215 | return NULL; |
216 | } |
217 | |
218 | Transliterator *t = ((Transliterator*) trans)->clone(); |
219 | if (t == NULL) { |
220 | *status = U_MEMORY_ALLOCATION_ERROR; |
221 | } |
222 | return (UTransliterator*) t; |
223 | } |
224 | |
225 | U_CAPI void U_EXPORT2 |
226 | utrans_close(UTransliterator* trans) { |
227 | delete (Transliterator*) trans; |
228 | } |
229 | |
230 | U_CAPI const UChar * U_EXPORT2 |
231 | utrans_getUnicodeID(const UTransliterator *trans, |
232 | int32_t *resultLength) { |
233 | // Transliterator keeps its ID NUL-terminated |
234 | const UnicodeString &ID=((Transliterator*) trans)->getID(); |
235 | if(resultLength!=NULL) { |
236 | *resultLength=ID.length(); |
237 | } |
238 | return ID.getBuffer(); |
239 | } |
240 | |
241 | U_CAPI int32_t U_EXPORT2 |
242 | utrans_getID(const UTransliterator* trans, |
243 | char* buf, |
244 | int32_t bufCapacity) { |
245 | return ((Transliterator*) trans)->getID().extract(0, 0x7fffffff, buf, bufCapacity, US_INV); |
246 | } |
247 | |
248 | U_CAPI void U_EXPORT2 |
249 | utrans_register(UTransliterator* adoptedTrans, |
250 | UErrorCode* status) { |
251 | utrans_ENTRY(status); |
252 | // status currently ignored; may remove later |
253 | Transliterator::registerInstance((Transliterator*) adoptedTrans); |
254 | } |
255 | |
256 | U_CAPI void U_EXPORT2 |
257 | utrans_unregisterID(const UChar* id, int32_t idLength) { |
258 | UnicodeString ID(idLength<0, id, idLength); // r-o alias |
259 | Transliterator::unregister(ID); |
260 | } |
261 | |
262 | U_CAPI void U_EXPORT2 |
263 | utrans_unregister(const char* id) { |
264 | UnicodeString ID(id, -1, US_INV); // use invariant converter |
265 | Transliterator::unregister(ID); |
266 | } |
267 | |
268 | U_CAPI void U_EXPORT2 |
269 | utrans_setFilter(UTransliterator* trans, |
270 | const UChar* filterPattern, |
271 | int32_t filterPatternLen, |
272 | UErrorCode* status) { |
273 | |
274 | utrans_ENTRY(status); |
275 | UnicodeFilter* filter = NULL; |
276 | if (filterPattern != NULL && *filterPattern != 0) { |
277 | // Create read only alias of filterPattern: |
278 | UnicodeString pat(filterPatternLen < 0, filterPattern, filterPatternLen); |
279 | filter = new UnicodeSet(pat, *status); |
280 | /* test for NULL */ |
281 | if (filter == NULL) { |
282 | *status = U_MEMORY_ALLOCATION_ERROR; |
283 | return; |
284 | } |
285 | if (U_FAILURE(*status)) { |
286 | delete filter; |
287 | filter = NULL; |
288 | } |
289 | } |
290 | ((Transliterator*) trans)->adoptFilter(filter); |
291 | } |
292 | |
293 | U_CAPI int32_t U_EXPORT2 |
294 | utrans_countAvailableIDs(void) { |
295 | return Transliterator::countAvailableIDs(); |
296 | } |
297 | |
298 | U_CAPI int32_t U_EXPORT2 |
299 | utrans_getAvailableID(int32_t index, |
300 | char* buf, // may be NULL |
301 | int32_t bufCapacity) { |
302 | return Transliterator::getAvailableID(index).extract(0, 0x7fffffff, buf, bufCapacity, US_INV); |
303 | } |
304 | |
305 | /* Transliterator UEnumeration ---------------------------------------------- */ |
306 | |
307 | typedef struct UTransEnumeration { |
308 | UEnumeration uenum; |
309 | int32_t index, count; |
310 | } UTransEnumeration; |
311 | |
312 | U_CDECL_BEGIN |
313 | static int32_t U_CALLCONV |
314 | utrans_enum_count(UEnumeration *uenum, UErrorCode *pErrorCode) { |
315 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
316 | return 0; |
317 | } |
318 | return ((UTransEnumeration *)uenum)->count; |
319 | } |
320 | |
321 | static const UChar* U_CALLCONV |
322 | utrans_enum_unext(UEnumeration *uenum, |
323 | int32_t* resultLength, |
324 | UErrorCode *pErrorCode) { |
325 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
326 | return 0; |
327 | } |
328 | |
329 | UTransEnumeration *ute=(UTransEnumeration *)uenum; |
330 | int32_t index=ute->index; |
331 | if(index<ute->count) { |
332 | const UnicodeString &ID=Transliterator::getAvailableID(index); |
333 | ute->index=index+1; |
334 | if(resultLength!=NULL) { |
335 | *resultLength=ID.length(); |
336 | } |
337 | // Transliterator keeps its ID NUL-terminated |
338 | return ID.getBuffer(); |
339 | } |
340 | |
341 | if(resultLength!=NULL) { |
342 | *resultLength=0; |
343 | } |
344 | return NULL; |
345 | } |
346 | |
347 | static void U_CALLCONV |
348 | utrans_enum_reset(UEnumeration *uenum, UErrorCode *pErrorCode) { |
349 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
350 | return; |
351 | } |
352 | |
353 | UTransEnumeration *ute=(UTransEnumeration *)uenum; |
354 | ute->index=0; |
355 | ute->count=Transliterator::countAvailableIDs(); |
356 | } |
357 | |
// Close callback: frees the enumeration object, which utrans_openIDs()
// allocates with uprv_malloc().
static void U_CALLCONV
utrans_enum_close(UEnumeration *uenum) {
    uprv_free(uenum);
}
362 | U_CDECL_END |
363 | |
// Template UEnumeration copied into every UTransEnumeration by
// utrans_openIDs(). The initializer is positional, so the order must match
// the UEnumeration member order (declared in uenumimp.h, not shown here).
static const UEnumeration utransEnumeration={
    NULL,               // unused here (NOTE(review): presumably a context pointer; confirm in uenumimp.h)
    NULL,               // unused here (NOTE(review): presumably a context pointer; confirm in uenumimp.h)
    utrans_enum_close,
    utrans_enum_count,
    utrans_enum_unext,
    uenum_nextDefault,  // library-provided default rather than a function from this file
    utrans_enum_reset
};
373 | |
374 | U_CAPI UEnumeration * U_EXPORT2 |
375 | utrans_openIDs(UErrorCode *pErrorCode) { |
376 | UTransEnumeration *ute; |
377 | |
378 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
379 | return NULL; |
380 | } |
381 | |
382 | ute=(UTransEnumeration *)uprv_malloc(sizeof(UTransEnumeration)); |
383 | if(ute==NULL) { |
384 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
385 | return NULL; |
386 | } |
387 | |
388 | ute->uenum=utransEnumeration; |
389 | ute->index=0; |
390 | ute->count=Transliterator::countAvailableIDs(); |
391 | return (UEnumeration *)ute; |
392 | } |
393 | |
394 | /******************************************************************** |
395 | * Transliteration API |
396 | ********************************************************************/ |
397 | |
398 | U_CAPI void U_EXPORT2 |
399 | utrans_trans(const UTransliterator* trans, |
400 | UReplaceable* rep, |
401 | const UReplaceableCallbacks* repFunc, |
402 | int32_t start, |
403 | int32_t* limit, |
404 | UErrorCode* status) { |
405 | |
406 | utrans_ENTRY(status); |
407 | |
408 | if (trans == 0 || rep == 0 || repFunc == 0 || limit == 0) { |
409 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
410 | return; |
411 | } |
412 | |
413 | ReplaceableGlue r(rep, repFunc); |
414 | |
415 | *limit = ((Transliterator*) trans)->transliterate(r, start, *limit); |
416 | } |
417 | |
418 | U_CAPI void U_EXPORT2 |
419 | utrans_transIncremental(const UTransliterator* trans, |
420 | UReplaceable* rep, |
421 | const UReplaceableCallbacks* repFunc, |
422 | UTransPosition* pos, |
423 | UErrorCode* status) { |
424 | |
425 | utrans_ENTRY(status); |
426 | |
427 | if (trans == 0 || rep == 0 || repFunc == 0 || pos == 0) { |
428 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
429 | return; |
430 | } |
431 | |
432 | ReplaceableGlue r(rep, repFunc); |
433 | |
434 | ((Transliterator*) trans)->transliterate(r, *pos, *status); |
435 | } |
436 | |
437 | U_CAPI void U_EXPORT2 |
438 | utrans_transUChars(const UTransliterator* trans, |
439 | UChar* text, |
440 | int32_t* textLength, |
441 | int32_t textCapacity, |
442 | int32_t start, |
443 | int32_t* limit, |
444 | UErrorCode* status) { |
445 | |
446 | utrans_ENTRY(status); |
447 | |
448 | if (trans == 0 || text == 0 || limit == 0) { |
449 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
450 | return; |
451 | } |
452 | |
453 | int32_t textLen = (textLength == NULL || *textLength < 0) |
454 | ? u_strlen(text) : *textLength; |
455 | // writeable alias: for this ct, len CANNOT be -1 (why?) |
456 | UnicodeString str(text, textLen, textCapacity); |
457 | |
458 | *limit = ((Transliterator*) trans)->transliterate(str, start, *limit); |
459 | |
460 | // Copy the string buffer back to text (only if necessary) |
461 | // and fill in *neededCapacity (if neededCapacity != NULL). |
462 | textLen = str.extract(text, textCapacity, *status); |
463 | if(textLength != NULL) { |
464 | *textLength = textLen; |
465 | } |
466 | } |
467 | |
468 | U_CAPI void U_EXPORT2 |
469 | utrans_transIncrementalUChars(const UTransliterator* trans, |
470 | UChar* text, |
471 | int32_t* textLength, |
472 | int32_t textCapacity, |
473 | UTransPosition* pos, |
474 | UErrorCode* status) { |
475 | |
476 | utrans_ENTRY(status); |
477 | |
478 | if (trans == 0 || text == 0 || pos == 0) { |
479 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
480 | return; |
481 | } |
482 | |
483 | int32_t textLen = (textLength == NULL || *textLength < 0) |
484 | ? u_strlen(text) : *textLength; |
485 | // writeable alias: for this ct, len CANNOT be -1 (why?) |
486 | UnicodeString str(text, textLen, textCapacity); |
487 | |
488 | ((Transliterator*) trans)->transliterate(str, *pos, *status); |
489 | |
490 | // Copy the string buffer back to text (only if necessary) |
491 | // and fill in *neededCapacity (if neededCapacity != NULL). |
492 | textLen = str.extract(text, textCapacity, *status); |
493 | if(textLength != NULL) { |
494 | *textLength = textLen; |
495 | } |
496 | } |
497 | |
498 | U_CAPI int32_t U_EXPORT2 |
499 | utrans_toRules( const UTransliterator* trans, |
500 | UBool escapeUnprintable, |
501 | UChar* result, int32_t resultLength, |
502 | UErrorCode* status) { |
503 | utrans_ENTRY(status) 0; |
504 | if ( (result==NULL)? resultLength!=0: resultLength<0 ) { |
505 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
506 | return 0; |
507 | } |
508 | |
509 | UnicodeString res; |
510 | res.setTo(result, 0, resultLength); |
511 | ((Transliterator*) trans)->toRules(res, escapeUnprintable); |
512 | return res.extract(result, resultLength, *status); |
513 | } |
514 | |
515 | U_CAPI USet* U_EXPORT2 |
516 | utrans_getSourceSet(const UTransliterator* trans, |
517 | UBool ignoreFilter, |
518 | USet* fillIn, |
519 | UErrorCode* status) { |
520 | utrans_ENTRY(status) fillIn; |
521 | |
522 | if (fillIn == NULL) { |
523 | fillIn = uset_openEmpty(); |
524 | } |
525 | if (ignoreFilter) { |
526 | ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn)); |
527 | } else { |
528 | ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn)); |
529 | } |
530 | return fillIn; |
531 | } |
532 | |
533 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
534 | |