1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 1996-2015, International Business Machines
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* file name: ucol.cpp
9* encoding: UTF-8
10* tab size: 8 (not used)
11* indentation:4
12*
13* Modification history
14* Date Name Comments
15* 1996-1999 various members of ICU team maintained C API for collation framework
16* 02/16/2001 synwee Added internal method getPrevSpecialCE
17* 03/01/2001 synwee Added maxexpansion functionality.
18* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
19* 2012-2014 markus Rewritten in C++ again.
20*/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_COLLATION
25
26#include "unicode/coll.h"
27#include "unicode/tblcoll.h"
28#include "unicode/bytestream.h"
29#include "unicode/coleitr.h"
30#include "unicode/ucoleitr.h"
31#include "unicode/ustring.h"
32#include "cmemory.h"
33#include "collation.h"
34#include "cstring.h"
35#include "putilimp.h"
36#include "uassert.h"
37#include "utracimp.h"
38
39U_NAMESPACE_USE
40
41U_CAPI UCollator* U_EXPORT2
42ucol_openBinary(const uint8_t *bin, int32_t length,
43 const UCollator *base,
44 UErrorCode *status)
45{
46 if(U_FAILURE(*status)) { return NULL; }
47 RuleBasedCollator *coll = new RuleBasedCollator(
48 bin, length,
49 RuleBasedCollator::rbcFromUCollator(base),
50 *status);
51 if(coll == NULL) {
52 *status = U_MEMORY_ALLOCATION_ERROR;
53 return NULL;
54 }
55 if(U_FAILURE(*status)) {
56 delete coll;
57 return NULL;
58 }
59 return coll->toUCollator();
60}
61
62U_CAPI int32_t U_EXPORT2
63ucol_cloneBinary(const UCollator *coll,
64 uint8_t *buffer, int32_t capacity,
65 UErrorCode *status)
66{
67 if(U_FAILURE(*status)) {
68 return 0;
69 }
70 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
71 if(rbc == NULL && coll != NULL) {
72 *status = U_UNSUPPORTED_ERROR;
73 return 0;
74 }
75 return rbc->cloneBinary(buffer, capacity, *status);
76}
77
78U_CAPI UCollator* U_EXPORT2
79ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status)
80{
81 if (status == NULL || U_FAILURE(*status)){
82 return NULL;
83 }
84 if (coll == NULL) {
85 *status = U_ILLEGAL_ARGUMENT_ERROR;
86 return NULL;
87 }
88 if (pBufferSize != NULL) {
89 int32_t inputSize = *pBufferSize;
90 *pBufferSize = 1;
91 if (inputSize == 0) {
92 return NULL; // preflighting for deprecated functionality
93 }
94 }
95 Collator *newColl = Collator::fromUCollator(coll)->clone();
96 if (newColl == NULL) {
97 *status = U_MEMORY_ALLOCATION_ERROR;
98 return nullptr;
99 } else {
100 *status = U_SAFECLONE_ALLOCATED_WARNING;
101 }
102 return newColl->toUCollator();
103}
104
105U_CAPI void U_EXPORT2
106ucol_close(UCollator *coll)
107{
108 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
109 UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
110 if(coll != NULL) {
111 delete Collator::fromUCollator(coll);
112 }
113 UTRACE_EXIT();
114}
115
116U_CAPI int32_t U_EXPORT2
117ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
118 const uint8_t *src2, int32_t src2Length,
119 uint8_t *dest, int32_t destCapacity) {
120 /* check arguments */
121 if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
122 src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
123 destCapacity<0 || (destCapacity>0 && dest==NULL)
124 ) {
125 /* error, attempt to write a zero byte and return 0 */
126 if(dest!=NULL && destCapacity>0) {
127 *dest=0;
128 }
129 return 0;
130 }
131
132 /* check lengths and capacity */
133 if(src1Length<0) {
134 src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
135 }
136 if(src2Length<0) {
137 src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
138 }
139
140 int32_t destLength=src1Length+src2Length;
141 if(destLength>destCapacity) {
142 /* the merged sort key does not fit into the destination */
143 return destLength;
144 }
145
146 /* merge the sort keys with the same number of levels */
147 uint8_t *p=dest;
148 for(;;) {
149 /* copy level from src1 not including 00 or 01 */
150 uint8_t b;
151 while((b=*src1)>=2) {
152 ++src1;
153 *p++=b;
154 }
155
156 /* add a 02 merge separator */
157 *p++=2;
158
159 /* copy level from src2 not including 00 or 01 */
160 while((b=*src2)>=2) {
161 ++src2;
162 *p++=b;
163 }
164
165 /* if both sort keys have another level, then add a 01 level separator and continue */
166 if(*src1==1 && *src2==1) {
167 ++src1;
168 ++src2;
169 *p++=1;
170 } else {
171 break;
172 }
173 }
174
175 /*
176 * here, at least one sort key is finished now, but the other one
177 * might have some contents left from containing more levels;
178 * that contents is just appended to the result
179 */
180 if(*src1!=0) {
181 /* src1 is not finished, therefore *src2==0, and src1 is appended */
182 src2=src1;
183 }
184 /* append src2, "the other, unfinished sort key" */
185 while((*p++=*src2++)!=0) {}
186
187 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
188 return (int32_t)(p-dest);
189}
190
191U_CAPI int32_t U_EXPORT2
192ucol_getSortKey(const UCollator *coll,
193 const UChar *source,
194 int32_t sourceLength,
195 uint8_t *result,
196 int32_t resultLength)
197{
198 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
199 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
200 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
201 ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
202 }
203
204 int32_t keySize = Collator::fromUCollator(coll)->
205 getSortKey(source, sourceLength, result, resultLength);
206
207 UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
208 UTRACE_EXIT_VALUE(keySize);
209 return keySize;
210}
211
212U_CAPI int32_t U_EXPORT2
213ucol_nextSortKeyPart(const UCollator *coll,
214 UCharIterator *iter,
215 uint32_t state[2],
216 uint8_t *dest, int32_t count,
217 UErrorCode *status)
218{
219 /* error checking */
220 if(status==NULL || U_FAILURE(*status)) {
221 return 0;
222 }
223 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
224 UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
225 coll, iter, state[0], state[1], dest, count);
226
227 int32_t i = Collator::fromUCollator(coll)->
228 internalNextSortKeyPart(iter, state, dest, count, *status);
229
230 // Return number of meaningful sortkey bytes.
231 UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
232 dest,i, state[0], state[1]);
233 UTRACE_EXIT_VALUE_STATUS(i, *status);
234 return i;
235}
236
237/**
238 * Produce a bound for a given sortkey and a number of levels.
239 */
240U_CAPI int32_t U_EXPORT2
241ucol_getBound(const uint8_t *source,
242 int32_t sourceLength,
243 UColBoundMode boundType,
244 uint32_t noOfLevels,
245 uint8_t *result,
246 int32_t resultLength,
247 UErrorCode *status)
248{
249 // consistency checks
250 if(status == NULL || U_FAILURE(*status)) {
251 return 0;
252 }
253 if(source == NULL) {
254 *status = U_ILLEGAL_ARGUMENT_ERROR;
255 return 0;
256 }
257
258 int32_t sourceIndex = 0;
259 // Scan the string until we skip enough of the key OR reach the end of the key
260 do {
261 sourceIndex++;
262 if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) {
263 noOfLevels--;
264 }
265 } while (noOfLevels > 0
266 && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
267
268 if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
269 && noOfLevels > 0) {
270 *status = U_SORT_KEY_TOO_SHORT_WARNING;
271 }
272
273
274 // READ ME: this code assumes that the values for boundType
275 // enum will not changes. They are set so that the enum value
276 // corresponds to the number of extra bytes each bound type
277 // needs.
278 if(result != NULL && resultLength >= sourceIndex+boundType) {
279 uprv_memcpy(result, source, sourceIndex);
280 switch(boundType) {
281 // Lower bound just gets terminated. No extra bytes
282 case UCOL_BOUND_LOWER: // = 0
283 break;
284 // Upper bound needs one extra byte
285 case UCOL_BOUND_UPPER: // = 1
286 result[sourceIndex++] = 2;
287 break;
288 // Upper long bound needs two extra bytes
289 case UCOL_BOUND_UPPER_LONG: // = 2
290 result[sourceIndex++] = 0xFF;
291 result[sourceIndex++] = 0xFF;
292 break;
293 default:
294 *status = U_ILLEGAL_ARGUMENT_ERROR;
295 return 0;
296 }
297 result[sourceIndex++] = 0;
298
299 return sourceIndex;
300 } else {
301 return sourceIndex+boundType+1;
302 }
303}
304
305U_CAPI void U_EXPORT2
306ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) {
307 if(U_FAILURE(*pErrorCode)) { return; }
308 Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode);
309}
310
311U_CAPI UColReorderCode U_EXPORT2
312ucol_getMaxVariable(const UCollator *coll) {
313 return Collator::fromUCollator(coll)->getMaxVariable();
314}
315
316U_CAPI uint32_t U_EXPORT2
317ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
318 if(U_FAILURE(*status) || coll == NULL) {
319 return 0;
320 }
321 return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status);
322}
323
324U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
325 if(U_FAILURE(*status) || coll == NULL) {
326 return 0;
327 }
328 return Collator::fromUCollator(coll)->getVariableTop(*status);
329}
330
331U_CAPI void U_EXPORT2
332ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
333 if(U_FAILURE(*status) || coll == NULL) {
334 return;
335 }
336 Collator::fromUCollator(coll)->setVariableTop(varTop, *status);
337}
338
339U_CAPI void U_EXPORT2
340ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
341 if(U_FAILURE(*status) || coll == NULL) {
342 return;
343 }
344
345 Collator::fromUCollator(coll)->setAttribute(attr, value, *status);
346}
347
348U_CAPI UColAttributeValue U_EXPORT2
349ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
350 if(U_FAILURE(*status) || coll == NULL) {
351 return UCOL_DEFAULT;
352 }
353
354 return Collator::fromUCollator(coll)->getAttribute(attr, *status);
355}
356
357U_CAPI void U_EXPORT2
358ucol_setStrength( UCollator *coll,
359 UCollationStrength strength)
360{
361 UErrorCode status = U_ZERO_ERROR;
362 ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
363}
364
365U_CAPI UCollationStrength U_EXPORT2
366ucol_getStrength(const UCollator *coll)
367{
368 UErrorCode status = U_ZERO_ERROR;
369 return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
370}
371
372U_CAPI int32_t U_EXPORT2
373ucol_getReorderCodes(const UCollator *coll,
374 int32_t *dest,
375 int32_t destCapacity,
376 UErrorCode *status) {
377 if (U_FAILURE(*status)) {
378 return 0;
379 }
380
381 return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status);
382}
383
384U_CAPI void U_EXPORT2
385ucol_setReorderCodes(UCollator* coll,
386 const int32_t* reorderCodes,
387 int32_t reorderCodesLength,
388 UErrorCode *status) {
389 if (U_FAILURE(*status)) {
390 return;
391 }
392
393 Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status);
394}
395
396U_CAPI int32_t U_EXPORT2
397ucol_getEquivalentReorderCodes(int32_t reorderCode,
398 int32_t* dest,
399 int32_t destCapacity,
400 UErrorCode *pErrorCode) {
401 return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode);
402}
403
404U_CAPI void U_EXPORT2
405ucol_getVersion(const UCollator* coll,
406 UVersionInfo versionInfo)
407{
408 Collator::fromUCollator(coll)->getVersion(versionInfo);
409}
410
411U_CAPI UCollationResult U_EXPORT2
412ucol_strcollIter( const UCollator *coll,
413 UCharIterator *sIter,
414 UCharIterator *tIter,
415 UErrorCode *status)
416{
417 if(!status || U_FAILURE(*status)) {
418 return UCOL_EQUAL;
419 }
420
421 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
422 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
423
424 if(sIter == NULL || tIter == NULL || coll == NULL) {
425 *status = U_ILLEGAL_ARGUMENT_ERROR;
426 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
427 return UCOL_EQUAL;
428 }
429
430 UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status);
431
432 UTRACE_EXIT_VALUE_STATUS(result, *status);
433 return result;
434}
435
436
437/* */
438/* ucol_strcoll Main public API string comparison function */
439/* */
440U_CAPI UCollationResult U_EXPORT2
441ucol_strcoll( const UCollator *coll,
442 const UChar *source,
443 int32_t sourceLength,
444 const UChar *target,
445 int32_t targetLength)
446{
447 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
448 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
449 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
450 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
451 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
452 }
453
454 UErrorCode status = U_ZERO_ERROR;
455 UCollationResult returnVal = Collator::fromUCollator(coll)->
456 compare(source, sourceLength, target, targetLength, status);
457 UTRACE_EXIT_VALUE_STATUS(returnVal, status);
458 return returnVal;
459}
460
461U_CAPI UCollationResult U_EXPORT2
462ucol_strcollUTF8(
463 const UCollator *coll,
464 const char *source,
465 int32_t sourceLength,
466 const char *target,
467 int32_t targetLength,
468 UErrorCode *status)
469{
470 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
471 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
472 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
473 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
474 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
475 }
476
477 if (U_FAILURE(*status)) {
478 /* do nothing */
479 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
480 return UCOL_EQUAL;
481 }
482
483 UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8(
484 source, sourceLength, target, targetLength, *status);
485 UTRACE_EXIT_VALUE_STATUS(returnVal, *status);
486 return returnVal;
487}
488
489
490/* convenience function for comparing strings */
491U_CAPI UBool U_EXPORT2
492ucol_greater( const UCollator *coll,
493 const UChar *source,
494 int32_t sourceLength,
495 const UChar *target,
496 int32_t targetLength)
497{
498 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
499 == UCOL_GREATER);
500}
501
502/* convenience function for comparing strings */
503U_CAPI UBool U_EXPORT2
504ucol_greaterOrEqual( const UCollator *coll,
505 const UChar *source,
506 int32_t sourceLength,
507 const UChar *target,
508 int32_t targetLength)
509{
510 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
511 != UCOL_LESS);
512}
513
514/* convenience function for comparing strings */
515U_CAPI UBool U_EXPORT2
516ucol_equal( const UCollator *coll,
517 const UChar *source,
518 int32_t sourceLength,
519 const UChar *target,
520 int32_t targetLength)
521{
522 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
523 == UCOL_EQUAL);
524}
525
526U_CAPI void U_EXPORT2
527ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
528 const Collator *c = Collator::fromUCollator(coll);
529 if(c != NULL) {
530 UVersionInfo v;
531 c->getVersion(v);
532 // Note: This is tied to how the current implementation encodes the UCA version
533 // in the overall getVersion().
534 // Alternatively, we could load the root collator and get at lower-level data from there.
535 // Either way, it will reflect the input collator's UCA version only
536 // if it is a known implementation.
537 // It would be cleaner to make this a virtual Collator method.
538 info[0] = v[1] >> 3;
539 info[1] = v[1] & 7;
540 info[2] = v[2] >> 6;
541 info[3] = 0;
542 }
543}
544
545U_CAPI const UChar * U_EXPORT2
546ucol_getRules(const UCollator *coll, int32_t *length) {
547 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
548 // OK to crash if coll==NULL: We do not want to check "this" pointers.
549 if(rbc != NULL || coll == NULL) {
550 const UnicodeString &rules = rbc->getRules();
551 U_ASSERT(rules.getBuffer()[rules.length()] == 0);
552 *length = rules.length();
553 return rules.getBuffer();
554 }
555 static const UChar _NUL = 0;
556 *length = 0;
557 return &_NUL;
558}
559
560U_CAPI int32_t U_EXPORT2
561ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
562 UnicodeString rules;
563 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
564 if(rbc != NULL || coll == NULL) {
565 rbc->getRules(delta, rules);
566 }
567 if(buffer != NULL && bufferLen > 0) {
568 UErrorCode errorCode = U_ZERO_ERROR;
569 return rules.extract(buffer, bufferLen, errorCode);
570 } else {
571 return rules.length();
572 }
573}
574
575U_CAPI const char * U_EXPORT2
576ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
577 return ucol_getLocaleByType(coll, type, status);
578}
579
580U_CAPI const char * U_EXPORT2
581ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
582 if(U_FAILURE(*status)) {
583 return NULL;
584 }
585 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
586 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
587
588 const char *result;
589 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
590 if(rbc == NULL && coll != NULL) {
591 *status = U_UNSUPPORTED_ERROR;
592 result = NULL;
593 } else {
594 result = rbc->internalGetLocaleID(type, *status);
595 }
596
597 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
598 UTRACE_EXIT_STATUS(*status);
599 return result;
600}
601
602U_CAPI USet * U_EXPORT2
603ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) {
604 if(U_FAILURE(*status)) {
605 return NULL;
606 }
607 UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status);
608 if(U_FAILURE(*status)) {
609 delete set;
610 return NULL;
611 }
612 return set->toUSet();
613}
614
615U_CAPI UBool U_EXPORT2
616ucol_equals(const UCollator *source, const UCollator *target) {
617 return source == target ||
618 (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target));
619}
620
621#endif /* #if !UCONFIG_NO_COLLATION */
622