1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 1996-2012, International Business Machines Corporation and
6* others. All Rights Reserved.
7*******************************************************************************
8*/
9//===============================================================================
10//
11// File sortkey.cpp
12//
13//
14//
15// Created by: Helena Shih
16//
17// Modification History:
18//
19// Date Name Description
20//
21// 6/20/97 helena Java class name change.
22// 6/23/97 helena Added comments to make code more readable.
23// 6/26/98 erm Canged to use byte arrays instead of UnicodeString
24// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
25// Cleaned up operator=
26// 07/12/99 helena HPUX 11 CC port.
27// 03/06/01 synwee Modified compareTo, to handle the result of
28// 2 string similar in contents, but one is longer
29// than the other
30//===============================================================================
31
32#include "unicode/utypes.h"
33
34#if !UCONFIG_NO_COLLATION
35
36#include "unicode/sortkey.h"
37#include "cmemory.h"
38#include "uelement.h"
39#include "ustr_imp.h"
40
41U_NAMESPACE_BEGIN
42
43// A hash code of kInvalidHashCode indicates that the hash code needs
44// to be computed. A hash code of kEmptyHashCode is used for empty keys
45// and for any key whose computed hash code is kInvalidHashCode.
46static const int32_t kInvalidHashCode = 0;
47static const int32_t kEmptyHashCode = 1;
48// The "bogus hash code" replaces a separate fBogus flag.
49static const int32_t kBogusHashCode = 2;
50
51UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
52
53CollationKey::CollationKey()
54 : UObject(), fFlagAndLength(0),
55 fHashCode(kEmptyHashCode)
56{
57}
58
59// Create a collation key from a bit array.
60CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
61 : UObject(), fFlagAndLength(count),
62 fHashCode(kInvalidHashCode)
63{
64 if (count < 0 || (newValues == NULL && count != 0) ||
65 (count > getCapacity() && reallocate(count, 0) == NULL)) {
66 setToBogus();
67 return;
68 }
69
70 if (count > 0) {
71 uprv_memcpy(getBytes(), newValues, count);
72 }
73}
74
75CollationKey::CollationKey(const CollationKey& other)
76 : UObject(other), fFlagAndLength(other.getLength()),
77 fHashCode(other.fHashCode)
78{
79 if (other.isBogus())
80 {
81 setToBogus();
82 return;
83 }
84
85 int32_t length = fFlagAndLength;
86 if (length > getCapacity() && reallocate(length, 0) == NULL) {
87 setToBogus();
88 return;
89 }
90
91 if (length > 0) {
92 uprv_memcpy(getBytes(), other.getBytes(), length);
93 }
94}
95
96CollationKey::~CollationKey()
97{
98 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
99}
100
101uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
102 uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
103 if(newBytes == NULL) { return NULL; }
104 if(length > 0) {
105 uprv_memcpy(newBytes, getBytes(), length);
106 }
107 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
108 fUnion.fFields.fBytes = newBytes;
109 fUnion.fFields.fCapacity = newCapacity;
110 fFlagAndLength |= 0x80000000;
111 return newBytes;
112}
113
114void CollationKey::setLength(int32_t newLength) {
115 // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
116 fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
117 fHashCode = kInvalidHashCode;
118}
119
120// set the key to an empty state
121CollationKey&
122CollationKey::reset()
123{
124 fFlagAndLength &= 0x80000000;
125 fHashCode = kEmptyHashCode;
126
127 return *this;
128}
129
130// set the key to a "bogus" or invalid state
131CollationKey&
132CollationKey::setToBogus()
133{
134 fFlagAndLength &= 0x80000000;
135 fHashCode = kBogusHashCode;
136
137 return *this;
138}
139
140UBool
141CollationKey::operator==(const CollationKey& source) const
142{
143 return getLength() == source.getLength() &&
144 (this == &source ||
145 uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
146}
147
148const CollationKey&
149CollationKey::operator=(const CollationKey& other)
150{
151 if (this != &other)
152 {
153 if (other.isBogus())
154 {
155 return setToBogus();
156 }
157
158 int32_t length = other.getLength();
159 if (length > getCapacity() && reallocate(length, 0) == NULL) {
160 return setToBogus();
161 }
162 if (length > 0) {
163 uprv_memcpy(getBytes(), other.getBytes(), length);
164 }
165 fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
166 fHashCode = other.fHashCode;
167 }
168
169 return *this;
170}
171
172// Bitwise comparison for the collation keys.
173Collator::EComparisonResult
174CollationKey::compareTo(const CollationKey& target) const
175{
176 UErrorCode errorCode = U_ZERO_ERROR;
177 return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
178}
179
180// Bitwise comparison for the collation keys.
181UCollationResult
182CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
183{
184 if(U_SUCCESS(status)) {
185 const uint8_t *src = getBytes();
186 const uint8_t *tgt = target.getBytes();
187
188 // are we comparing the same string
189 if (src == tgt)
190 return UCOL_EQUAL;
191
192 UCollationResult result;
193
194 // are we comparing different lengths?
195 int32_t minLength = getLength();
196 int32_t targetLength = target.getLength();
197 if (minLength < targetLength) {
198 result = UCOL_LESS;
199 } else if (minLength == targetLength) {
200 result = UCOL_EQUAL;
201 } else {
202 minLength = targetLength;
203 result = UCOL_GREATER;
204 }
205
206 if (minLength > 0) {
207 int diff = uprv_memcmp(src, tgt, minLength);
208 if (diff > 0) {
209 return UCOL_GREATER;
210 }
211 else
212 if (diff < 0) {
213 return UCOL_LESS;
214 }
215 }
216
217 return result;
218 } else {
219 return UCOL_EQUAL;
220 }
221}
222
223#ifdef U_USE_COLLATION_KEY_DEPRECATES
224// Create a copy of the byte array.
225uint8_t*
226CollationKey::toByteArray(int32_t& count) const
227{
228 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
229
230 if (result == NULL)
231 {
232 count = 0;
233 }
234 else
235 {
236 count = fCount;
237 if (count > 0) {
238 uprv_memcpy(result, fBytes, fCount);
239 }
240 }
241
242 return result;
243}
244#endif
245
246static int32_t
247computeHashCode(const uint8_t *key, int32_t length) {
248 const char *s = reinterpret_cast<const char *>(key);
249 int32_t hash;
250 if (s == NULL || length == 0) {
251 hash = kEmptyHashCode;
252 } else {
253 hash = ustr_hashCharsN(s, length);
254 if (hash == kInvalidHashCode || hash == kBogusHashCode) {
255 hash = kEmptyHashCode;
256 }
257 }
258 return hash;
259}
260
261int32_t
262CollationKey::hashCode() const
263{
264 // (Cribbed from UnicodeString)
265 // We cache the hashCode; when it becomes invalid, due to any change to the
266 // string, we note this by setting it to kInvalidHashCode. [LIU]
267
268 // Note: This method is semantically const, but physically non-const.
269
270 if (fHashCode == kInvalidHashCode)
271 {
272 fHashCode = computeHashCode(getBytes(), getLength());
273 }
274
275 return fHashCode;
276}
277
278U_NAMESPACE_END
279
280U_CAPI int32_t U_EXPORT2
281ucol_keyHashCode(const uint8_t *key,
282 int32_t length)
283{
284 return icu::computeHashCode(key, length);
285}
286
287#endif /* #if !UCONFIG_NO_COLLATION */
288