1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 1996-2012, International Business Machines Corporation and |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | */ |
9 | //=============================================================================== |
10 | // |
11 | // File sortkey.cpp |
12 | // |
13 | // |
14 | // |
15 | // Created by: Helena Shih |
16 | // |
17 | // Modification History: |
18 | // |
19 | // Date Name Description |
20 | // |
21 | // 6/20/97 helena Java class name change. |
22 | // 6/23/97 helena Added comments to make code more readable. |
23 | // 6/26/98 erm Canged to use byte arrays instead of UnicodeString |
24 | // 7/31/98 erm hashCode: minimum inc should be 2 not 1, |
25 | // Cleaned up operator= |
26 | // 07/12/99 helena HPUX 11 CC port. |
27 | // 03/06/01 synwee Modified compareTo, to handle the result of |
28 | // 2 string similar in contents, but one is longer |
29 | // than the other |
30 | //=============================================================================== |
31 | |
32 | #include "unicode/utypes.h" |
33 | |
34 | #if !UCONFIG_NO_COLLATION |
35 | |
36 | #include "unicode/sortkey.h" |
37 | #include "cmemory.h" |
38 | #include "uelement.h" |
39 | #include "ustr_imp.h" |
40 | |
41 | U_NAMESPACE_BEGIN |
42 | |
43 | // A hash code of kInvalidHashCode indicates that the hash code needs |
44 | // to be computed. A hash code of kEmptyHashCode is used for empty keys |
45 | // and for any key whose computed hash code is kInvalidHashCode. |
46 | static const int32_t kInvalidHashCode = 0; |
47 | static const int32_t kEmptyHashCode = 1; |
48 | // The "bogus hash code" replaces a separate fBogus flag. |
49 | static const int32_t kBogusHashCode = 2; |
50 | |
51 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) |
52 | |
53 | CollationKey::CollationKey() |
54 | : UObject(), fFlagAndLength(0), |
55 | fHashCode(kEmptyHashCode) |
56 | { |
57 | } |
58 | |
59 | // Create a collation key from a bit array. |
60 | CollationKey::CollationKey(const uint8_t* newValues, int32_t count) |
61 | : UObject(), fFlagAndLength(count), |
62 | fHashCode(kInvalidHashCode) |
63 | { |
64 | if (count < 0 || (newValues == NULL && count != 0) || |
65 | (count > getCapacity() && reallocate(count, 0) == NULL)) { |
66 | setToBogus(); |
67 | return; |
68 | } |
69 | |
70 | if (count > 0) { |
71 | uprv_memcpy(getBytes(), newValues, count); |
72 | } |
73 | } |
74 | |
75 | CollationKey::CollationKey(const CollationKey& other) |
76 | : UObject(other), fFlagAndLength(other.getLength()), |
77 | fHashCode(other.fHashCode) |
78 | { |
79 | if (other.isBogus()) |
80 | { |
81 | setToBogus(); |
82 | return; |
83 | } |
84 | |
85 | int32_t length = fFlagAndLength; |
86 | if (length > getCapacity() && reallocate(length, 0) == NULL) { |
87 | setToBogus(); |
88 | return; |
89 | } |
90 | |
91 | if (length > 0) { |
92 | uprv_memcpy(getBytes(), other.getBytes(), length); |
93 | } |
94 | } |
95 | |
96 | CollationKey::~CollationKey() |
97 | { |
98 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
99 | } |
100 | |
101 | uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { |
102 | uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); |
103 | if(newBytes == NULL) { return NULL; } |
104 | if(length > 0) { |
105 | uprv_memcpy(newBytes, getBytes(), length); |
106 | } |
107 | if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
108 | fUnion.fFields.fBytes = newBytes; |
109 | fUnion.fFields.fCapacity = newCapacity; |
110 | fFlagAndLength |= 0x80000000; |
111 | return newBytes; |
112 | } |
113 | |
114 | void CollationKey::setLength(int32_t newLength) { |
115 | // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); |
116 | fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; |
117 | fHashCode = kInvalidHashCode; |
118 | } |
119 | |
120 | // set the key to an empty state |
121 | CollationKey& |
122 | CollationKey::reset() |
123 | { |
124 | fFlagAndLength &= 0x80000000; |
125 | fHashCode = kEmptyHashCode; |
126 | |
127 | return *this; |
128 | } |
129 | |
130 | // set the key to a "bogus" or invalid state |
131 | CollationKey& |
132 | CollationKey::setToBogus() |
133 | { |
134 | fFlagAndLength &= 0x80000000; |
135 | fHashCode = kBogusHashCode; |
136 | |
137 | return *this; |
138 | } |
139 | |
140 | UBool |
141 | CollationKey::operator==(const CollationKey& source) const |
142 | { |
143 | return getLength() == source.getLength() && |
144 | (this == &source || |
145 | uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); |
146 | } |
147 | |
148 | const CollationKey& |
149 | CollationKey::operator=(const CollationKey& other) |
150 | { |
151 | if (this != &other) |
152 | { |
153 | if (other.isBogus()) |
154 | { |
155 | return setToBogus(); |
156 | } |
157 | |
158 | int32_t length = other.getLength(); |
159 | if (length > getCapacity() && reallocate(length, 0) == NULL) { |
160 | return setToBogus(); |
161 | } |
162 | if (length > 0) { |
163 | uprv_memcpy(getBytes(), other.getBytes(), length); |
164 | } |
165 | fFlagAndLength = (fFlagAndLength & 0x80000000) | length; |
166 | fHashCode = other.fHashCode; |
167 | } |
168 | |
169 | return *this; |
170 | } |
171 | |
172 | // Bitwise comparison for the collation keys. |
173 | Collator::EComparisonResult |
174 | CollationKey::compareTo(const CollationKey& target) const |
175 | { |
176 | UErrorCode errorCode = U_ZERO_ERROR; |
177 | return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); |
178 | } |
179 | |
180 | // Bitwise comparison for the collation keys. |
181 | UCollationResult |
182 | CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const |
183 | { |
184 | if(U_SUCCESS(status)) { |
185 | const uint8_t *src = getBytes(); |
186 | const uint8_t *tgt = target.getBytes(); |
187 | |
188 | // are we comparing the same string |
189 | if (src == tgt) |
190 | return UCOL_EQUAL; |
191 | |
192 | UCollationResult result; |
193 | |
194 | // are we comparing different lengths? |
195 | int32_t minLength = getLength(); |
196 | int32_t targetLength = target.getLength(); |
197 | if (minLength < targetLength) { |
198 | result = UCOL_LESS; |
199 | } else if (minLength == targetLength) { |
200 | result = UCOL_EQUAL; |
201 | } else { |
202 | minLength = targetLength; |
203 | result = UCOL_GREATER; |
204 | } |
205 | |
206 | if (minLength > 0) { |
207 | int diff = uprv_memcmp(src, tgt, minLength); |
208 | if (diff > 0) { |
209 | return UCOL_GREATER; |
210 | } |
211 | else |
212 | if (diff < 0) { |
213 | return UCOL_LESS; |
214 | } |
215 | } |
216 | |
217 | return result; |
218 | } else { |
219 | return UCOL_EQUAL; |
220 | } |
221 | } |
222 | |
223 | #ifdef U_USE_COLLATION_KEY_DEPRECATES |
224 | // Create a copy of the byte array. |
225 | uint8_t* |
226 | CollationKey::toByteArray(int32_t& count) const |
227 | { |
228 | uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); |
229 | |
230 | if (result == NULL) |
231 | { |
232 | count = 0; |
233 | } |
234 | else |
235 | { |
236 | count = fCount; |
237 | if (count > 0) { |
238 | uprv_memcpy(result, fBytes, fCount); |
239 | } |
240 | } |
241 | |
242 | return result; |
243 | } |
244 | #endif |
245 | |
246 | static int32_t |
247 | computeHashCode(const uint8_t *key, int32_t length) { |
248 | const char *s = reinterpret_cast<const char *>(key); |
249 | int32_t hash; |
250 | if (s == NULL || length == 0) { |
251 | hash = kEmptyHashCode; |
252 | } else { |
253 | hash = ustr_hashCharsN(s, length); |
254 | if (hash == kInvalidHashCode || hash == kBogusHashCode) { |
255 | hash = kEmptyHashCode; |
256 | } |
257 | } |
258 | return hash; |
259 | } |
260 | |
261 | int32_t |
262 | CollationKey::hashCode() const |
263 | { |
264 | // (Cribbed from UnicodeString) |
265 | // We cache the hashCode; when it becomes invalid, due to any change to the |
266 | // string, we note this by setting it to kInvalidHashCode. [LIU] |
267 | |
268 | // Note: This method is semantically const, but physically non-const. |
269 | |
270 | if (fHashCode == kInvalidHashCode) |
271 | { |
272 | fHashCode = computeHashCode(getBytes(), getLength()); |
273 | } |
274 | |
275 | return fHashCode; |
276 | } |
277 | |
278 | U_NAMESPACE_END |
279 | |
280 | U_CAPI int32_t U_EXPORT2 |
281 | ucol_keyHashCode(const uint8_t *key, |
282 | int32_t length) |
283 | { |
284 | return icu::computeHashCode(key, length); |
285 | } |
286 | |
287 | #endif /* #if !UCONFIG_NO_COLLATION */ |
288 | |