1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2013-2014, International Business Machines
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* collationrootelements.cpp
9*
10* created on: 2013mar05
11* created by: Markus W. Scherer
12*/
13
14#include "unicode/utypes.h"
15
16#if !UCONFIG_NO_COLLATION
17
18#include "collation.h"
19#include "collationrootelements.h"
20#include "uassert.h"
21
22U_NAMESPACE_BEGIN
23
24int64_t
25CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
26 if(p == 0) { return 0; }
27 U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
28 int32_t index = findP(p);
29 uint32_t q = elements[index];
30 uint32_t secTer;
31 if(p == (q & 0xffffff00)) {
32 // p == elements[index] is a root primary. Find the CE before it.
33 // We must not be in a primary range.
34 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
35 secTer = elements[index - 1];
36 if((secTer & SEC_TER_DELTA_FLAG) == 0) {
37 // Primary CE just before p.
38 p = secTer & 0xffffff00;
39 secTer = Collation::COMMON_SEC_AND_TER_CE;
40 } else {
41 // secTer = last secondary & tertiary for the previous primary
42 index -= 2;
43 for(;;) {
44 p = elements[index];
45 if((p & SEC_TER_DELTA_FLAG) == 0) {
46 p &= 0xffffff00;
47 break;
48 }
49 --index;
50 }
51 }
52 } else {
53 // p > elements[index] which is the previous primary.
54 // Find the last secondary & tertiary weights for it.
55 p = q & 0xffffff00;
56 secTer = Collation::COMMON_SEC_AND_TER_CE;
57 for(;;) {
58 q = elements[++index];
59 if((q & SEC_TER_DELTA_FLAG) == 0) {
60 // We must not be in a primary range.
61 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
62 break;
63 }
64 secTer = q;
65 }
66 }
67 return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
68}
69
70int64_t
71CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
72 if(p == 0) { return 0; }
73 int32_t index = findP(p);
74 if(p != (elements[index] & 0xffffff00)) {
75 for(;;) {
76 p = elements[++index];
77 if((p & SEC_TER_DELTA_FLAG) == 0) {
78 // First primary after p. We must not be in a primary range.
79 U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
80 break;
81 }
82 }
83 }
84 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
85 return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
86}
87
88uint32_t
89CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
90 int32_t index = findPrimary(p);
91 int32_t step;
92 uint32_t q = elements[index];
93 if(p == (q & 0xffffff00)) {
94 // Found p itself. Return the previous primary.
95 // See if p is at the end of a previous range.
96 step = (int32_t)q & PRIMARY_STEP_MASK;
97 if(step == 0) {
98 // p is not at the end of a range. Look for the previous primary.
99 do {
100 p = elements[--index];
101 } while((p & SEC_TER_DELTA_FLAG) != 0);
102 return p & 0xffffff00;
103 }
104 } else {
105 // p is in a range, and not at the start.
106 uint32_t nextElement = elements[index + 1];
107 U_ASSERT(isEndOfPrimaryRange(nextElement));
108 step = (int32_t)nextElement & PRIMARY_STEP_MASK;
109 }
110 // Return the previous range primary.
111 if((p & 0xffff) == 0) {
112 return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
113 } else {
114 return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
115 }
116}
117
118uint32_t
119CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
120 int32_t index;
121 uint32_t previousSec, sec;
122 if(p == 0) {
123 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
124 // Gap at the beginning of the secondary CE range.
125 previousSec = 0;
126 sec = elements[index] >> 16;
127 } else {
128 index = findPrimary(p) + 1;
129 previousSec = Collation::BEFORE_WEIGHT16;
130 sec = getFirstSecTerForPrimary(index) >> 16;
131 }
132 U_ASSERT(s >= sec);
133 while(s > sec) {
134 previousSec = sec;
135 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
136 sec = elements[index++] >> 16;
137 }
138 U_ASSERT(sec == s);
139 return previousSec;
140}
141
142uint32_t
143CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
144 U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
145 int32_t index;
146 uint32_t previousTer, secTer;
147 if(p == 0) {
148 if(s == 0) {
149 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
150 // Gap at the beginning of the tertiary CE range.
151 previousTer = 0;
152 } else {
153 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
154 previousTer = Collation::BEFORE_WEIGHT16;
155 }
156 secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
157 } else {
158 index = findPrimary(p) + 1;
159 previousTer = Collation::BEFORE_WEIGHT16;
160 secTer = getFirstSecTerForPrimary(index);
161 }
162 uint32_t st = (s << 16) | t;
163 while(st > secTer) {
164 if((secTer >> 16) == s) { previousTer = secTer; }
165 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
166 secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
167 }
168 U_ASSERT(secTer == st);
169 return previousTer & 0xffff;
170}
171
172uint32_t
173CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
174 U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
175 uint32_t q = elements[++index];
176 int32_t step;
177 if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
178 // Return the next primary in this range.
179 if((p & 0xffff) == 0) {
180 return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
181 } else {
182 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
183 }
184 } else {
185 // Return the next primary in the list.
186 while((q & SEC_TER_DELTA_FLAG) != 0) {
187 q = elements[++index];
188 }
189 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
190 return q;
191 }
192}
193
194uint32_t
195CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
196 uint32_t secTer;
197 uint32_t secLimit;
198 if(index == 0) {
199 // primary = 0
200 U_ASSERT(s != 0);
201 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
202 secTer = elements[index];
203 // Gap at the end of the secondary CE range.
204 secLimit = 0x10000;
205 } else {
206 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
207 secTer = getFirstSecTerForPrimary(index + 1);
208 // If this is an explicit sec/ter unit, then it will be read once more.
209 // Gap for secondaries of primary CEs.
210 secLimit = getSecondaryBoundary();
211 }
212 for(;;) {
213 uint32_t sec = secTer >> 16;
214 if(sec > s) { return sec; }
215 secTer = elements[++index];
216 if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
217 }
218}
219
220uint32_t
221CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
222 uint32_t secTer;
223 uint32_t terLimit;
224 if(index == 0) {
225 // primary = 0
226 if(s == 0) {
227 U_ASSERT(t != 0);
228 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
229 // Gap at the end of the tertiary CE range.
230 terLimit = 0x4000;
231 } else {
232 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
233 // Gap for tertiaries of primary/secondary CEs.
234 terLimit = getTertiaryBoundary();
235 }
236 secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
237 } else {
238 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
239 secTer = getFirstSecTerForPrimary(index + 1);
240 // If this is an explicit sec/ter unit, then it will be read once more.
241 terLimit = getTertiaryBoundary();
242 }
243 uint32_t st = (s << 16) | t;
244 for(;;) {
245 if(secTer > st) {
246 U_ASSERT((secTer >> 16) == s);
247 return secTer & 0xffff;
248 }
249 secTer = elements[++index];
250 // No tertiary greater than t for this primary+secondary.
251 if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
252 secTer &= ~SEC_TER_DELTA_FLAG;
253 }
254}
255
256uint32_t
257CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
258 uint32_t secTer = elements[index];
259 if((secTer & SEC_TER_DELTA_FLAG) == 0) {
260 // No sec/ter delta.
261 return Collation::COMMON_SEC_AND_TER_CE;
262 }
263 secTer &= ~SEC_TER_DELTA_FLAG;
264 if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
265 // Implied sec/ter.
266 return Collation::COMMON_SEC_AND_TER_CE;
267 }
268 // Explicit sec/ter below common/common.
269 return secTer;
270}
271
272int32_t
273CollationRootElements::findPrimary(uint32_t p) const {
274 // Requirement: p must occur as a root primary.
275 U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary
276 int32_t index = findP(p);
277 // If p is in a range, then we just assume that p is an actual primary in this range.
278 // (Too cumbersome/expensive to check.)
279 // Otherwise, it must be an exact match.
280 U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
281 return index;
282}
283
284int32_t
285CollationRootElements::findP(uint32_t p) const {
286 // p need not occur as a root primary.
287 // For example, it might be a reordering group boundary.
288 U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
289 // modified binary search
290 int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
291 U_ASSERT(p >= elements[start]);
292 int32_t limit = length - 1;
293 U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
294 U_ASSERT(p < elements[limit]);
295 while((start + 1) < limit) {
296 // Invariant: elements[start] and elements[limit] are primaries,
297 // and elements[start]<=p<=elements[limit].
298 int32_t i = (start + limit) / 2;
299 uint32_t q = elements[i];
300 if((q & SEC_TER_DELTA_FLAG) != 0) {
301 // Find the next primary.
302 int32_t j = i + 1;
303 for(;;) {
304 if(j == limit) { break; }
305 q = elements[j];
306 if((q & SEC_TER_DELTA_FLAG) == 0) {
307 i = j;
308 break;
309 }
310 ++j;
311 }
312 if((q & SEC_TER_DELTA_FLAG) != 0) {
313 // Find the preceding primary.
314 j = i - 1;
315 for(;;) {
316 if(j == start) { break; }
317 q = elements[j];
318 if((q & SEC_TER_DELTA_FLAG) == 0) {
319 i = j;
320 break;
321 }
322 --j;
323 }
324 if((q & SEC_TER_DELTA_FLAG) != 0) {
325 // No primary between start and limit.
326 break;
327 }
328 }
329 }
330 if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary.
331 limit = i;
332 } else {
333 start = i;
334 }
335 }
336 return start;
337}
338
339U_NAMESPACE_END
340
341#endif // !UCONFIG_NO_COLLATION
342