1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2003-2013, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: uarrsort.c |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2003aug04 |
16 | * created by: Markus W. Scherer |
17 | * |
18 | * Internal function for sorting arrays. |
19 | */ |
20 | |
21 | #include "unicode/utypes.h" |
22 | #include "cmemory.h" |
23 | #include "uarrsort.h" |
24 | |
25 | enum { |
26 | /** |
27 | * "from Knuth" |
28 | * |
29 | * A binary search over 8 items performs 4 comparisons: |
30 | * log2(8)=3 to subdivide, +1 to check for equality. |
31 | * A linear search over 8 items on average also performs 4 comparisons. |
32 | */ |
33 | MIN_QSORT=9, |
34 | STACK_ITEM_SIZE=200 |
35 | }; |
36 | |
37 | static constexpr int32_t sizeInMaxAlignTs(int32_t sizeInBytes) { |
38 | return (sizeInBytes + sizeof(max_align_t) - 1) / sizeof(max_align_t); |
39 | } |
40 | |
41 | /* UComparator convenience implementations ---------------------------------- */ |
42 | |
43 | U_CAPI int32_t U_EXPORT2 |
44 | uprv_uint16Comparator(const void *context, const void *left, const void *right) { |
45 | (void)context; |
46 | return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; |
47 | } |
48 | |
49 | U_CAPI int32_t U_EXPORT2 |
50 | uprv_int32Comparator(const void *context, const void *left, const void *right) { |
51 | (void)context; |
52 | return *(const int32_t *)left - *(const int32_t *)right; |
53 | } |
54 | |
55 | U_CAPI int32_t U_EXPORT2 |
56 | uprv_uint32Comparator(const void *context, const void *left, const void *right) { |
57 | (void)context; |
58 | uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; |
59 | |
60 | /* compare directly because (l-r) would overflow the int32_t result */ |
61 | if(l<r) { |
62 | return -1; |
63 | } else if(l==r) { |
64 | return 0; |
65 | } else /* l>r */ { |
66 | return 1; |
67 | } |
68 | } |
69 | |
70 | /* Insertion sort using binary search --------------------------------------- */ |
71 | |
72 | U_CAPI int32_t U_EXPORT2 |
73 | uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, |
74 | UComparator *cmp, const void *context) { |
75 | int32_t start=0; |
76 | UBool found=FALSE; |
77 | |
78 | /* Binary search until we get down to a tiny sub-array. */ |
79 | while((limit-start)>=MIN_QSORT) { |
80 | int32_t i=(start+limit)/2; |
81 | int32_t diff=cmp(context, item, array+i*itemSize); |
82 | if(diff==0) { |
83 | /* |
84 | * Found the item. We look for the *last* occurrence of such |
85 | * an item, for stable sorting. |
86 | * If we knew that there will be only few equal items, |
87 | * we could break now and enter the linear search. |
88 | * However, if there are many equal items, then it should be |
89 | * faster to continue with the binary search. |
90 | * It seems likely that we either have all unique items |
91 | * (where found will never become TRUE in the insertion sort) |
92 | * or potentially many duplicates. |
93 | */ |
94 | found=TRUE; |
95 | start=i+1; |
96 | } else if(diff<0) { |
97 | limit=i; |
98 | } else { |
99 | start=i; |
100 | } |
101 | } |
102 | |
103 | /* Linear search over the remaining tiny sub-array. */ |
104 | while(start<limit) { |
105 | int32_t diff=cmp(context, item, array+start*itemSize); |
106 | if(diff==0) { |
107 | found=TRUE; |
108 | } else if(diff<0) { |
109 | break; |
110 | } |
111 | ++start; |
112 | } |
113 | return found ? (start-1) : ~start; |
114 | } |
115 | |
116 | static void |
117 | doInsertionSort(char *array, int32_t length, int32_t itemSize, |
118 | UComparator *cmp, const void *context, void *pv) { |
119 | int32_t j; |
120 | |
121 | for(j=1; j<length; ++j) { |
122 | char *item=array+j*itemSize; |
123 | int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context); |
124 | if(insertionPoint<0) { |
125 | insertionPoint=~insertionPoint; |
126 | } else { |
127 | ++insertionPoint; /* one past the last equal item */ |
128 | } |
129 | if(insertionPoint<j) { |
130 | char *dest=array+insertionPoint*itemSize; |
131 | uprv_memcpy(pv, item, itemSize); /* v=array[j] */ |
132 | uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*(size_t)itemSize); |
133 | uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */ |
134 | } |
135 | } |
136 | } |
137 | |
138 | static void |
139 | insertionSort(char *array, int32_t length, int32_t itemSize, |
140 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
141 | |
142 | icu::MaybeStackArray<max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE)> v; |
143 | if (sizeInMaxAlignTs(itemSize) > v.getCapacity() && |
144 | v.resize(sizeInMaxAlignTs(itemSize)) == nullptr) { |
145 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
146 | return; |
147 | } |
148 | |
149 | doInsertionSort(array, length, itemSize, cmp, context, v.getAlias()); |
150 | } |
151 | |
152 | /* QuickSort ---------------------------------------------------------------- */ |
153 | |
154 | /* |
155 | * This implementation is semi-recursive: |
156 | * It recurses for the smaller sub-array to shorten the recursion depth, |
157 | * and loops for the larger sub-array. |
158 | * |
159 | * Loosely after QuickSort algorithms in |
160 | * Niklaus Wirth |
161 | * Algorithmen und Datenstrukturen mit Modula-2 |
162 | * B.G. Teubner Stuttgart |
163 | * 4. Auflage 1986 |
164 | * ISBN 3-519-02260-5 |
165 | */ |
166 | static void |
167 | subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, |
168 | UComparator *cmp, const void *context, |
169 | void *px, void *pw) { |
170 | int32_t left, right; |
171 | |
172 | /* start and left are inclusive, limit and right are exclusive */ |
173 | do { |
174 | if((start+MIN_QSORT)>=limit) { |
175 | doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); |
176 | break; |
177 | } |
178 | |
179 | left=start; |
180 | right=limit; |
181 | |
182 | /* x=array[middle] */ |
183 | uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); |
184 | |
185 | do { |
186 | while(/* array[left]<x */ |
187 | cmp(context, array+left*itemSize, px)<0 |
188 | ) { |
189 | ++left; |
190 | } |
191 | while(/* x<array[right-1] */ |
192 | cmp(context, px, array+(right-1)*itemSize)<0 |
193 | ) { |
194 | --right; |
195 | } |
196 | |
197 | /* swap array[left] and array[right-1] via w; ++left; --right */ |
198 | if(left<right) { |
199 | --right; |
200 | |
201 | if(left<right) { |
202 | uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize); |
203 | uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize); |
204 | uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize); |
205 | } |
206 | |
207 | ++left; |
208 | } |
209 | } while(left<right); |
210 | |
211 | /* sort sub-arrays */ |
212 | if((right-start)<(limit-left)) { |
213 | /* sort [start..right[ */ |
214 | if(start<(right-1)) { |
215 | subQuickSort(array, start, right, itemSize, cmp, context, px, pw); |
216 | } |
217 | |
218 | /* sort [left..limit[ */ |
219 | start=left; |
220 | } else { |
221 | /* sort [left..limit[ */ |
222 | if(left<(limit-1)) { |
223 | subQuickSort(array, left, limit, itemSize, cmp, context, px, pw); |
224 | } |
225 | |
226 | /* sort [start..right[ */ |
227 | limit=right; |
228 | } |
229 | } while(start<(limit-1)); |
230 | } |
231 | |
232 | static void |
233 | quickSort(char *array, int32_t length, int32_t itemSize, |
234 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
235 | /* allocate two intermediate item variables (x and w) */ |
236 | icu::MaybeStackArray<max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE) * 2> xw; |
237 | if(sizeInMaxAlignTs(itemSize)*2 > xw.getCapacity() && |
238 | xw.resize(sizeInMaxAlignTs(itemSize) * 2) == nullptr) { |
239 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
240 | return; |
241 | } |
242 | |
243 | subQuickSort(array, 0, length, itemSize, cmp, context, |
244 | xw.getAlias(), xw.getAlias() + sizeInMaxAlignTs(itemSize)); |
245 | } |
246 | |
247 | /* uprv_sortArray() API ----------------------------------------------------- */ |
248 | |
249 | /* |
250 | * Check arguments, select an appropriate implementation, |
251 | * cast the array to char * so that array+i*itemSize works. |
252 | */ |
253 | U_CAPI void U_EXPORT2 |
254 | uprv_sortArray(void *array, int32_t length, int32_t itemSize, |
255 | UComparator *cmp, const void *context, |
256 | UBool sortStable, UErrorCode *pErrorCode) { |
257 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
258 | return; |
259 | } |
260 | if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { |
261 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
262 | return; |
263 | } |
264 | |
265 | if(length<=1) { |
266 | return; |
267 | } else if(length<MIN_QSORT || sortStable) { |
268 | insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
269 | } else { |
270 | quickSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
271 | } |
272 | } |
273 | |