| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * |
| 6 | * Copyright (C) 2003-2013, International Business Machines |
| 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************* |
| 10 | * file name: uarrsort.c |
| 11 | * encoding: UTF-8 |
| 12 | * tab size: 8 (not used) |
| 13 | * indentation:4 |
| 14 | * |
| 15 | * created on: 2003aug04 |
| 16 | * created by: Markus W. Scherer |
| 17 | * |
| 18 | * Internal function for sorting arrays. |
| 19 | */ |
| 20 | |
| 21 | #include <cstddef> |
| 22 | |
| 23 | #include "unicode/utypes.h" |
| 24 | #include "cmemory.h" |
| 25 | #include "uarrsort.h" |
| 26 | |
| 27 | enum { |
| 28 | /** |
| 29 | * "from Knuth" |
| 30 | * |
| 31 | * A binary search over 8 items performs 4 comparisons: |
| 32 | * log2(8)=3 to subdivide, +1 to check for equality. |
| 33 | * A linear search over 8 items on average also performs 4 comparisons. |
| 34 | */ |
| 35 | MIN_QSORT=9, |
| 36 | STACK_ITEM_SIZE=200 |
| 37 | }; |
| 38 | |
| 39 | static constexpr int32_t sizeInMaxAlignTs(int32_t sizeInBytes) { |
| 40 | return (sizeInBytes + sizeof(std::max_align_t) - 1) / sizeof(std::max_align_t); |
| 41 | } |
| 42 | |
| 43 | /* UComparator convenience implementations ---------------------------------- */ |
| 44 | |
| 45 | U_CAPI int32_t U_EXPORT2 |
| 46 | uprv_uint16Comparator(const void *context, const void *left, const void *right) { |
| 47 | (void)context; |
| 48 | return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; |
| 49 | } |
| 50 | |
| 51 | U_CAPI int32_t U_EXPORT2 |
| 52 | uprv_int32Comparator(const void *context, const void *left, const void *right) { |
| 53 | (void)context; |
| 54 | return *(const int32_t *)left - *(const int32_t *)right; |
| 55 | } |
| 56 | |
| 57 | U_CAPI int32_t U_EXPORT2 |
| 58 | uprv_uint32Comparator(const void *context, const void *left, const void *right) { |
| 59 | (void)context; |
| 60 | uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; |
| 61 | |
| 62 | /* compare directly because (l-r) would overflow the int32_t result */ |
| 63 | if(l<r) { |
| 64 | return -1; |
| 65 | } else if(l==r) { |
| 66 | return 0; |
| 67 | } else /* l>r */ { |
| 68 | return 1; |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | /* Insertion sort using binary search --------------------------------------- */ |
| 73 | |
| 74 | U_CAPI int32_t U_EXPORT2 |
| 75 | uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, |
| 76 | UComparator *cmp, const void *context) { |
| 77 | int32_t start=0; |
| 78 | UBool found=false; |
| 79 | |
| 80 | /* Binary search until we get down to a tiny sub-array. */ |
| 81 | while((limit-start)>=MIN_QSORT) { |
| 82 | int32_t i=(start+limit)/2; |
| 83 | int32_t diff=cmp(context, item, array+i*itemSize); |
| 84 | if(diff==0) { |
| 85 | /* |
| 86 | * Found the item. We look for the *last* occurrence of such |
| 87 | * an item, for stable sorting. |
| 88 | * If we knew that there will be only few equal items, |
| 89 | * we could break now and enter the linear search. |
| 90 | * However, if there are many equal items, then it should be |
| 91 | * faster to continue with the binary search. |
| 92 | * It seems likely that we either have all unique items |
| 93 | * (where found will never become true in the insertion sort) |
| 94 | * or potentially many duplicates. |
| 95 | */ |
| 96 | found=true; |
| 97 | start=i+1; |
| 98 | } else if(diff<0) { |
| 99 | limit=i; |
| 100 | } else { |
| 101 | start=i; |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | /* Linear search over the remaining tiny sub-array. */ |
| 106 | while(start<limit) { |
| 107 | int32_t diff=cmp(context, item, array+start*itemSize); |
| 108 | if(diff==0) { |
| 109 | found=true; |
| 110 | } else if(diff<0) { |
| 111 | break; |
| 112 | } |
| 113 | ++start; |
| 114 | } |
| 115 | return found ? (start-1) : ~start; |
| 116 | } |
| 117 | |
| 118 | static void |
| 119 | doInsertionSort(char *array, int32_t length, int32_t itemSize, |
| 120 | UComparator *cmp, const void *context, void *pv) { |
| 121 | int32_t j; |
| 122 | |
| 123 | for(j=1; j<length; ++j) { |
| 124 | char *item=array+j*itemSize; |
| 125 | int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context); |
| 126 | if(insertionPoint<0) { |
| 127 | insertionPoint=~insertionPoint; |
| 128 | } else { |
| 129 | ++insertionPoint; /* one past the last equal item */ |
| 130 | } |
| 131 | if(insertionPoint<j) { |
| 132 | char *dest=array+insertionPoint*itemSize; |
| 133 | uprv_memcpy(pv, item, itemSize); /* v=array[j] */ |
| 134 | uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*(size_t)itemSize); |
| 135 | uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */ |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | static void |
| 141 | insertionSort(char *array, int32_t length, int32_t itemSize, |
| 142 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
| 143 | |
| 144 | icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE)> v; |
| 145 | if (sizeInMaxAlignTs(itemSize) > v.getCapacity() && |
| 146 | v.resize(sizeInMaxAlignTs(itemSize)) == nullptr) { |
| 147 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
| 148 | return; |
| 149 | } |
| 150 | |
| 151 | doInsertionSort(array, length, itemSize, cmp, context, v.getAlias()); |
| 152 | } |
| 153 | |
| 154 | /* QuickSort ---------------------------------------------------------------- */ |
| 155 | |
| 156 | /* |
| 157 | * This implementation is semi-recursive: |
| 158 | * It recurses for the smaller sub-array to shorten the recursion depth, |
| 159 | * and loops for the larger sub-array. |
| 160 | * |
| 161 | * Loosely after QuickSort algorithms in |
| 162 | * Niklaus Wirth |
| 163 | * Algorithmen und Datenstrukturen mit Modula-2 |
| 164 | * B.G. Teubner Stuttgart |
| 165 | * 4. Auflage 1986 |
| 166 | * ISBN 3-519-02260-5 |
| 167 | */ |
| 168 | static void |
| 169 | subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, |
| 170 | UComparator *cmp, const void *context, |
| 171 | void *px, void *pw) { |
| 172 | int32_t left, right; |
| 173 | |
| 174 | /* start and left are inclusive, limit and right are exclusive */ |
| 175 | do { |
| 176 | if((start+MIN_QSORT)>=limit) { |
| 177 | doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); |
| 178 | break; |
| 179 | } |
| 180 | |
| 181 | left=start; |
| 182 | right=limit; |
| 183 | |
| 184 | /* x=array[middle] */ |
| 185 | uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); |
| 186 | |
| 187 | do { |
| 188 | while(/* array[left]<x */ |
| 189 | cmp(context, array+left*itemSize, px)<0 |
| 190 | ) { |
| 191 | ++left; |
| 192 | } |
| 193 | while(/* x<array[right-1] */ |
| 194 | cmp(context, px, array+(right-1)*itemSize)<0 |
| 195 | ) { |
| 196 | --right; |
| 197 | } |
| 198 | |
| 199 | /* swap array[left] and array[right-1] via w; ++left; --right */ |
| 200 | if(left<right) { |
| 201 | --right; |
| 202 | |
| 203 | if(left<right) { |
| 204 | uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize); |
| 205 | uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize); |
| 206 | uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize); |
| 207 | } |
| 208 | |
| 209 | ++left; |
| 210 | } |
| 211 | } while(left<right); |
| 212 | |
| 213 | /* sort sub-arrays */ |
| 214 | if((right-start)<(limit-left)) { |
| 215 | /* sort [start..right[ */ |
| 216 | if(start<(right-1)) { |
| 217 | subQuickSort(array, start, right, itemSize, cmp, context, px, pw); |
| 218 | } |
| 219 | |
| 220 | /* sort [left..limit[ */ |
| 221 | start=left; |
| 222 | } else { |
| 223 | /* sort [left..limit[ */ |
| 224 | if(left<(limit-1)) { |
| 225 | subQuickSort(array, left, limit, itemSize, cmp, context, px, pw); |
| 226 | } |
| 227 | |
| 228 | /* sort [start..right[ */ |
| 229 | limit=right; |
| 230 | } |
| 231 | } while(start<(limit-1)); |
| 232 | } |
| 233 | |
| 234 | static void |
| 235 | quickSort(char *array, int32_t length, int32_t itemSize, |
| 236 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
| 237 | /* allocate two intermediate item variables (x and w) */ |
| 238 | icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE) * 2> xw; |
| 239 | if(sizeInMaxAlignTs(itemSize)*2 > xw.getCapacity() && |
| 240 | xw.resize(sizeInMaxAlignTs(itemSize) * 2) == nullptr) { |
| 241 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 242 | return; |
| 243 | } |
| 244 | |
| 245 | subQuickSort(array, 0, length, itemSize, cmp, context, |
| 246 | xw.getAlias(), xw.getAlias() + sizeInMaxAlignTs(itemSize)); |
| 247 | } |
| 248 | |
| 249 | /* uprv_sortArray() API ----------------------------------------------------- */ |
| 250 | |
| 251 | /* |
| 252 | * Check arguments, select an appropriate implementation, |
| 253 | * cast the array to char * so that array+i*itemSize works. |
| 254 | */ |
| 255 | U_CAPI void U_EXPORT2 |
| 256 | uprv_sortArray(void *array, int32_t length, int32_t itemSize, |
| 257 | UComparator *cmp, const void *context, |
| 258 | UBool sortStable, UErrorCode *pErrorCode) { |
| 259 | if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { |
| 260 | return; |
| 261 | } |
| 262 | if((length>0 && array==nullptr) || length<0 || itemSize<=0 || cmp==nullptr) { |
| 263 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 264 | return; |
| 265 | } |
| 266 | |
| 267 | if(length<=1) { |
| 268 | return; |
| 269 | } else if(length<MIN_QSORT || sortStable) { |
| 270 | insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
| 271 | } else { |
| 272 | quickSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
| 273 | } |
| 274 | } |
| 275 | |