| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ******************************************************************************* | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2003-2013, International Business Machines | 
|---|
| 7 | *   Corporation and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ******************************************************************************* | 
|---|
| 10 | *   file name:  uarrsort.c | 
|---|
| 11 | *   encoding:   UTF-8 | 
|---|
| 12 | *   tab size:   8 (not used) | 
|---|
| 13 | *   indentation:4 | 
|---|
| 14 | * | 
|---|
| 15 | *   created on: 2003aug04 | 
|---|
| 16 | *   created by: Markus W. Scherer | 
|---|
| 17 | * | 
|---|
| 18 | *   Internal function for sorting arrays. | 
|---|
| 19 | */ | 
|---|
| 20 |  | 
|---|
| 21 | #include "unicode/utypes.h" | 
|---|
| 22 | #include "cmemory.h" | 
|---|
| 23 | #include "uarrsort.h" | 
|---|
| 24 |  | 
|---|
| 25 | enum { | 
|---|
| 26 | /** | 
|---|
| 27 | * "from Knuth" | 
|---|
| 28 | * | 
|---|
| 29 | * A binary search over 8 items performs 4 comparisons: | 
|---|
| 30 | * log2(8)=3 to subdivide, +1 to check for equality. | 
|---|
| 31 | * A linear search over 8 items on average also performs 4 comparisons. | 
|---|
| 32 | */ | 
|---|
| 33 | MIN_QSORT=9, | 
|---|
| 34 | STACK_ITEM_SIZE=200 | 
|---|
| 35 | }; | 
|---|
| 36 |  | 
|---|
| 37 | static constexpr int32_t sizeInMaxAlignTs(int32_t sizeInBytes) { | 
|---|
| 38 | return (sizeInBytes + sizeof(max_align_t) - 1) / sizeof(max_align_t); | 
|---|
| 39 | } | 
|---|
| 40 |  | 
|---|
| 41 | /* UComparator convenience implementations ---------------------------------- */ | 
|---|
| 42 |  | 
|---|
| 43 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 44 | uprv_uint16Comparator(const void *context, const void *left, const void *right) { | 
|---|
| 45 | (void)context; | 
|---|
| 46 | return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; | 
|---|
| 47 | } | 
|---|
| 48 |  | 
|---|
| 49 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 50 | uprv_int32Comparator(const void *context, const void *left, const void *right) { | 
|---|
| 51 | (void)context; | 
|---|
| 52 | return *(const int32_t *)left - *(const int32_t *)right; | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 56 | uprv_uint32Comparator(const void *context, const void *left, const void *right) { | 
|---|
| 57 | (void)context; | 
|---|
| 58 | uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; | 
|---|
| 59 |  | 
|---|
| 60 | /* compare directly because (l-r) would overflow the int32_t result */ | 
|---|
| 61 | if(l<r) { | 
|---|
| 62 | return -1; | 
|---|
| 63 | } else if(l==r) { | 
|---|
| 64 | return 0; | 
|---|
| 65 | } else /* l>r */ { | 
|---|
| 66 | return 1; | 
|---|
| 67 | } | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | /* Insertion sort using binary search --------------------------------------- */ | 
|---|
| 71 |  | 
|---|
| 72 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 73 | uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, | 
|---|
| 74 | UComparator *cmp, const void *context) { | 
|---|
| 75 | int32_t start=0; | 
|---|
| 76 | UBool found=FALSE; | 
|---|
| 77 |  | 
|---|
| 78 | /* Binary search until we get down to a tiny sub-array. */ | 
|---|
| 79 | while((limit-start)>=MIN_QSORT) { | 
|---|
| 80 | int32_t i=(start+limit)/2; | 
|---|
| 81 | int32_t diff=cmp(context, item, array+i*itemSize); | 
|---|
| 82 | if(diff==0) { | 
|---|
| 83 | /* | 
|---|
| 84 | * Found the item. We look for the *last* occurrence of such | 
|---|
| 85 | * an item, for stable sorting. | 
|---|
| 86 | * If we knew that there will be only few equal items, | 
|---|
| 87 | * we could break now and enter the linear search. | 
|---|
| 88 | * However, if there are many equal items, then it should be | 
|---|
| 89 | * faster to continue with the binary search. | 
|---|
| 90 | * It seems likely that we either have all unique items | 
|---|
| 91 | * (where found will never become TRUE in the insertion sort) | 
|---|
| 92 | * or potentially many duplicates. | 
|---|
| 93 | */ | 
|---|
| 94 | found=TRUE; | 
|---|
| 95 | start=i+1; | 
|---|
| 96 | } else if(diff<0) { | 
|---|
| 97 | limit=i; | 
|---|
| 98 | } else { | 
|---|
| 99 | start=i; | 
|---|
| 100 | } | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | /* Linear search over the remaining tiny sub-array. */ | 
|---|
| 104 | while(start<limit) { | 
|---|
| 105 | int32_t diff=cmp(context, item, array+start*itemSize); | 
|---|
| 106 | if(diff==0) { | 
|---|
| 107 | found=TRUE; | 
|---|
| 108 | } else if(diff<0) { | 
|---|
| 109 | break; | 
|---|
| 110 | } | 
|---|
| 111 | ++start; | 
|---|
| 112 | } | 
|---|
| 113 | return found ? (start-1) : ~start; | 
|---|
| 114 | } | 
|---|
| 115 |  | 
|---|
| 116 | static void | 
|---|
| 117 | doInsertionSort(char *array, int32_t length, int32_t itemSize, | 
|---|
| 118 | UComparator *cmp, const void *context, void *pv) { | 
|---|
| 119 | int32_t j; | 
|---|
| 120 |  | 
|---|
| 121 | for(j=1; j<length; ++j) { | 
|---|
| 122 | char *item=array+j*itemSize; | 
|---|
| 123 | int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context); | 
|---|
| 124 | if(insertionPoint<0) { | 
|---|
| 125 | insertionPoint=~insertionPoint; | 
|---|
| 126 | } else { | 
|---|
| 127 | ++insertionPoint;  /* one past the last equal item */ | 
|---|
| 128 | } | 
|---|
| 129 | if(insertionPoint<j) { | 
|---|
| 130 | char *dest=array+insertionPoint*itemSize; | 
|---|
| 131 | uprv_memcpy(pv, item, itemSize);  /* v=array[j] */ | 
|---|
| 132 | uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*(size_t)itemSize); | 
|---|
| 133 | uprv_memcpy(dest, pv, itemSize);  /* array[insertionPoint]=v */ | 
|---|
| 134 | } | 
|---|
| 135 | } | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | static void | 
|---|
| 139 | insertionSort(char *array, int32_t length, int32_t itemSize, | 
|---|
| 140 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { | 
|---|
| 141 |  | 
|---|
| 142 | icu::MaybeStackArray<max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE)> v; | 
|---|
| 143 | if (sizeInMaxAlignTs(itemSize) > v.getCapacity() && | 
|---|
| 144 | v.resize(sizeInMaxAlignTs(itemSize)) == nullptr) { | 
|---|
| 145 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 146 | return; | 
|---|
| 147 | } | 
|---|
| 148 |  | 
|---|
| 149 | doInsertionSort(array, length, itemSize, cmp, context, v.getAlias()); | 
|---|
| 150 | } | 
|---|
| 151 |  | 
|---|
| 152 | /* QuickSort ---------------------------------------------------------------- */ | 
|---|
| 153 |  | 
|---|
| 154 | /* | 
|---|
| 155 | * This implementation is semi-recursive: | 
|---|
| 156 | * It recurses for the smaller sub-array to shorten the recursion depth, | 
|---|
| 157 | * and loops for the larger sub-array. | 
|---|
| 158 | * | 
|---|
| 159 | * Loosely after QuickSort algorithms in | 
|---|
| 160 | * Niklaus Wirth | 
|---|
| 161 | * Algorithmen und Datenstrukturen mit Modula-2 | 
|---|
| 162 | * B.G. Teubner Stuttgart | 
|---|
| 163 | * 4. Auflage 1986 | 
|---|
| 164 | * ISBN 3-519-02260-5 | 
|---|
| 165 | */ | 
|---|
| 166 | static void | 
|---|
| 167 | subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, | 
|---|
| 168 | UComparator *cmp, const void *context, | 
|---|
| 169 | void *px, void *pw) { | 
|---|
| 170 | int32_t left, right; | 
|---|
| 171 |  | 
|---|
| 172 | /* start and left are inclusive, limit and right are exclusive */ | 
|---|
| 173 | do { | 
|---|
| 174 | if((start+MIN_QSORT)>=limit) { | 
|---|
| 175 | doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); | 
|---|
| 176 | break; | 
|---|
| 177 | } | 
|---|
| 178 |  | 
|---|
| 179 | left=start; | 
|---|
| 180 | right=limit; | 
|---|
| 181 |  | 
|---|
| 182 | /* x=array[middle] */ | 
|---|
| 183 | uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); | 
|---|
| 184 |  | 
|---|
| 185 | do { | 
|---|
| 186 | while(/* array[left]<x */ | 
|---|
| 187 | cmp(context, array+left*itemSize, px)<0 | 
|---|
| 188 | ) { | 
|---|
| 189 | ++left; | 
|---|
| 190 | } | 
|---|
| 191 | while(/* x<array[right-1] */ | 
|---|
| 192 | cmp(context, px, array+(right-1)*itemSize)<0 | 
|---|
| 193 | ) { | 
|---|
| 194 | --right; | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | /* swap array[left] and array[right-1] via w; ++left; --right */ | 
|---|
| 198 | if(left<right) { | 
|---|
| 199 | --right; | 
|---|
| 200 |  | 
|---|
| 201 | if(left<right) { | 
|---|
| 202 | uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize); | 
|---|
| 203 | uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize); | 
|---|
| 204 | uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize); | 
|---|
| 205 | } | 
|---|
| 206 |  | 
|---|
| 207 | ++left; | 
|---|
| 208 | } | 
|---|
| 209 | } while(left<right); | 
|---|
| 210 |  | 
|---|
| 211 | /* sort sub-arrays */ | 
|---|
| 212 | if((right-start)<(limit-left)) { | 
|---|
| 213 | /* sort [start..right[ */ | 
|---|
| 214 | if(start<(right-1)) { | 
|---|
| 215 | subQuickSort(array, start, right, itemSize, cmp, context, px, pw); | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | /* sort [left..limit[ */ | 
|---|
| 219 | start=left; | 
|---|
| 220 | } else { | 
|---|
| 221 | /* sort [left..limit[ */ | 
|---|
| 222 | if(left<(limit-1)) { | 
|---|
| 223 | subQuickSort(array, left, limit, itemSize, cmp, context, px, pw); | 
|---|
| 224 | } | 
|---|
| 225 |  | 
|---|
| 226 | /* sort [start..right[ */ | 
|---|
| 227 | limit=right; | 
|---|
| 228 | } | 
|---|
| 229 | } while(start<(limit-1)); | 
|---|
| 230 | } | 
|---|
| 231 |  | 
|---|
| 232 | static void | 
|---|
| 233 | quickSort(char *array, int32_t length, int32_t itemSize, | 
|---|
| 234 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { | 
|---|
| 235 | /* allocate two intermediate item variables (x and w) */ | 
|---|
| 236 | icu::MaybeStackArray<max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE) * 2> xw; | 
|---|
| 237 | if(sizeInMaxAlignTs(itemSize)*2 > xw.getCapacity() && | 
|---|
| 238 | xw.resize(sizeInMaxAlignTs(itemSize) * 2) == nullptr) { | 
|---|
| 239 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 240 | return; | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | subQuickSort(array, 0, length, itemSize, cmp, context, | 
|---|
| 244 | xw.getAlias(), xw.getAlias() + sizeInMaxAlignTs(itemSize)); | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | /* uprv_sortArray() API ----------------------------------------------------- */ | 
|---|
| 248 |  | 
|---|
| 249 | /* | 
|---|
| 250 | * Check arguments, select an appropriate implementation, | 
|---|
| 251 | * cast the array to char * so that array+i*itemSize works. | 
|---|
| 252 | */ | 
|---|
| 253 | U_CAPI void U_EXPORT2 | 
|---|
| 254 | uprv_sortArray(void *array, int32_t length, int32_t itemSize, | 
|---|
| 255 | UComparator *cmp, const void *context, | 
|---|
| 256 | UBool sortStable, UErrorCode *pErrorCode) { | 
|---|
| 257 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
|---|
| 258 | return; | 
|---|
| 259 | } | 
|---|
| 260 | if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { | 
|---|
| 261 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
|---|
| 262 | return; | 
|---|
| 263 | } | 
|---|
| 264 |  | 
|---|
| 265 | if(length<=1) { | 
|---|
| 266 | return; | 
|---|
| 267 | } else if(length<MIN_QSORT || sortStable) { | 
|---|
| 268 | insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode); | 
|---|
| 269 | } else { | 
|---|
| 270 | quickSort((char *)array, length, itemSize, cmp, context, pErrorCode); | 
|---|
| 271 | } | 
|---|
| 272 | } | 
|---|
| 273 |  | 
|---|