| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ******************************************************************************* | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2002-2011, International Business Machines | 
|---|
| 7 | *   Corporation and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ******************************************************************************* | 
|---|
| 10 | *   file name:  propsvec.c | 
|---|
| 11 | *   encoding:   UTF-8 | 
|---|
| 12 | *   tab size:   8 (not used) | 
|---|
| 13 | *   indentation:4 | 
|---|
| 14 | * | 
|---|
| 15 | *   created on: 2002feb22 | 
|---|
| 16 | *   created by: Markus W. Scherer | 
|---|
| 17 | * | 
|---|
| 18 | *   Store bits (Unicode character properties) in bit set vectors. | 
|---|
| 19 | */ | 
|---|
| 20 |  | 
|---|
| 21 | #include <stdlib.h> | 
|---|
| 22 | #include "unicode/utypes.h" | 
|---|
| 23 | #include "cmemory.h" | 
|---|
| 24 | #include "utrie.h" | 
|---|
| 25 | #include "utrie2.h" | 
|---|
| 26 | #include "uarrsort.h" | 
|---|
| 27 | #include "propsvec.h" | 
|---|
| 28 | #include "uassert.h" | 
|---|
| 29 |  | 
|---|
| 30 | struct UPropsVectors { | 
|---|
| 31 | uint32_t *v; | 
|---|
| 32 | int32_t columns;  /* number of columns, plus two for start & limit values */ | 
|---|
| 33 | int32_t maxRows; | 
|---|
| 34 | int32_t rows; | 
|---|
| 35 | int32_t prevRow;  /* search optimization: remember last row seen */ | 
|---|
| 36 | UBool isCompacted; | 
|---|
| 37 | }; | 
|---|
| 38 |  | 
|---|
| 39 | #define UPVEC_INITIAL_ROWS (1<<12) | 
|---|
| 40 | #define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) | 
|---|
| 41 | #define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) | 
|---|
| 42 |  | 
|---|
| 43 | U_CAPI UPropsVectors * U_EXPORT2 | 
|---|
| 44 | upvec_open(int32_t columns, UErrorCode *pErrorCode) { | 
|---|
| 45 | UPropsVectors *pv; | 
|---|
| 46 | uint32_t *v, *row; | 
|---|
| 47 | uint32_t cp; | 
|---|
| 48 |  | 
|---|
| 49 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 50 | return nullptr; | 
|---|
| 51 | } | 
|---|
| 52 | if(columns<1) { | 
|---|
| 53 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
|---|
| 54 | return nullptr; | 
|---|
| 55 | } | 
|---|
| 56 | columns+=2; /* count range start and limit columns */ | 
|---|
| 57 |  | 
|---|
| 58 | pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); | 
|---|
| 59 | v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); | 
|---|
| 60 | if(pv==nullptr || v==nullptr) { | 
|---|
| 61 | uprv_free(pv); | 
|---|
| 62 | uprv_free(v); | 
|---|
| 63 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 64 | return nullptr; | 
|---|
| 65 | } | 
|---|
| 66 | uprv_memset(pv, 0, sizeof(UPropsVectors)); | 
|---|
| 67 | pv->v=v; | 
|---|
| 68 | pv->columns=columns; | 
|---|
| 69 | pv->maxRows=UPVEC_INITIAL_ROWS; | 
|---|
| 70 | pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); | 
|---|
| 71 |  | 
|---|
| 72 | /* set the all-Unicode row and the special-value rows */ | 
|---|
| 73 | row=pv->v; | 
|---|
| 74 | uprv_memset(row, 0, pv->rows*columns*4); | 
|---|
| 75 | row[0]=0; | 
|---|
| 76 | row[1]=0x110000; | 
|---|
| 77 | row+=columns; | 
|---|
| 78 | for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { | 
|---|
| 79 | row[0]=cp; | 
|---|
| 80 | row[1]=cp+1; | 
|---|
| 81 | row+=columns; | 
|---|
| 82 | } | 
|---|
| 83 | return pv; | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|
| 86 | U_CAPI void U_EXPORT2 | 
|---|
| 87 | upvec_close(UPropsVectors *pv) { | 
|---|
| 88 | if(pv!=nullptr) { | 
|---|
| 89 | uprv_free(pv->v); | 
|---|
| 90 | uprv_free(pv); | 
|---|
| 91 | } | 
|---|
| 92 | } | 
|---|
| 93 |  | 
|---|
| 94 | static uint32_t * | 
|---|
| 95 | _findRow(UPropsVectors *pv, UChar32 rangeStart) { | 
|---|
| 96 | uint32_t *row; | 
|---|
| 97 | int32_t columns, i, start, limit, prevRow; | 
|---|
| 98 |  | 
|---|
| 99 | columns=pv->columns; | 
|---|
| 100 | limit=pv->rows; | 
|---|
| 101 | prevRow=pv->prevRow; | 
|---|
| 102 |  | 
|---|
| 103 | /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ | 
|---|
| 104 | row=pv->v+prevRow*columns; | 
|---|
| 105 | if(rangeStart>=(UChar32)row[0]) { | 
|---|
| 106 | if(rangeStart<(UChar32)row[1]) { | 
|---|
| 107 | /* same row as last seen */ | 
|---|
| 108 | return row; | 
|---|
| 109 | } else if(rangeStart<(UChar32)(row+=columns)[1]) { | 
|---|
| 110 | /* next row after the last one */ | 
|---|
| 111 | pv->prevRow=prevRow+1; | 
|---|
| 112 | return row; | 
|---|
| 113 | } else if(rangeStart<(UChar32)(row+=columns)[1]) { | 
|---|
| 114 | /* second row after the last one */ | 
|---|
| 115 | pv->prevRow=prevRow+2; | 
|---|
| 116 | return row; | 
|---|
| 117 | } else if((rangeStart-(UChar32)row[1])<10) { | 
|---|
| 118 | /* we are close, continue looping */ | 
|---|
| 119 | prevRow+=2; | 
|---|
| 120 | do { | 
|---|
| 121 | ++prevRow; | 
|---|
| 122 | row+=columns; | 
|---|
| 123 | } while(rangeStart>=(UChar32)row[1]); | 
|---|
| 124 | pv->prevRow=prevRow; | 
|---|
| 125 | return row; | 
|---|
| 126 | } | 
|---|
| 127 | } else if(rangeStart<(UChar32)pv->v[1]) { | 
|---|
| 128 | /* the very first row */ | 
|---|
| 129 | pv->prevRow=0; | 
|---|
| 130 | return pv->v; | 
|---|
| 131 | } | 
|---|
| 132 |  | 
|---|
| 133 | /* do a binary search for the start of the range */ | 
|---|
| 134 | start=0; | 
|---|
| 135 | while(start<limit-1) { | 
|---|
| 136 | i=(start+limit)/2; | 
|---|
| 137 | row=pv->v+i*columns; | 
|---|
| 138 | if(rangeStart<(UChar32)row[0]) { | 
|---|
| 139 | limit=i; | 
|---|
| 140 | } else if(rangeStart<(UChar32)row[1]) { | 
|---|
| 141 | pv->prevRow=i; | 
|---|
| 142 | return row; | 
|---|
| 143 | } else { | 
|---|
| 144 | start=i; | 
|---|
| 145 | } | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 | /* must be found because all ranges together always cover all of Unicode */ | 
|---|
| 149 | pv->prevRow=start; | 
|---|
| 150 | return pv->v+start*columns; | 
|---|
| 151 | } | 
|---|
| 152 |  | 
|---|
| 153 | U_CAPI void U_EXPORT2 | 
|---|
| 154 | upvec_setValue(UPropsVectors *pv, | 
|---|
| 155 | UChar32 start, UChar32 end, | 
|---|
| 156 | int32_t column, | 
|---|
| 157 | uint32_t value, uint32_t mask, | 
|---|
| 158 | UErrorCode *pErrorCode) { | 
|---|
| 159 | uint32_t *firstRow, *lastRow; | 
|---|
| 160 | int32_t columns; | 
|---|
| 161 | UChar32 limit; | 
|---|
| 162 | UBool splitFirstRow, splitLastRow; | 
|---|
| 163 |  | 
|---|
| 164 | /* argument checking */ | 
|---|
| 165 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 166 | return; | 
|---|
| 167 | } | 
|---|
| 168 | if( pv==nullptr || | 
|---|
| 169 | start<0 || start>end || end>UPVEC_MAX_CP || | 
|---|
| 170 | column<0 || column>=(pv->columns-2) | 
|---|
| 171 | ) { | 
|---|
| 172 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
|---|
| 173 | return; | 
|---|
| 174 | } | 
|---|
| 175 | if(pv->isCompacted) { | 
|---|
| 176 | *pErrorCode=U_NO_WRITE_PERMISSION; | 
|---|
| 177 | return; | 
|---|
| 178 | } | 
|---|
| 179 | limit=end+1; | 
|---|
| 180 |  | 
|---|
| 181 | /* initialize */ | 
|---|
| 182 | columns=pv->columns; | 
|---|
| 183 | column+=2; /* skip range start and limit columns */ | 
|---|
| 184 | value&=mask; | 
|---|
| 185 |  | 
|---|
| 186 | /* find the rows whose ranges overlap with the input range */ | 
|---|
| 187 |  | 
|---|
| 188 | /* find the first and last rows, always successful */ | 
|---|
| 189 | firstRow=_findRow(pv, start); | 
|---|
| 190 | lastRow=_findRow(pv, end); | 
|---|
| 191 |  | 
|---|
| 192 | /* | 
|---|
| 193 | * Rows need to be split if they partially overlap with the | 
|---|
| 194 | * input range (only possible for the first and last rows) | 
|---|
| 195 | * and if their value differs from the input value. | 
|---|
| 196 | */ | 
|---|
| 197 | splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); | 
|---|
| 198 | splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); | 
|---|
| 199 |  | 
|---|
| 200 | /* split first/last rows if necessary */ | 
|---|
| 201 | if(splitFirstRow || splitLastRow) { | 
|---|
| 202 | int32_t count, rows; | 
|---|
| 203 |  | 
|---|
| 204 | rows=pv->rows; | 
|---|
| 205 | if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { | 
|---|
| 206 | uint32_t *newVectors; | 
|---|
| 207 | int32_t newMaxRows; | 
|---|
| 208 |  | 
|---|
| 209 | if(pv->maxRows<UPVEC_MEDIUM_ROWS) { | 
|---|
| 210 | newMaxRows=UPVEC_MEDIUM_ROWS; | 
|---|
| 211 | } else if(pv->maxRows<UPVEC_MAX_ROWS) { | 
|---|
| 212 | newMaxRows=UPVEC_MAX_ROWS; | 
|---|
| 213 | } else { | 
|---|
| 214 | /* Implementation bug, or UPVEC_MAX_ROWS too low. */ | 
|---|
| 215 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; | 
|---|
| 216 | return; | 
|---|
| 217 | } | 
|---|
| 218 | newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); | 
|---|
| 219 | if(newVectors==nullptr) { | 
|---|
| 220 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 221 | return; | 
|---|
| 222 | } | 
|---|
| 223 | uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4); | 
|---|
| 224 | firstRow=newVectors+(firstRow-pv->v); | 
|---|
| 225 | lastRow=newVectors+(lastRow-pv->v); | 
|---|
| 226 | uprv_free(pv->v); | 
|---|
| 227 | pv->v=newVectors; | 
|---|
| 228 | pv->maxRows=newMaxRows; | 
|---|
| 229 | } | 
|---|
| 230 |  | 
|---|
| 231 | /* count the number of row cells to move after the last row, and move them */ | 
|---|
| 232 | count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); | 
|---|
| 233 | if(count>0) { | 
|---|
| 234 | uprv_memmove( | 
|---|
| 235 | lastRow+(1+splitFirstRow+splitLastRow)*columns, | 
|---|
| 236 | lastRow+columns, | 
|---|
| 237 | count*4); | 
|---|
| 238 | } | 
|---|
| 239 | pv->rows=rows+splitFirstRow+splitLastRow; | 
|---|
| 240 |  | 
|---|
| 241 | /* split the first row, and move the firstRow pointer to the second part */ | 
|---|
| 242 | if(splitFirstRow) { | 
|---|
| 243 | /* copy all affected rows up one and move the lastRow pointer */ | 
|---|
| 244 | count = (int32_t)((lastRow-firstRow)+columns); | 
|---|
| 245 | uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); | 
|---|
| 246 | lastRow+=columns; | 
|---|
| 247 |  | 
|---|
| 248 | /* split the range and move the firstRow pointer */ | 
|---|
| 249 | firstRow[1]=firstRow[columns]=(uint32_t)start; | 
|---|
| 250 | firstRow+=columns; | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | /* split the last row */ | 
|---|
| 254 | if(splitLastRow) { | 
|---|
| 255 | /* copy the last row data */ | 
|---|
| 256 | uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); | 
|---|
| 257 |  | 
|---|
| 258 | /* split the range and move the firstRow pointer */ | 
|---|
| 259 | lastRow[1]=lastRow[columns]=(uint32_t)limit; | 
|---|
| 260 | } | 
|---|
| 261 | } | 
|---|
| 262 |  | 
|---|
| 263 | /* set the "row last seen" to the last row for the range */ | 
|---|
| 264 | pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); | 
|---|
| 265 |  | 
|---|
| 266 | /* set the input value in all remaining rows */ | 
|---|
| 267 | firstRow+=column; | 
|---|
| 268 | lastRow+=column; | 
|---|
| 269 | mask=~mask; | 
|---|
| 270 | for(;;) { | 
|---|
| 271 | *firstRow=(*firstRow&mask)|value; | 
|---|
| 272 | if(firstRow==lastRow) { | 
|---|
| 273 | break; | 
|---|
| 274 | } | 
|---|
| 275 | firstRow+=columns; | 
|---|
| 276 | } | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | U_CAPI uint32_t U_EXPORT2 | 
|---|
| 280 | upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { | 
|---|
| 281 | uint32_t *row; | 
|---|
| 282 | UPropsVectors *ncpv; | 
|---|
| 283 |  | 
|---|
| 284 | if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { | 
|---|
| 285 | return 0; | 
|---|
| 286 | } | 
|---|
| 287 | ncpv=(UPropsVectors *)pv; | 
|---|
| 288 | row=_findRow(ncpv, c); | 
|---|
| 289 | return row[2+column]; | 
|---|
| 290 | } | 
|---|
| 291 |  | 
|---|
| 292 | U_CAPI uint32_t * U_EXPORT2 | 
|---|
| 293 | upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, | 
|---|
| 294 | UChar32 *pRangeStart, UChar32 *pRangeEnd) { | 
|---|
| 295 | uint32_t *row; | 
|---|
| 296 | int32_t columns; | 
|---|
| 297 |  | 
|---|
| 298 | if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { | 
|---|
| 299 | return nullptr; | 
|---|
| 300 | } | 
|---|
| 301 |  | 
|---|
| 302 | columns=pv->columns; | 
|---|
| 303 | row=pv->v+rowIndex*columns; | 
|---|
| 304 | if(pRangeStart!=nullptr) { | 
|---|
| 305 | *pRangeStart=(UChar32)row[0]; | 
|---|
| 306 | } | 
|---|
| 307 | if(pRangeEnd!=nullptr) { | 
|---|
| 308 | *pRangeEnd=(UChar32)row[1]-1; | 
|---|
| 309 | } | 
|---|
| 310 | return row+2; | 
|---|
| 311 | } | 
|---|
| 312 |  | 
|---|
| 313 | static int32_t U_CALLCONV | 
|---|
| 314 | upvec_compareRows(const void *context, const void *l, const void *r) { | 
|---|
| 315 | const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; | 
|---|
| 316 | const UPropsVectors *pv=(const UPropsVectors *)context; | 
|---|
| 317 | int32_t i, count, columns; | 
|---|
| 318 |  | 
|---|
| 319 | count=columns=pv->columns; /* includes start/limit columns */ | 
|---|
| 320 |  | 
|---|
| 321 | /* start comparing after start/limit but wrap around to them */ | 
|---|
| 322 | i=2; | 
|---|
| 323 | do { | 
|---|
| 324 | if(left[i]!=right[i]) { | 
|---|
| 325 | return left[i]<right[i] ? -1 : 1; | 
|---|
| 326 | } | 
|---|
| 327 | if(++i==columns) { | 
|---|
| 328 | i=0; | 
|---|
| 329 | } | 
|---|
| 330 | } while(--count>0); | 
|---|
| 331 |  | 
|---|
| 332 | return 0; | 
|---|
| 333 | } | 
|---|
| 334 |  | 
|---|
| 335 | U_CAPI void U_EXPORT2 | 
|---|
| 336 | upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { | 
|---|
| 337 | uint32_t *row; | 
|---|
| 338 | int32_t i, columns, valueColumns, rows, count; | 
|---|
| 339 | UChar32 start, limit; | 
|---|
| 340 |  | 
|---|
| 341 | /* argument checking */ | 
|---|
| 342 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 343 | return; | 
|---|
| 344 | } | 
|---|
| 345 | if(handler==nullptr) { | 
|---|
| 346 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
|---|
| 347 | return; | 
|---|
| 348 | } | 
|---|
| 349 | if(pv->isCompacted) { | 
|---|
| 350 | return; | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|
| 353 | /* Set the flag now: Sorting and compacting destroys the builder data structure. */ | 
|---|
| 354 | pv->isCompacted=true; | 
|---|
| 355 |  | 
|---|
| 356 | rows=pv->rows; | 
|---|
| 357 | columns=pv->columns; | 
|---|
| 358 | U_ASSERT(columns>=3); /* upvec_open asserts this */ | 
|---|
| 359 | valueColumns=columns-2; /* not counting start & limit */ | 
|---|
| 360 |  | 
|---|
| 361 | /* sort the properties vectors to find unique vector values */ | 
|---|
| 362 | uprv_sortArray(pv->v, rows, columns*4, | 
|---|
| 363 | upvec_compareRows, pv, false, pErrorCode); | 
|---|
| 364 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 365 | return; | 
|---|
| 366 | } | 
|---|
| 367 |  | 
|---|
| 368 | /* | 
|---|
| 369 | * Find and set the special values. | 
|---|
| 370 | * This has to do almost the same work as the compaction below, | 
|---|
| 371 | * to find the indexes where the special-value rows will move. | 
|---|
| 372 | */ | 
|---|
| 373 | row=pv->v; | 
|---|
| 374 | count=-valueColumns; | 
|---|
| 375 | for(i=0; i<rows; ++i) { | 
|---|
| 376 | start=(UChar32)row[0]; | 
|---|
| 377 |  | 
|---|
| 378 | /* count a new values vector if it is different from the current one */ | 
|---|
| 379 | if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { | 
|---|
| 380 | count+=valueColumns; | 
|---|
| 381 | } | 
|---|
| 382 |  | 
|---|
| 383 | if(start>=UPVEC_FIRST_SPECIAL_CP) { | 
|---|
| 384 | handler(context, start, start, count, row+2, valueColumns, pErrorCode); | 
|---|
| 385 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 386 | return; | 
|---|
| 387 | } | 
|---|
| 388 | } | 
|---|
| 389 |  | 
|---|
| 390 | row+=columns; | 
|---|
| 391 | } | 
|---|
| 392 |  | 
|---|
| 393 | /* count is at the beginning of the last vector, add valueColumns to include that last vector */ | 
|---|
| 394 | count+=valueColumns; | 
|---|
| 395 |  | 
|---|
| 396 | /* Call the handler once more to signal the start of delivering real values. */ | 
|---|
| 397 | handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, | 
|---|
| 398 | count, row-valueColumns, valueColumns, pErrorCode); | 
|---|
| 399 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 400 | return; | 
|---|
| 401 | } | 
|---|
| 402 |  | 
|---|
| 403 | /* | 
|---|
| 404 | * Move vector contents up to a contiguous array with only unique | 
|---|
| 405 | * vector values, and call the handler function for each vector. | 
|---|
| 406 | * | 
|---|
| 407 | * This destroys the Properties Vector structure and replaces it | 
|---|
| 408 | * with an array of just vector values. | 
|---|
| 409 | */ | 
|---|
| 410 | row=pv->v; | 
|---|
| 411 | count=-valueColumns; | 
|---|
| 412 | for(i=0; i<rows; ++i) { | 
|---|
| 413 | /* fetch these first before memmove() may overwrite them */ | 
|---|
| 414 | start=(UChar32)row[0]; | 
|---|
| 415 | limit=(UChar32)row[1]; | 
|---|
| 416 |  | 
|---|
| 417 | /* add a new values vector if it is different from the current one */ | 
|---|
| 418 | if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { | 
|---|
| 419 | count+=valueColumns; | 
|---|
| 420 | uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); | 
|---|
| 421 | } | 
|---|
| 422 |  | 
|---|
| 423 | if(start<UPVEC_FIRST_SPECIAL_CP) { | 
|---|
| 424 | handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode); | 
|---|
| 425 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 426 | return; | 
|---|
| 427 | } | 
|---|
| 428 | } | 
|---|
| 429 |  | 
|---|
| 430 | row+=columns; | 
|---|
| 431 | } | 
|---|
| 432 |  | 
|---|
| 433 | /* count is at the beginning of the last vector, add one to include that last vector */ | 
|---|
| 434 | pv->rows=count/valueColumns+1; | 
|---|
| 435 | } | 
|---|
| 436 |  | 
|---|
| 437 | U_CAPI const uint32_t * U_EXPORT2 | 
|---|
| 438 | upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { | 
|---|
| 439 | if(!pv->isCompacted) { | 
|---|
| 440 | return nullptr; | 
|---|
| 441 | } | 
|---|
| 442 | if(pRows!=nullptr) { | 
|---|
| 443 | *pRows=pv->rows; | 
|---|
| 444 | } | 
|---|
| 445 | if(pColumns!=nullptr) { | 
|---|
| 446 | *pColumns=pv->columns-2; | 
|---|
| 447 | } | 
|---|
| 448 | return pv->v; | 
|---|
| 449 | } | 
|---|
| 450 |  | 
|---|
| 451 | U_CAPI uint32_t * U_EXPORT2 | 
|---|
| 452 | upvec_cloneArray(const UPropsVectors *pv, | 
|---|
| 453 | int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { | 
|---|
| 454 | uint32_t *clonedArray; | 
|---|
| 455 | int32_t byteLength; | 
|---|
| 456 |  | 
|---|
| 457 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 458 | return nullptr; | 
|---|
| 459 | } | 
|---|
| 460 | if(!pv->isCompacted) { | 
|---|
| 461 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
|---|
| 462 | return nullptr; | 
|---|
| 463 | } | 
|---|
| 464 | byteLength=pv->rows*(pv->columns-2)*4; | 
|---|
| 465 | clonedArray=(uint32_t *)uprv_malloc(byteLength); | 
|---|
| 466 | if(clonedArray==nullptr) { | 
|---|
| 467 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 468 | return nullptr; | 
|---|
| 469 | } | 
|---|
| 470 | uprv_memcpy(clonedArray, pv->v, byteLength); | 
|---|
| 471 | if(pRows!=nullptr) { | 
|---|
| 472 | *pRows=pv->rows; | 
|---|
| 473 | } | 
|---|
| 474 | if(pColumns!=nullptr) { | 
|---|
| 475 | *pColumns=pv->columns-2; | 
|---|
| 476 | } | 
|---|
| 477 | return clonedArray; | 
|---|
| 478 | } | 
|---|
| 479 |  | 
|---|
| 480 | U_CAPI UTrie2 * U_EXPORT2 | 
|---|
| 481 | upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { | 
|---|
| 482 | UPVecToUTrie2Context toUTrie2={ nullptr, 0, 0, 0 }; | 
|---|
| 483 | upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); | 
|---|
| 484 | utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); | 
|---|
| 485 | if(U_FAILURE(*pErrorCode)) { | 
|---|
| 486 | utrie2_close(toUTrie2.trie); | 
|---|
| 487 | toUTrie2.trie=nullptr; | 
|---|
| 488 | } | 
|---|
| 489 | return toUTrie2.trie; | 
|---|
| 490 | } | 
|---|
| 491 |  | 
|---|
| 492 | /* | 
|---|
| 493 | * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts | 
|---|
| 494 | * some 16-bit field and builds and returns a UTrie2. | 
|---|
| 495 | */ | 
|---|
| 496 |  | 
|---|
| 497 | U_CAPI void U_CALLCONV | 
|---|
| 498 | upvec_compactToUTrie2Handler(void *context, | 
|---|
| 499 | UChar32 start, UChar32 end, | 
|---|
| 500 | int32_t rowIndex, uint32_t *row, int32_t columns, | 
|---|
| 501 | UErrorCode *pErrorCode) { | 
|---|
| 502 | (void)row; | 
|---|
| 503 | (void)columns; | 
|---|
| 504 | UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; | 
|---|
| 505 | if(start<UPVEC_FIRST_SPECIAL_CP) { | 
|---|
| 506 | utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, true, pErrorCode); | 
|---|
| 507 | } else { | 
|---|
| 508 | switch(start) { | 
|---|
| 509 | case UPVEC_INITIAL_VALUE_CP: | 
|---|
| 510 | toUTrie2->initialValue=rowIndex; | 
|---|
| 511 | break; | 
|---|
| 512 | case UPVEC_ERROR_VALUE_CP: | 
|---|
| 513 | toUTrie2->errorValue=rowIndex; | 
|---|
| 514 | break; | 
|---|
| 515 | case UPVEC_START_REAL_VALUES_CP: | 
|---|
| 516 | toUTrie2->maxValue=rowIndex; | 
|---|
| 517 | if(rowIndex>0xffff) { | 
|---|
| 518 | /* too many rows for a 16-bit trie */ | 
|---|
| 519 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | 
|---|
| 520 | } else { | 
|---|
| 521 | toUTrie2->trie=utrie2_open(toUTrie2->initialValue, | 
|---|
| 522 | toUTrie2->errorValue, pErrorCode); | 
|---|
| 523 | } | 
|---|
| 524 | break; | 
|---|
| 525 | default: | 
|---|
| 526 | break; | 
|---|
| 527 | } | 
|---|
| 528 | } | 
|---|
| 529 | } | 
|---|
| 530 |  | 
|---|