| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ******************************************************************************* | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2002-2011 International Business Machines | 
|---|
| 7 | *   Corporation and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ******************************************************************************* | 
|---|
| 10 | *   file name:  uiter.h | 
|---|
| 11 | *   encoding:   UTF-8 | 
|---|
| 12 | *   tab size:   8 (not used) | 
|---|
| 13 | *   indentation:4 | 
|---|
| 14 | * | 
|---|
| 15 | *   created on: 2002jan18 | 
|---|
| 16 | *   created by: Markus W. Scherer | 
|---|
| 17 | */ | 
|---|
| 18 |  | 
|---|
| 19 | #ifndef __UITER_H__ | 
|---|
| 20 | #define __UITER_H__ | 
|---|
| 21 |  | 
|---|
| 22 | /** | 
|---|
| 23 | * \file | 
|---|
| 24 | * \brief C API: Unicode Character Iteration | 
|---|
| 25 | * | 
|---|
| 26 | * @see UCharIterator | 
|---|
| 27 | */ | 
|---|
| 28 |  | 
|---|
| 29 | #include "unicode/utypes.h" | 
|---|
| 30 |  | 
|---|
| 31 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 32 | U_NAMESPACE_BEGIN | 
|---|
| 33 |  | 
|---|
| 34 | class CharacterIterator; | 
|---|
| 35 | class Replaceable; | 
|---|
| 36 |  | 
|---|
| 37 | U_NAMESPACE_END | 
|---|
| 38 | #endif | 
|---|
| 39 |  | 
|---|
| 40 | U_CDECL_BEGIN | 
|---|
| 41 |  | 
|---|
| 42 | struct UCharIterator; | 
|---|
| 43 | typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator; the structure itself is defined further below in this file. @stable ICU 2.1 */ | 
|---|
| 44 |  | 
|---|
| 45 | /** | 
|---|
| 46 | * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). They select the reference point: index 0, the start, the current position, the limit, or the length. | 
|---|
| 47 | * @see UCharIteratorMove | 
|---|
| 48 | * @see UCharIterator | 
|---|
| 49 | * @stable ICU 2.1 | 
|---|
| 50 | */ | 
|---|
| 51 | typedef enum UCharIteratorOrigin { | 
|---|
| 52 | UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH | 
|---|
| 53 | } UCharIteratorOrigin; | 
|---|
| 54 |  | 
|---|
| 55 | /** Constants for UCharIterator (special return values of move()). @stable ICU 2.6 */ | 
|---|
| 56 | enum { | 
|---|
| 57 | /** | 
|---|
| 58 | * Constant value that may be returned by UCharIteratorMove | 
|---|
| 59 | * indicating that the final UTF-16 index is not known, but that the move succeeded. | 
|---|
| 60 | * This can occur when moving relative to limit or length, or | 
|---|
| 61 | * when moving relative to the current index after a setState() | 
|---|
| 62 | * when the current UTF-16 index is not known. | 
|---|
| 63 | * | 
|---|
| 64 | * It would be very inefficient to have to count from the beginning of the text | 
|---|
| 65 | * just to get the current/limit/length index after moving relative to it. | 
|---|
| 66 | * The actual index can be determined with getIndex(UITER_CURRENT) | 
|---|
| 67 | * which will count the UChars if necessary. | 
|---|
| 68 | * | 
|---|
| 69 | * @stable ICU 2.6 | 
|---|
| 70 | */ | 
|---|
| 71 | UITER_UNKNOWN_INDEX=-2 | 
|---|
| 72 | }; | 
|---|
| 73 |  | 
|---|
| 74 |  | 
|---|
| 75 | /** | 
|---|
| 76 | * Constant for UCharIterator getState() indicating an error or | 
|---|
| 77 | * an unknown state (the value has all 32 bits set). | 
|---|
| 78 | * Returned by uiter_getState()/UCharIteratorGetState | 
|---|
| 79 | * when an error occurs. | 
|---|
| 80 | * Also, some UCharIterator implementations may not be able to return | 
|---|
| 81 | * a valid state for each position. This will be clearly documented | 
|---|
| 82 | * for each such iterator (none of the public ones here). | 
|---|
| 83 | * | 
|---|
| 84 | * @stable ICU 2.6 | 
|---|
| 85 | */ | 
|---|
| 86 | #define UITER_NO_STATE ((uint32_t)0xffffffff) | 
|---|
| 87 |  | 
|---|
| 88 | /** | 
|---|
| 89 | * Function type declaration for UCharIterator.getIndex(). | 
|---|
| 90 | * | 
|---|
| 91 | * Gets the current position, or the start or limit of the | 
|---|
| 92 | * iteration range, or index 0 or the length, as selected by origin. | 
|---|
| 93 | * | 
|---|
| 94 | * This function may perform slowly for UITER_CURRENT after setState() was called, | 
|---|
| 95 | * or for UITER_LENGTH, because an iterator implementation may have to count | 
|---|
| 96 | * UChars if the underlying storage is not UTF-16. | 
|---|
| 97 | * | 
|---|
| 98 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 99 | * @param origin selects which index to get: the 0, start, limit, length, or current index | 
|---|
| 100 | * @return the requested index, or U_SENTINEL in an error condition | 
|---|
| 101 | * | 
|---|
| 102 | * @see UCharIteratorOrigin | 
|---|
| 103 | * @see UCharIterator | 
|---|
| 104 | * @stable ICU 2.1 | 
|---|
| 105 | */ | 
|---|
| 106 | typedef int32_t U_CALLCONV | 
|---|
| 107 | UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); | 
|---|
| 108 |  | 
|---|
| 109 | /** | 
|---|
| 110 | * Function type declaration for UCharIterator.move(). | 
|---|
| 111 | * | 
|---|
| 112 | * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). | 
|---|
| 113 | * | 
|---|
| 114 | * Moves the current position relative to the start or limit of the | 
|---|
| 115 | * iteration range, or relative to the current position itself. | 
|---|
| 116 | * The movement is expressed in numbers of code units forward | 
|---|
| 117 | * or backward by specifying a positive or negative delta. | 
|---|
| 118 | * Out of bounds movement will be pinned to the start or limit. | 
|---|
| 119 | * | 
|---|
| 120 | * This function may perform slowly for moving relative to UITER_LENGTH | 
|---|
| 121 | * because an iterator implementation may have to count the rest of the | 
|---|
| 122 | * UChars if the native storage is not UTF-16. | 
|---|
| 123 | * | 
|---|
| 124 | * When moving relative to the limit or length, or | 
|---|
| 125 | * relative to the current position after setState() was called, | 
|---|
| 126 | * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient | 
|---|
| 127 | * determination of the actual UTF-16 index. | 
|---|
| 128 | * The actual index can be determined with getIndex(UITER_CURRENT) | 
|---|
| 129 | * which will count the UChars if necessary. | 
|---|
| 130 | * See UITER_UNKNOWN_INDEX for details. | 
|---|
| 131 | * | 
|---|
| 132 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 133 | * @param delta number of code units to move; can be positive (forward), zero, or negative (backward) | 
|---|
| 134 | * @param origin move relative to the 0, start, limit, length, or current index | 
|---|
| 135 | * @return the new index, or U_SENTINEL on an error condition, | 
|---|
| 136 | *         or UITER_UNKNOWN_INDEX when the index is not known. | 
|---|
| 137 | * | 
|---|
| 138 | * @see UCharIteratorOrigin | 
|---|
| 139 | * @see UCharIterator | 
|---|
| 140 | * @see UITER_UNKNOWN_INDEX | 
|---|
| 141 | * @stable ICU 2.1 | 
|---|
| 142 | */ | 
|---|
| 143 | typedef int32_t U_CALLCONV | 
|---|
| 144 | UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); | 
|---|
| 145 |  | 
|---|
| 146 | /** | 
|---|
| 147 | * Function type declaration for UCharIterator.hasNext(). | 
|---|
| 148 | * | 
|---|
| 149 | * Check if current() and next() can still | 
|---|
| 150 | * return another code unit. | 
|---|
| 151 | * | 
|---|
| 152 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 153 | * @return true if current() and next() can still return another code unit, otherwise false | 
|---|
| 154 | * | 
|---|
| 155 | * @see UCharIterator | 
|---|
| 156 | * @stable ICU 2.1 | 
|---|
| 157 | */ | 
|---|
| 158 | typedef UBool U_CALLCONV | 
|---|
| 159 | UCharIteratorHasNext(UCharIterator *iter); | 
|---|
| 160 |  | 
|---|
| 161 | /** | 
|---|
| 162 | * Function type declaration for UCharIterator.hasPrevious(). | 
|---|
| 163 | * | 
|---|
| 164 | * Check if previous() can still return another code unit. | 
|---|
| 165 | * | 
|---|
| 166 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 167 | * @return true if previous() can still return another code unit, otherwise false | 
|---|
| 168 | * | 
|---|
| 169 | * @see UCharIterator | 
|---|
| 170 | * @stable ICU 2.1 | 
|---|
| 171 | */ | 
|---|
| 172 | typedef UBool U_CALLCONV | 
|---|
| 173 | UCharIteratorHasPrevious(UCharIterator *iter); | 
|---|
| 174 |  | 
|---|
| 175 | /** | 
|---|
| 176 | * Function type declaration for UCharIterator.current(). | 
|---|
| 177 | * | 
|---|
| 178 | * Return the code unit at the current position, | 
|---|
| 179 | * or U_SENTINEL if there is none (index is at the limit). | 
|---|
| 180 | * | 
|---|
| 181 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 182 | * @return the current code unit, or U_SENTINEL if there is none (index is at the limit) | 
|---|
| 183 | * | 
|---|
| 184 | * @see UCharIterator | 
|---|
| 185 | * @stable ICU 2.1 | 
|---|
| 186 | */ | 
|---|
| 187 | typedef UChar32 U_CALLCONV | 
|---|
| 188 | UCharIteratorCurrent(UCharIterator *iter); | 
|---|
| 189 |  | 
|---|
| 190 | /** | 
|---|
| 191 | * Function type declaration for UCharIterator.next(). | 
|---|
| 192 | * | 
|---|
| 193 | * Return the code unit at the current index and increment | 
|---|
| 194 | * the index (post-increment, like s[i++]), | 
|---|
| 195 | * or return U_SENTINEL if there is none (index is at the limit). | 
|---|
| 196 | * | 
|---|
| 197 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 198 | * @return the current code unit (and post-increment the current index), or U_SENTINEL if there is none (index is at the limit) | 
|---|
| 199 | * | 
|---|
| 200 | * @see UCharIterator | 
|---|
| 201 | * @stable ICU 2.1 | 
|---|
| 202 | */ | 
|---|
| 203 | typedef UChar32 U_CALLCONV | 
|---|
| 204 | UCharIteratorNext(UCharIterator *iter); | 
|---|
| 205 |  | 
|---|
| 206 | /** | 
|---|
| 207 | * Function type declaration for UCharIterator.previous(). | 
|---|
| 208 | * | 
|---|
| 209 | * Decrement the index and return the code unit from there | 
|---|
| 210 | * (pre-decrement, like s[--i]), | 
|---|
| 211 | * or return U_SENTINEL if there is none (index is at the start). | 
|---|
| 212 | * | 
|---|
| 213 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 214 | * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if there is none (index is at the start) | 
|---|
| 215 | * | 
|---|
| 216 | * @see UCharIterator | 
|---|
| 217 | * @stable ICU 2.1 | 
|---|
| 218 | */ | 
|---|
| 219 | typedef UChar32 U_CALLCONV | 
|---|
| 220 | UCharIteratorPrevious(UCharIterator *iter); | 
|---|
| 221 |  | 
|---|
| 222 | /** | 
|---|
| 223 | * Function type declaration for UCharIterator.reservedFn(). | 
|---|
| 224 | * Reserved for future use; the UCharIterator.reservedFn pointer is documented as currently NULL. | 
|---|
| 225 | * | 
|---|
| 226 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 227 | * @param something some integer argument | 
|---|
| 228 | * @return some integer | 
|---|
| 229 | * | 
|---|
| 230 | * @see UCharIterator | 
|---|
| 231 | * @stable ICU 2.1 | 
|---|
| 232 | */ | 
|---|
| 233 | typedef int32_t U_CALLCONV | 
|---|
| 234 | UCharIteratorReserved(UCharIterator *iter, int32_t something); | 
|---|
| 235 |  | 
|---|
| 236 | /** | 
|---|
| 237 | * Function type declaration for UCharIterator.getState(). | 
|---|
| 238 | * | 
|---|
| 239 | * Get the "state" of the iterator in the form of a single 32-bit word. | 
|---|
| 240 | * It is recommended that the state value be calculated to be as small as | 
|---|
| 241 | * is feasible. For strings with limited lengths, fewer than 32 bits may | 
|---|
| 242 | * be sufficient. | 
|---|
| 243 | * | 
|---|
| 244 | * This is used together with setState()/UCharIteratorSetState | 
|---|
| 245 | * to save and restore the iterator position more efficiently than with | 
|---|
| 246 | * getIndex()/move(). | 
|---|
| 247 | * | 
|---|
| 248 | * The iterator state is defined as a uint32_t value because it is designed | 
|---|
| 249 | * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state | 
|---|
| 250 | * of the character iterator. | 
|---|
| 251 | * | 
|---|
| 252 | * With some UCharIterator implementations (e.g., UTF-8), | 
|---|
| 253 | * getting and setting the UTF-16 index with existing functions | 
|---|
| 254 | * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but | 
|---|
| 255 | * relatively slow because the iterator has to "walk" from a known index | 
|---|
| 256 | * to the requested one. | 
|---|
| 257 | * This takes more time the farther it needs to go. | 
|---|
| 258 | * | 
|---|
| 259 | * An opaque state value allows an iterator implementation to provide | 
|---|
| 260 | * an internal index (UTF-8: the source byte array index) for | 
|---|
| 261 | * fast, constant-time restoration. | 
|---|
| 262 | * | 
|---|
| 263 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | 
|---|
| 264 | * the UTF-16 index may not be restored as well, but the iterator can deliver | 
|---|
| 265 | * the correct text contents and move relative to the current position | 
|---|
| 266 | * without performance degradation. | 
|---|
| 267 | * | 
|---|
| 268 | * Some UCharIterator implementations may not be able to return | 
|---|
| 269 | * a valid state for each position, in which case they return UITER_NO_STATE instead. | 
|---|
| 270 | * This will be clearly documented for each such iterator (none of the public ones here). | 
|---|
| 271 | * | 
|---|
| 272 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 273 | * @return the state word, or UITER_NO_STATE if no valid state can be returned for the current position | 
|---|
| 274 | * | 
|---|
| 275 | * @see UCharIterator | 
|---|
| 276 | * @see UCharIteratorSetState | 
|---|
| 277 | * @see UITER_NO_STATE | 
|---|
| 278 | * @stable ICU 2.6 | 
|---|
| 279 | */ | 
|---|
| 280 | typedef uint32_t U_CALLCONV | 
|---|
| 281 | UCharIteratorGetState(const UCharIterator *iter); | 
|---|
| 282 |  | 
|---|
| 283 | /** | 
|---|
| 284 | * Function type declaration for UCharIterator.setState(). | 
|---|
| 285 | * | 
|---|
| 286 | * Restore the "state" of the iterator using a state word from a getState() call. | 
|---|
| 287 | * The iterator object need not be the same one on which getState() was called, | 
|---|
| 288 | * but it must be of the same type (set up using the same uiter_setXYZ function) | 
|---|
| 289 | * and it must iterate over the same string | 
|---|
| 290 | * (binary identical regardless of memory address). | 
|---|
| 291 | * For more about the state word see UCharIteratorGetState. | 
|---|
| 292 | * | 
|---|
| 293 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | 
|---|
| 294 | * the UTF-16 index may not be restored as well, but the iterator can deliver | 
|---|
| 295 | * the correct text contents and move relative to the current position | 
|---|
| 296 | * without performance degradation. | 
|---|
| 297 | * | 
|---|
| 298 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 299 | * @param state the state word from a getState() call | 
|---|
| 300 | *              on a same-type, same-string iterator | 
|---|
| 301 | * @param pErrorCode Must be a valid pointer to an error code value, | 
|---|
| 302 | *                   which must not indicate a failure before the function call. | 
|---|
| 303 | * | 
|---|
| 304 | * @see UCharIterator | 
|---|
| 305 | * @see UCharIteratorGetState | 
|---|
| 306 | * @stable ICU 2.6 | 
|---|
| 307 | */ | 
|---|
| 308 | typedef void U_CALLCONV | 
|---|
| 309 | UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | 
|---|
| 310 |  | 
|---|
| 311 |  | 
|---|
| 312 | /** | 
|---|
| 313 | * C API for code unit iteration. | 
|---|
| 314 | * This can be used as a C wrapper around | 
|---|
| 315 | * CharacterIterator, Replaceable, or implemented using simple strings, etc. | 
|---|
| 316 | * | 
|---|
| 317 | * There are two roles for using UCharIterator: | 
|---|
| 318 | * | 
|---|
| 319 | * A "provider" sets the necessary function pointers and controls the "protected" | 
|---|
| 320 | * fields of the UCharIterator structure. A "provider" passes a UCharIterator | 
|---|
| 321 | * into C APIs that need a UCharIterator as an abstract, flexible string interface. | 
|---|
| 322 | * | 
|---|
| 323 | * Implementations of such C APIs are "callers" of UCharIterator functions; | 
|---|
| 324 | * they only use the "public" function pointers and never access the "protected" | 
|---|
| 325 | * fields directly. | 
|---|
| 326 | * | 
|---|
| 327 | * The current() and next() functions only check the current index against the | 
|---|
| 328 | * limit, and previous() only checks the current index against the start, | 
|---|
| 329 | * to see if the iterator already reached the end of the iteration range. | 
|---|
| 330 | * | 
|---|
| 331 | * The assumption - in all iterators - is that the index is moved via the API, | 
|---|
| 332 | * which means it won't go out of bounds, or the index is modified by | 
|---|
| 333 | * user code that knows enough about the iterator implementation to set valid | 
|---|
| 334 | * index values. | 
|---|
| 335 | * | 
|---|
| 336 | * UCharIterator functions return code unit values 0..0xffff, | 
|---|
| 337 | * or U_SENTINEL if the iteration bounds are reached. | 
|---|
| 338 | * | 
|---|
| 339 | * @stable ICU 2.1 | 
|---|
| 340 | */ | 
|---|
| 341 | struct UCharIterator { | 
|---|
| 342 | /** | 
|---|
| 343 | * (protected) Pointer to string or wrapped object or similar. | 
|---|
| 344 | * Not used by caller. | 
|---|
| 345 | * @stable ICU 2.1 | 
|---|
| 346 | */ | 
|---|
| 347 | const void *context; | 
|---|
| 348 |  | 
|---|
| 349 | /** | 
|---|
| 350 | * (protected) Length of string or similar. | 
|---|
| 351 | * Not used by caller. | 
|---|
| 352 | * @stable ICU 2.1 | 
|---|
| 353 | */ | 
|---|
| 354 | int32_t length; | 
|---|
| 355 |  | 
|---|
| 356 | /** | 
|---|
| 357 | * (protected) Start index or similar. | 
|---|
| 358 | * Not used by caller. | 
|---|
| 359 | * @stable ICU 2.1 | 
|---|
| 360 | */ | 
|---|
| 361 | int32_t start; | 
|---|
| 362 |  | 
|---|
| 363 | /** | 
|---|
| 364 | * (protected) Current index or similar. | 
|---|
| 365 | * Not used by caller. | 
|---|
| 366 | * @stable ICU 2.1 | 
|---|
| 367 | */ | 
|---|
| 368 | int32_t index; | 
|---|
| 369 |  | 
|---|
| 370 | /** | 
|---|
| 371 | * (protected) Limit index or similar. | 
|---|
| 372 | * Not used by caller. | 
|---|
| 373 | * @stable ICU 2.1 | 
|---|
| 374 | */ | 
|---|
| 375 | int32_t limit; | 
|---|
| 376 |  | 
|---|
| 377 | /** | 
|---|
| 378 | * (protected) Used by UTF-8 iterators and possibly others. | 
|---|
| 379 | * @stable ICU 2.1 | 
|---|
| 380 | */ | 
|---|
| 381 | int32_t reservedField; | 
|---|
| 382 |  | 
|---|
| 383 | /** | 
|---|
| 384 | * (public) Returns the current position or the | 
|---|
| 385 | * start or limit index of the iteration range. | 
|---|
| 386 | * | 
|---|
| 387 | * @see UCharIteratorGetIndex | 
|---|
| 388 | * @stable ICU 2.1 | 
|---|
| 389 | */ | 
|---|
| 390 | UCharIteratorGetIndex *getIndex; | 
|---|
| 391 |  | 
|---|
| 392 | /** | 
|---|
| 393 | * (public) Moves the current position relative to the start or limit of the | 
|---|
| 394 | * iteration range, or relative to the current position itself. | 
|---|
| 395 | * The movement is expressed in numbers of code units forward | 
|---|
| 396 | * or backward by specifying a positive or negative delta. | 
|---|
| 397 | * | 
|---|
| 398 | * @see UCharIteratorMove | 
|---|
| 399 | * @stable ICU 2.1 | 
|---|
| 400 | */ | 
|---|
| 401 | UCharIteratorMove *move; | 
|---|
| 402 |  | 
|---|
| 403 | /** | 
|---|
| 404 | * (public) Check if current() and next() can still | 
|---|
| 405 | * return another code unit. | 
|---|
| 406 | * | 
|---|
| 407 | * @see UCharIteratorHasNext | 
|---|
| 408 | * @stable ICU 2.1 | 
|---|
| 409 | */ | 
|---|
| 410 | UCharIteratorHasNext *hasNext; | 
|---|
| 411 |  | 
|---|
| 412 | /** | 
|---|
| 413 | * (public) Check if previous() can still return another code unit. | 
|---|
| 414 | * | 
|---|
| 415 | * @see UCharIteratorHasPrevious | 
|---|
| 416 | * @stable ICU 2.1 | 
|---|
| 417 | */ | 
|---|
| 418 | UCharIteratorHasPrevious *hasPrevious; | 
|---|
| 419 |  | 
|---|
| 420 | /** | 
|---|
| 421 | * (public) Return the code unit at the current position, | 
|---|
| 422 | * or U_SENTINEL if there is none (index is at the limit). | 
|---|
| 423 | * | 
|---|
| 424 | * @see UCharIteratorCurrent | 
|---|
| 425 | * @stable ICU 2.1 | 
|---|
| 426 | */ | 
|---|
| 427 | UCharIteratorCurrent *current; | 
|---|
| 428 |  | 
|---|
| 429 | /** | 
|---|
| 430 | * (public) Return the code unit at the current index and increment | 
|---|
| 431 | * the index (post-increment, like s[i++]), | 
|---|
| 432 | * or return U_SENTINEL if there is none (index is at the limit). | 
|---|
| 433 | * | 
|---|
| 434 | * @see UCharIteratorNext | 
|---|
| 435 | * @stable ICU 2.1 | 
|---|
| 436 | */ | 
|---|
| 437 | UCharIteratorNext *next; | 
|---|
| 438 |  | 
|---|
| 439 | /** | 
|---|
| 440 | * (public) Decrement the index and return the code unit from there | 
|---|
| 441 | * (pre-decrement, like s[--i]), | 
|---|
| 442 | * or return U_SENTINEL if there is none (index is at the start). | 
|---|
| 443 | * | 
|---|
| 444 | * @see UCharIteratorPrevious | 
|---|
| 445 | * @stable ICU 2.1 | 
|---|
| 446 | */ | 
|---|
| 447 | UCharIteratorPrevious *previous; | 
|---|
| 448 |  | 
|---|
| 449 | /** | 
|---|
| 450 | * (public) Reserved for future use. Currently NULL. | 
|---|
| 451 | * | 
|---|
| 452 | * @see UCharIteratorReserved | 
|---|
| 453 | * @stable ICU 2.1 | 
|---|
| 454 | */ | 
|---|
| 455 | UCharIteratorReserved *reservedFn; | 
|---|
| 456 |  | 
|---|
| 457 | /** | 
|---|
| 458 | * (public) Return the state of the iterator, to be restored later with setState(). | 
|---|
| 459 | * This function pointer is NULL if the iterator does not implement it. | 
|---|
| 460 | * | 
|---|
| 461 | * @see UCharIteratorGetState | 
|---|
| 462 | * @stable ICU 2.6 | 
|---|
| 463 | */ | 
|---|
| 464 | UCharIteratorGetState *getState; | 
|---|
| 465 |  | 
|---|
| 466 | /** | 
|---|
| 467 | * (public) Restore the iterator state from the state word from a call | 
|---|
| 468 | * to getState(). | 
|---|
| 469 | * This function pointer is NULL if the iterator does not implement it. | 
|---|
| 470 | * | 
|---|
| 471 | * @see UCharIteratorSetState | 
|---|
| 472 | * @stable ICU 2.6 | 
|---|
| 473 | */ | 
|---|
| 474 | UCharIteratorSetState *setState; | 
|---|
| 475 | }; | 
|---|
| 476 |  | 
|---|
| 477 | /** | 
|---|
| 478 | * Helper function for UCharIterator to get the code point | 
|---|
| 479 | * at the current index. | 
|---|
| 480 | * | 
|---|
| 481 | * Return the code point that includes the code unit at the current position, | 
|---|
| 482 | * or U_SENTINEL if there is none (index is at the limit). | 
|---|
| 483 | * If the current code unit is a lead or trail surrogate, | 
|---|
| 484 | * then the following or preceding surrogate is used to form | 
|---|
| 485 | * the code point value. | 
|---|
| 486 | * | 
|---|
| 487 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 488 | * @return the current code point, or U_SENTINEL if there is none (index is at the limit) | 
|---|
| 489 | * | 
|---|
| 490 | * @see UCharIterator | 
|---|
| 491 | * @see U16_GET | 
|---|
| 492 | * @see UnicodeString::char32At() | 
|---|
| 493 | * @stable ICU 2.1 | 
|---|
| 494 | */ | 
|---|
| 495 | U_CAPI UChar32 U_EXPORT2 | 
|---|
| 496 | uiter_current32(UCharIterator *iter); | 
|---|
| 497 |  | 
|---|
| 498 | /** | 
|---|
| 499 | * Helper function for UCharIterator to get the next code point. | 
|---|
| 500 | * | 
|---|
| 501 | * Return the code point at the current index and increment | 
|---|
| 502 | * the index (post-increment, like s[i++]), | 
|---|
| 503 | * or return U_SENTINEL if there is none (index is at the limit). | 
|---|
| 504 | * | 
|---|
| 505 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 506 | * @return the current code point (and post-increment the current index), or U_SENTINEL if there is none (index is at the limit) | 
|---|
| 507 | * | 
|---|
| 508 | * @see UCharIterator | 
|---|
| 509 | * @see U16_NEXT | 
|---|
| 510 | * @stable ICU 2.1 | 
|---|
| 511 | */ | 
|---|
| 512 | U_CAPI UChar32 U_EXPORT2 | 
|---|
| 513 | uiter_next32(UCharIterator *iter); | 
|---|
| 514 |  | 
|---|
| 515 | /** | 
|---|
| 516 | * Helper function for UCharIterator to get the previous code point. | 
|---|
| 517 | * | 
|---|
| 518 | * Decrement the index and return the code point from there | 
|---|
| 519 | * (pre-decrement, like s[--i]), | 
|---|
| 520 | * or return U_SENTINEL if there is none (index is at the start). | 
|---|
| 521 | * | 
|---|
| 522 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 523 | * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if there is none (index is at the start) | 
|---|
| 524 | * | 
|---|
| 525 | * @see UCharIterator | 
|---|
| 526 | * @see U16_PREV | 
|---|
| 527 | * @stable ICU 2.1 | 
|---|
| 528 | */ | 
|---|
| 529 | U_CAPI UChar32 U_EXPORT2 | 
|---|
| 530 | uiter_previous32(UCharIterator *iter); | 
|---|
| 531 |  | 
|---|
| 532 | /** | 
|---|
| 533 | * Get the "state" of the iterator in the form of a single 32-bit word. | 
|---|
| 534 | * This is a convenience function that calls iter->getState(iter) | 
|---|
| 535 | * if iter->getState is not NULL; | 
|---|
| 536 | * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. | 
|---|
| 537 | * | 
|---|
| 538 | * Some UCharIterator implementations may not be able to return | 
|---|
| 539 | * a valid state for each position, in which case they return UITER_NO_STATE instead. | 
|---|
| 540 | * This will be clearly documented for each such iterator (none of the public ones here). | 
|---|
| 541 | * | 
|---|
| 542 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 543 | * @return the state word, or UITER_NO_STATE if iter->getState is NULL or any other error occurs | 
|---|
| 544 | * | 
|---|
| 545 | * @see UCharIterator | 
|---|
| 546 | * @see UCharIteratorGetState | 
|---|
| 547 | * @see UITER_NO_STATE | 
|---|
| 548 | * @stable ICU 2.6 | 
|---|
| 549 | */ | 
|---|
| 550 | U_CAPI uint32_t U_EXPORT2 | 
|---|
| 551 | uiter_getState(const UCharIterator *iter); | 
|---|
| 552 |  | 
|---|
| 553 | /** | 
|---|
| 554 | * Restore the "state" of the iterator using a state word from a getState() call. | 
|---|
| 555 | * This is a convenience function that calls iter->setState(iter, state, pErrorCode) | 
|---|
| 556 | * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set in *pErrorCode. | 
|---|
| 557 | * | 
|---|
| 558 | * @param iter the UCharIterator structure ("this pointer") | 
|---|
| 559 | * @param state the state word from a getState() call | 
|---|
| 560 | *              on a same-type, same-string iterator | 
|---|
| 561 | * @param pErrorCode Must be a valid pointer to an error code value, | 
|---|
| 562 | *                   which must not indicate a failure before the function call. | 
|---|
| 563 | * | 
|---|
| 564 | * @see UCharIterator | 
|---|
| 565 | * @see UCharIteratorSetState | 
|---|
| 566 | * @stable ICU 2.6 | 
|---|
| 567 | */ | 
|---|
| 568 | U_CAPI void U_EXPORT2 | 
|---|
| 569 | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | 
|---|
| 570 |  | 
|---|
| 571 | /** | 
|---|
| 572 | * Set up a UCharIterator to iterate over a string. | 
|---|
| 573 | * | 
|---|
| 574 | * Sets the UCharIterator function pointers for iteration over the string s | 
|---|
| 575 | * with iteration boundaries start=index=0 and length=limit=string length. | 
|---|
| 576 | * The "provider" may set the start, index, and limit values at any time | 
|---|
| 577 | * within the range 0..length. | 
|---|
| 578 | * The length field will be ignored. | 
|---|
| 579 | * | 
|---|
| 580 | * The string pointer s is set into UCharIterator.context without copying | 
|---|
| 581 | * or reallocating the string contents. | 
|---|
| 582 | * | 
|---|
| 583 | * getState() simply returns the current index. | 
|---|
| 584 | * move() will always return the final index. | 
|---|
| 585 | * | 
|---|
| 586 | * @param iter UCharIterator structure to be set for iteration | 
|---|
| 587 | * @param s String to iterate over; the pointer is stored in iter->context, the contents are not copied | 
|---|
| 588 | * @param length Length of s, or -1 if NUL-terminated | 
|---|
| 589 | * | 
|---|
| 590 | * @see UCharIterator | 
|---|
| 591 | * @stable ICU 2.1 | 
|---|
| 592 | */ | 
|---|
| 593 | U_CAPI void U_EXPORT2 | 
|---|
| 594 | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); | 
|---|
| 595 |  | 
|---|
| 596 | /** | 
|---|
| 597 | * Set up a UCharIterator to iterate over a UTF-16BE string | 
|---|
| 598 | * (byte vector with a big-endian pair of bytes per UChar). | 
|---|
| 599 | * | 
|---|
| 600 | * Everything works just like with a normal UChar iterator (uiter_setString), | 
|---|
| 601 | * except that UChars are assembled from byte pairs, | 
|---|
| 602 | * and that the length argument here indicates an even number of bytes. | 
|---|
| 603 | * | 
|---|
| 604 | * getState() simply returns the current index. | 
|---|
| 605 | * move() will always return the final index. | 
|---|
| 606 | * | 
|---|
| 607 | * @param iter UCharIterator structure to be set for iteration | 
|---|
| 608 | * @param s UTF-16BE string (as a byte vector) to iterate over | 
|---|
| 609 | * @param length Length of s as an even number of bytes, or -1 if NUL-terminated | 
|---|
| 610 | *               (NUL means pair of 0 bytes at even index from s) | 
|---|
| 611 | * | 
|---|
| 612 | * @see UCharIterator | 
|---|
| 613 | * @see uiter_setString | 
|---|
| 614 | * @stable ICU 2.6 | 
|---|
| 615 | */ | 
|---|
| 616 | U_CAPI void U_EXPORT2 | 
|---|
| 617 | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); | 
|---|
| 618 |  | 
|---|
| 619 | /** | 
|---|
| 620 | * Set up a UCharIterator to iterate over a UTF-8 string. | 
|---|
| 621 | * | 
|---|
| 622 | * Sets the UCharIterator function pointers for iteration over the UTF-8 string s | 
|---|
| 623 | * with UTF-8 iteration boundaries 0 and length. | 
|---|
| 624 | * The implementation counts the UTF-16 index on the fly and | 
|---|
| 625 | * lazily evaluates the UTF-16 length of the text. | 
|---|
| 626 | * | 
|---|
| 627 | * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. | 
|---|
| 628 | * When the reservedField is not 0, then it contains a supplementary code point | 
|---|
| 629 | * and the UTF-16 index is between the two corresponding surrogates. | 
|---|
| 630 | * At that point, the UTF-8 index is behind that code point. | 
|---|
| 631 | * | 
|---|
| 632 | * The UTF-8 string pointer s is set into UCharIterator.context without copying | 
|---|
| 633 | * or reallocating the string contents. | 
|---|
| 634 | * | 
|---|
| 635 | * getState() returns a state value consisting of | 
|---|
| 636 | * - the current UTF-8 source byte index (bits 31..1) | 
|---|
| 637 | * - a flag (bit 0) that indicates whether the UChar position is in the middle | 
|---|
| 638 | *   of a surrogate pair | 
|---|
| 639 | *   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) | 
|---|
| 640 | * | 
|---|
| 641 | * getState() cannot also encode the UTF-16 index in the state value. | 
|---|
| 642 | * move(relative to limit or length), or | 
|---|
| 643 | * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. | 
|---|
| 644 | * | 
|---|
| 645 | * @param iter UCharIterator structure to be set for iteration | 
|---|
| 646 | * @param s UTF-8 string to iterate over; the pointer is stored in iter->context, the contents are not copied | 
|---|
| 647 | * @param length Length of s in bytes, or -1 if NUL-terminated | 
|---|
| 648 | * | 
|---|
| 649 | * @see UCharIterator | 
|---|
| 650 | * @stable ICU 2.6 | 
|---|
| 651 | */ | 
|---|
| 652 | U_CAPI void U_EXPORT2 | 
|---|
| 653 | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); | 
|---|
| 654 |  | 
|---|
| 655 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 656 |  | 
|---|
| 657 | /** | 
|---|
| 658 | * Set up a UCharIterator to wrap around a C++ CharacterIterator. | 
|---|
| 659 | * | 
|---|
| 660 | * Sets the UCharIterator function pointers for iteration using the | 
|---|
| 661 | * CharacterIterator charIter. | 
|---|
| 662 | * | 
|---|
| 663 | * The CharacterIterator pointer charIter is set into UCharIterator.context | 
|---|
| 664 | * without copying or cloning the CharacterIterator object. | 
|---|
| 665 | * The other "protected" UCharIterator fields are set to 0 and will be ignored. | 
|---|
| 666 | * The iteration index and boundaries are controlled by the CharacterIterator. | 
|---|
| 667 | * | 
|---|
| 668 | * getState() simply returns the current index. | 
|---|
| 669 | * move() will always return the final index. | 
|---|
| 670 | * | 
|---|
| 671 | * @param iter UCharIterator structure to be set for iteration | 
|---|
| 672 | * @param charIter CharacterIterator to wrap; the pointer is stored in iter->context, the object is not copied or cloned | 
|---|
| 673 | * | 
|---|
| 674 | * @see UCharIterator | 
|---|
| 675 | * @stable ICU 2.1 | 
|---|
| 676 | */ | 
|---|
| 677 | U_CAPI void U_EXPORT2 | 
|---|
| 678 | uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); | 
|---|
| 679 |  | 
|---|
| 680 | /** | 
|---|
| 681 | * Set up a UCharIterator to iterate over a C++ Replaceable. | 
|---|
| 682 | * | 
|---|
| 683 | * Sets the UCharIterator function pointers for iteration over the | 
|---|
| 684 | * Replaceable rep with iteration boundaries start=index=0 and | 
|---|
| 685 | * length=limit=rep->length(). | 
|---|
| 686 | * The "provider" may set the start, index, and limit values at any time | 
|---|
| 687 | * within the range 0..length=rep->length(). | 
|---|
| 688 | * The length field will be ignored. | 
|---|
| 689 | * | 
|---|
| 690 | * The Replaceable pointer rep is set into UCharIterator.context without copying | 
|---|
| 691 | * or cloning/reallocating the Replaceable object. | 
|---|
| 692 | * | 
|---|
| 693 | * getState() simply returns the current index. | 
|---|
| 694 | * move() will always return the final index. | 
|---|
| 695 | * | 
|---|
| 696 | * @param iter UCharIterator structure to be set for iteration | 
|---|
| 697 | * @param rep Replaceable to iterate over; the pointer is stored in iter->context, the object is not copied or cloned | 
|---|
| 698 | * | 
|---|
| 699 | * @see UCharIterator | 
|---|
| 700 | * @stable ICU 2.1 | 
|---|
| 701 | */ | 
|---|
| 702 | U_CAPI void U_EXPORT2 | 
|---|
| 703 | uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); | 
|---|
| 704 |  | 
|---|
| 705 | #endif | 
|---|
| 706 |  | 
|---|
| 707 | U_CDECL_END | 
|---|
| 708 |  | 
|---|
| 709 | #endif | 
|---|
| 710 |  | 
|---|