| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 |  | 
|---|
| 4 | // edits.h | 
|---|
| 5 | // created: 2016dec30 Markus W. Scherer | 
|---|
| 6 |  | 
|---|
| 7 | #ifndef __EDITS_H__ | 
|---|
| 8 | #define __EDITS_H__ | 
|---|
| 9 |  | 
|---|
| 10 | #include "unicode/utypes.h" | 
|---|
| 11 |  | 
|---|
| 12 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 13 |  | 
|---|
| 14 | #include "unicode/uobject.h" | 
|---|
| 15 |  | 
|---|
| 16 | /** | 
|---|
| 17 | * \file | 
|---|
| 18 | * \brief C++ API: C++ class Edits for low-level string transformations on styled text. | 
|---|
| 19 | */ | 
|---|
| 20 |  | 
|---|
| 21 | U_NAMESPACE_BEGIN | 
|---|
| 22 |  | 
|---|
| 23 | class UnicodeString; | 
|---|
| 24 |  | 
|---|
| 25 | /** | 
|---|
| 26 | * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions | 
|---|
| 27 | * in linear progression. Does not support moving/reordering of text. | 
|---|
| 28 | * | 
|---|
| 29 | * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to | 
|---|
| 30 | * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and | 
|---|
| 31 | * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity, | 
|---|
| 32 | * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one | 
|---|
| 33 | * mapping between code points in the source and destination strings. | 
|---|
| 34 | * | 
|---|
| 35 | * After all edits have been added, instances of this class should be considered immutable, and an | 
|---|
| 36 | * {@link Edits::Iterator} can be used for queries. | 
|---|
| 37 | * | 
|---|
| 38 | * There are four flavors of Edits::Iterator: | 
|---|
| 39 | * | 
|---|
| 40 | * <ul> | 
|---|
| 41 | * <li>{@link #getFineIterator()} retains full granularity of change edits. | 
|---|
| 42 | * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling | 
|---|
| 43 | * next() on the iterator, skips over no-change edits (unchanged regions). | 
|---|
| 44 | * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change | 
|---|
| 45 | * edits are automatically merged during the construction phase.) | 
|---|
| 46 | * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when | 
|---|
| 47 | * calling next() on the iterator, skips over no-change edits (unchanged regions). | 
|---|
| 48 | * </ul> | 
|---|
| 49 | * | 
|---|
| 50 | * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the | 
|---|
| 51 | * following fine edits: | 
|---|
| 52 | * <ul> | 
|---|
| 53 | * <li>abc ⇨ abc (no-change) | 
|---|
| 54 | * <li>ß ⇨ ss (change) | 
|---|
| 55 | * <li>D ⇨ d (change) | 
|---|
| 56 | * <li>e ⇨ e (no-change) | 
|---|
| 57 | * <li>F ⇨ f (change) | 
|---|
| 58 | * </ul> | 
|---|
| 59 | * and the following coarse edits (note how adjacent change edits get merged together): | 
|---|
| 60 | * <ul> | 
|---|
| 61 | * <li>abc ⇨ abc (no-change) | 
|---|
| 62 | * <li>ßD ⇨ ssd (change) | 
|---|
| 63 | * <li>e ⇨ e (no-change) | 
|---|
| 64 | * <li>F ⇨ f (change) | 
|---|
| 65 | * </ul> | 
|---|
| 66 | * | 
|---|
| 67 | * The "fine changes" and "coarse changes" iterators will step through only the change edits when their | 
|---|
| 68 | * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when | 
|---|
| 69 | * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()` | 
|---|
| 70 | * methods are used to walk through the string. | 
|---|
| 71 | * | 
|---|
| 72 | * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in | 
|---|
| 73 | * UCharacterCaseTest.java. | 
|---|
| 74 | * | 
|---|
| 75 | * An Edits object tracks a separate UErrorCode, but ICU string transformation functions | 
|---|
| 76 | * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. | 
|---|
| 77 | * | 
|---|
| 78 | * @stable ICU 59 | 
|---|
| 79 | */ | 
|---|
| 80 | class U_COMMON_API Edits final : public UMemory { | 
|---|
| 81 | public: | 
|---|
| 82 | /** | 
|---|
| 83 | * Constructs an empty object. | 
|---|
| 84 | * @stable ICU 59 | 
|---|
| 85 | */ | 
|---|
| 86 | Edits() : | 
|---|
| 87 | array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), | 
|---|
| 88 | errorCode_(U_ZERO_ERROR) {} | 
|---|
| 89 | /** | 
|---|
| 90 | * Copy constructor. | 
|---|
| 91 | * @param other source edits | 
|---|
| 92 | * @stable ICU 60 | 
|---|
| 93 | */ | 
|---|
| 94 | Edits(const Edits &other) : | 
|---|
| 95 | array(stackArray), capacity(STACK_CAPACITY), length(other.length), | 
|---|
| 96 | delta(other.delta), numChanges(other.numChanges), | 
|---|
| 97 | errorCode_(other.errorCode_) { | 
|---|
| 98 | copyArray(other); | 
|---|
| 99 | } | 
|---|
| 100 | /** | 
|---|
| 101 | * Move constructor, might leave src empty. | 
|---|
| 102 | * This object will have the same contents that the source object had. | 
|---|
| 103 | * @param src source edits | 
|---|
| 104 | * @stable ICU 60 | 
|---|
| 105 | */ | 
|---|
| 106 | Edits(Edits &&src) noexcept : | 
|---|
| 107 | array(stackArray), capacity(STACK_CAPACITY), length(src.length), | 
|---|
| 108 | delta(src.delta), numChanges(src.numChanges), | 
|---|
| 109 | errorCode_(src.errorCode_) { | 
|---|
| 110 | moveArray(src); | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 | /** | 
|---|
| 114 | * Destructor. | 
|---|
| 115 | * @stable ICU 59 | 
|---|
| 116 | */ | 
|---|
| 117 | ~Edits(); | 
|---|
| 118 |  | 
|---|
| 119 | /** | 
|---|
| 120 | * Assignment operator. | 
|---|
| 121 | * @param other source edits | 
|---|
| 122 | * @return *this | 
|---|
| 123 | * @stable ICU 60 | 
|---|
| 124 | */ | 
|---|
| 125 | Edits &operator=(const Edits &other); | 
|---|
| 126 |  | 
|---|
| 127 | /** | 
|---|
| 128 | * Move assignment operator, might leave src empty. | 
|---|
| 129 | * This object will have the same contents that the source object had. | 
|---|
| 130 | * The behavior is undefined if *this and src are the same object. | 
|---|
| 131 | * @param src source edits | 
|---|
| 132 | * @return *this | 
|---|
| 133 | * @stable ICU 60 | 
|---|
| 134 | */ | 
|---|
| 135 | Edits &operator=(Edits &&src) noexcept; | 
|---|
| 136 |  | 
|---|
| 137 | /** | 
|---|
| 138 | * Resets the data but may not release memory. | 
|---|
| 139 | * @stable ICU 59 | 
|---|
| 140 | */ | 
|---|
| 141 | void reset() noexcept; | 
|---|
| 142 |  | 
|---|
| 143 | /** | 
|---|
| 144 | * Adds a no-change edit: a record for an unchanged segment of text. | 
|---|
| 145 | * Normally called from inside ICU string transformation functions, not user code. | 
|---|
| 146 | * @stable ICU 59 | 
|---|
| 147 | */ | 
|---|
| 148 | void addUnchanged(int32_t unchangedLength); | 
|---|
| 149 | /** | 
|---|
| 150 | * Adds a change edit: a record for a text replacement/insertion/deletion. | 
|---|
| 151 | * Normally called from inside ICU string transformation functions, not user code. | 
|---|
| 152 | * @stable ICU 59 | 
|---|
| 153 | */ | 
|---|
| 154 | void addReplace(int32_t oldLength, int32_t newLength); | 
|---|
| 155 | /** | 
|---|
| 156 | * Sets the UErrorCode if an error occurred while recording edits. | 
|---|
| 157 | * Preserves older error codes in the outErrorCode. | 
|---|
| 158 | * Normally called from inside ICU string transformation functions, not user code. | 
|---|
| 159 | * @param outErrorCode Set to an error code if it does not contain one already | 
|---|
| 160 | *                  and an error occurred while recording edits. | 
|---|
| 161 | *                  Otherwise unchanged. | 
|---|
| 162 | * @return true if U_FAILURE(outErrorCode) | 
|---|
| 163 | * @stable ICU 59 | 
|---|
| 164 | */ | 
|---|
| 165 | UBool copyErrorTo(UErrorCode &outErrorCode) const; | 
|---|
| 166 |  | 
|---|
| 167 | /** | 
|---|
| 168 | * How much longer is the new text compared with the old text? | 
|---|
| 169 | * @return new length minus old length | 
|---|
| 170 | * @stable ICU 59 | 
|---|
| 171 | */ | 
|---|
| 172 | int32_t lengthDelta() const { return delta; } | 
|---|
| 173 | /** | 
|---|
| 174 | * @return true if there are any change edits | 
|---|
| 175 | * @stable ICU 59 | 
|---|
| 176 | */ | 
|---|
| 177 | UBool hasChanges() const { return numChanges != 0; } | 
|---|
| 178 |  | 
|---|
| 179 | /** | 
|---|
| 180 | * @return the number of change edits | 
|---|
| 181 | * @stable ICU 60 | 
|---|
| 182 | */ | 
|---|
| 183 | int32_t numberOfChanges() const { return numChanges; } | 
|---|
| 184 |  | 
|---|
| 185 | /** | 
|---|
| 186 | * Access to the list of edits. | 
|---|
| 187 | * | 
|---|
| 188 | * At any moment in time, an instance of this class points to a single edit: a "window" into a span | 
|---|
| 189 | * of the source string and the corresponding span of the destination string. The source string span | 
|---|
| 190 | * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string | 
|---|
| 191 | * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars. | 
|---|
| 192 | * | 
|---|
| 193 | * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`, | 
|---|
| 194 | * and `findDestinationIndex(int32_t, UErrorCode &)` methods. | 
|---|
| 195 | * Calling any of these methods mutates the iterator to make it point to the corresponding edit. | 
|---|
| 196 | * | 
|---|
| 197 | * For more information, see the documentation for {@link Edits}. | 
|---|
| 198 | * | 
|---|
| 199 | * @see getCoarseIterator | 
|---|
| 200 | * @see getFineIterator | 
|---|
| 201 | * @stable ICU 59 | 
|---|
| 202 | */ | 
|---|
| 203 | struct U_COMMON_API Iterator final : public UMemory { | 
|---|
| 204 | /** | 
|---|
| 205 | * Default constructor, empty iterator. | 
|---|
| 206 | * @stable ICU 60 | 
|---|
| 207 | */ | 
|---|
| 208 | Iterator() : | 
|---|
| 209 | array(nullptr), index(0), length(0), | 
|---|
| 210 | remaining(0), onlyChanges_(false), coarse(false), | 
|---|
| 211 | dir(0), changed(false), oldLength_(0), newLength_(0), | 
|---|
| 212 | srcIndex(0), replIndex(0), destIndex(0) {} | 
|---|
| 213 | /** | 
|---|
| 214 | * Copy constructor. | 
|---|
| 215 | * @stable ICU 59 | 
|---|
| 216 | */ | 
|---|
| 217 | Iterator(const Iterator &other) = default; | 
|---|
| 218 | /** | 
|---|
| 219 | * Assignment operator. | 
|---|
| 220 | * @stable ICU 59 | 
|---|
| 221 | */ | 
|---|
| 222 | Iterator &operator=(const Iterator &other) = default; | 
|---|
| 223 |  | 
|---|
| 224 | /** | 
|---|
| 225 | * Advances the iterator to the next edit. | 
|---|
| 226 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | 
|---|
| 227 | *                  or else the function returns immediately. Check for U_FAILURE() | 
|---|
| 228 | *                  on output or use with function chaining. (See User Guide for details.) | 
|---|
| 229 | * @return true if there is another edit | 
|---|
| 230 | * @stable ICU 59 | 
|---|
| 231 | */ | 
|---|
| 232 | UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } | 
|---|
| 233 |  | 
|---|
| 234 | /** | 
|---|
| 235 | * Moves the iterator to the edit that contains the source index. | 
|---|
| 236 | * The source index may be found in a no-change edit | 
|---|
| 237 | * even if normal iteration would skip no-change edits. | 
|---|
| 238 | * Normal iteration can continue from a found edit. | 
|---|
| 239 | * | 
|---|
| 240 | * The iterator state before this search logically does not matter. | 
|---|
| 241 | * (It may affect the performance of the search.) | 
|---|
| 242 | * | 
|---|
| 243 | * The iterator state after this search is undefined | 
|---|
| 244 | * if the source index is out of bounds for the source string. | 
|---|
| 245 | * | 
|---|
| 246 | * @param i source index | 
|---|
| 247 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | 
|---|
| 248 | *                  or else the function returns immediately. Check for U_FAILURE() | 
|---|
| 249 | *                  on output or use with function chaining. (See User Guide for details.) | 
|---|
| 250 | * @return true if the edit for the source index was found | 
|---|
| 251 | * @stable ICU 59 | 
|---|
| 252 | */ | 
|---|
| 253 | UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { | 
|---|
| 254 | return findIndex(i, true, errorCode) == 0; | 
|---|
| 255 | } | 
|---|
| 256 |  | 
|---|
| 257 | /** | 
|---|
| 258 | * Moves the iterator to the edit that contains the destination index. | 
|---|
| 259 | * The destination index may be found in a no-change edit | 
|---|
| 260 | * even if normal iteration would skip no-change edits. | 
|---|
| 261 | * Normal iteration can continue from a found edit. | 
|---|
| 262 | * | 
|---|
| 263 | * The iterator state before this search logically does not matter. | 
|---|
| 264 | * (It may affect the performance of the search.) | 
|---|
| 265 | * | 
|---|
| 266 | * The iterator state after this search is undefined | 
|---|
| 267 | * if the source index is out of bounds for the source string. | 
|---|
| 268 | * | 
|---|
| 269 | * @param i destination index | 
|---|
| 270 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | 
|---|
| 271 | *                  or else the function returns immediately. Check for U_FAILURE() | 
|---|
| 272 | *                  on output or use with function chaining. (See User Guide for details.) | 
|---|
| 273 | * @return true if the edit for the destination index was found | 
|---|
| 274 | * @stable ICU 60 | 
|---|
| 275 | */ | 
|---|
| 276 | UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) { | 
|---|
| 277 | return findIndex(i, false, errorCode) == 0; | 
|---|
| 278 | } | 
|---|
| 279 |  | 
|---|
| 280 | /** | 
|---|
| 281 | * Computes the destination index corresponding to the given source index. | 
|---|
| 282 | * If the source index is inside a change edit (not at its start), | 
|---|
| 283 | * then the destination index at the end of that edit is returned, | 
|---|
| 284 | * since there is no information about index mapping inside a change edit. | 
|---|
| 285 | * | 
|---|
| 286 | * (This means that indexes to the start and middle of an edit, | 
|---|
| 287 | * for example around a grapheme cluster, are mapped to indexes | 
|---|
| 288 | * encompassing the entire edit. | 
|---|
| 289 | * The alternative, mapping an interior index to the start, | 
|---|
| 290 | * would map such an interval to an empty one.) | 
|---|
| 291 | * | 
|---|
| 292 | * This operation will usually but not always modify this object. | 
|---|
| 293 | * The iterator state after this search is undefined. | 
|---|
| 294 | * | 
|---|
| 295 | * @param i source index | 
|---|
| 296 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | 
|---|
| 297 | *                  or else the function returns immediately. Check for U_FAILURE() | 
|---|
| 298 | *                  on output or use with function chaining. (See User Guide for details.) | 
|---|
| 299 | * @return destination index; undefined if i is not 0..string length | 
|---|
| 300 | * @stable ICU 60 | 
|---|
| 301 | */ | 
|---|
| 302 | int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode); | 
|---|
| 303 |  | 
|---|
| 304 | /** | 
|---|
| 305 | * Computes the source index corresponding to the given destination index. | 
|---|
| 306 | * If the destination index is inside a change edit (not at its start), | 
|---|
| 307 | * then the source index at the end of that edit is returned, | 
|---|
| 308 | * since there is no information about index mapping inside a change edit. | 
|---|
| 309 | * | 
|---|
| 310 | * (This means that indexes to the start and middle of an edit, | 
|---|
| 311 | * for example around a grapheme cluster, are mapped to indexes | 
|---|
| 312 | * encompassing the entire edit. | 
|---|
| 313 | * The alternative, mapping an interior index to the start, | 
|---|
| 314 | * would map such an interval to an empty one.) | 
|---|
| 315 | * | 
|---|
| 316 | * This operation will usually but not always modify this object. | 
|---|
| 317 | * The iterator state after this search is undefined. | 
|---|
| 318 | * | 
|---|
| 319 | * @param i destination index | 
|---|
| 320 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | 
|---|
| 321 | *                  or else the function returns immediately. Check for U_FAILURE() | 
|---|
| 322 | *                  on output or use with function chaining. (See User Guide for details.) | 
|---|
| 323 | * @return source index; undefined if i is not 0..string length | 
|---|
| 324 | * @stable ICU 60 | 
|---|
| 325 | */ | 
|---|
| 326 | int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); | 
|---|
| 327 |  | 
|---|
| 328 | /** | 
|---|
| 329 | * Returns whether the edit currently represented by the iterator is a change edit. | 
|---|
| 330 | * | 
|---|
| 331 | * @return true if this edit replaces oldLength() units with newLength() different ones. | 
|---|
| 332 | *         false if oldLength units remain unchanged. | 
|---|
| 333 | * @stable ICU 59 | 
|---|
| 334 | */ | 
|---|
| 335 | UBool hasChange() const { return changed; } | 
|---|
| 336 |  | 
|---|
| 337 | /** | 
|---|
| 338 | * The length of the current span in the source string, which starts at {@link #sourceIndex}. | 
|---|
| 339 | * | 
|---|
| 340 | * @return the number of units in the original string which are replaced or remain unchanged. | 
|---|
| 341 | * @stable ICU 59 | 
|---|
| 342 | */ | 
|---|
| 343 | int32_t oldLength() const { return oldLength_; } | 
|---|
| 344 |  | 
|---|
| 345 | /** | 
|---|
| 346 | * The length of the current span in the destination string, which starts at | 
|---|
| 347 | * {@link #destinationIndex}, or in the replacement string, which starts at | 
|---|
| 348 | * {@link #replacementIndex}. | 
|---|
| 349 | * | 
|---|
| 350 | * @return the number of units in the modified string, if hasChange() is true. | 
|---|
| 351 | *         Same as oldLength if hasChange() is false. | 
|---|
| 352 | * @stable ICU 59 | 
|---|
| 353 | */ | 
|---|
| 354 | int32_t newLength() const { return newLength_; } | 
|---|
| 355 |  | 
|---|
| 356 | /** | 
|---|
| 357 | * The start index of the current span in the source string; the span has length | 
|---|
| 358 | * {@link #oldLength}. | 
|---|
| 359 | * | 
|---|
| 360 | * @return the current index into the source string | 
|---|
| 361 | * @stable ICU 59 | 
|---|
| 362 | */ | 
|---|
| 363 | int32_t sourceIndex() const { return srcIndex; } | 
|---|
| 364 |  | 
|---|
| 365 | /** | 
|---|
| 366 | * The start index of the current span in the replacement string; the span has length | 
|---|
| 367 | * {@link #newLength}. Well-defined only if the current edit is a change edit. | 
|---|
| 368 | * | 
|---|
| 369 | * The *replacement string* is the concatenation of all substrings of the destination | 
|---|
| 370 | * string corresponding to change edits. | 
|---|
| 371 | * | 
|---|
| 372 | * This method is intended to be used together with operations that write only replacement | 
|---|
| 373 | * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option). | 
|---|
| 374 | * The source string can then be modified in-place. | 
|---|
| 375 | * | 
|---|
| 376 | * @return the current index into the replacement-characters-only string, | 
|---|
| 377 | *         not counting unchanged spans | 
|---|
| 378 | * @stable ICU 59 | 
|---|
| 379 | */ | 
|---|
| 380 | int32_t replacementIndex() const { | 
|---|
| 381 | // TODO: Throw an exception if we aren't in a change edit? | 
|---|
| 382 | return replIndex; | 
|---|
| 383 | } | 
|---|
| 384 |  | 
|---|
| 385 | /** | 
|---|
| 386 | * The start index of the current span in the destination string; the span has length | 
|---|
| 387 | * {@link #newLength}. | 
|---|
| 388 | * | 
|---|
| 389 | * @return the current index into the full destination string | 
|---|
| 390 | * @stable ICU 59 | 
|---|
| 391 | */ | 
|---|
| 392 | int32_t destinationIndex() const { return destIndex; } | 
|---|
| 393 |  | 
|---|
| 394 | #ifndef U_HIDE_INTERNAL_API | 
|---|
| 395 | /** | 
|---|
| 396 | * A string representation of the current edit represented by the iterator for debugging. You | 
|---|
| 397 | * should not depend on the contents of the return string. | 
|---|
| 398 | * @internal | 
|---|
| 399 | */ | 
|---|
| 400 | UnicodeString& toString(UnicodeString& appendTo) const; | 
|---|
| 401 | #endif  // U_HIDE_INTERNAL_API | 
|---|
| 402 |  | 
|---|
| 403 | private: | 
|---|
| 404 | friend class Edits; | 
|---|
| 405 |  | 
|---|
| 406 | Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); | 
|---|
| 407 |  | 
|---|
| 408 | int32_t readLength(int32_t head); | 
|---|
| 409 | void updateNextIndexes(); | 
|---|
| 410 | void updatePreviousIndexes(); | 
|---|
| 411 | UBool noNext(); | 
|---|
| 412 | UBool next(UBool onlyChanges, UErrorCode &errorCode); | 
|---|
| 413 | UBool previous(UErrorCode &errorCode); | 
|---|
| 414 | /** @return -1: error or i<0; 0: found; 1: i>=string length */ | 
|---|
| 415 | int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode); | 
|---|
| 416 |  | 
|---|
| 417 | const uint16_t *array; | 
|---|
| 418 | int32_t index, length; | 
|---|
| 419 | // 0 if we are not within compressed equal-length changes. | 
|---|
| 420 | // Otherwise the number of remaining changes, including the current one. | 
|---|
| 421 | int32_t remaining; | 
|---|
| 422 | UBool onlyChanges_, coarse; | 
|---|
| 423 |  | 
|---|
| 424 | int8_t dir;  // iteration direction: back(<0), initial(0), forward(>0) | 
|---|
| 425 | UBool changed; | 
|---|
| 426 | int32_t oldLength_, newLength_; | 
|---|
| 427 | int32_t srcIndex, replIndex, destIndex; | 
|---|
| 428 | }; | 
|---|
| 429 |  | 
|---|
| 430 | /** | 
|---|
| 431 | * Returns an Iterator for coarse-grained change edits | 
|---|
| 432 | * (adjacent change edits are treated as one). | 
|---|
| 433 | * Can be used to perform simple string updates. | 
|---|
| 434 | * Skips no-change edits. | 
|---|
| 435 | * @return an Iterator that merges adjacent changes. | 
|---|
| 436 | * @stable ICU 59 | 
|---|
| 437 | */ | 
|---|
| 438 | Iterator getCoarseChangesIterator() const { | 
|---|
| 439 | return Iterator(array, length, true, true); | 
|---|
| 440 | } | 
|---|
| 441 |  | 
|---|
| 442 | /** | 
|---|
| 443 | * Returns an Iterator for coarse-grained change and no-change edits | 
|---|
| 444 | * (adjacent change edits are treated as one). | 
|---|
| 445 | * Can be used to perform simple string updates. | 
|---|
| 446 | * Adjacent change edits are treated as one edit. | 
|---|
| 447 | * @return an Iterator that merges adjacent changes. | 
|---|
| 448 | * @stable ICU 59 | 
|---|
| 449 | */ | 
|---|
| 450 | Iterator getCoarseIterator() const { | 
|---|
| 451 | return Iterator(array, length, false, true); | 
|---|
| 452 | } | 
|---|
| 453 |  | 
|---|
| 454 | /** | 
|---|
| 455 | * Returns an Iterator for fine-grained change edits | 
|---|
| 456 | * (full granularity of change edits is retained). | 
|---|
| 457 | * Can be used for modifying styled text. | 
|---|
| 458 | * Skips no-change edits. | 
|---|
| 459 | * @return an Iterator that separates adjacent changes. | 
|---|
| 460 | * @stable ICU 59 | 
|---|
| 461 | */ | 
|---|
| 462 | Iterator getFineChangesIterator() const { | 
|---|
| 463 | return Iterator(array, length, true, false); | 
|---|
| 464 | } | 
|---|
| 465 |  | 
|---|
| 466 | /** | 
|---|
| 467 | * Returns an Iterator for fine-grained change and no-change edits | 
|---|
| 468 | * (full granularity of change edits is retained). | 
|---|
| 469 | * Can be used for modifying styled text. | 
|---|
| 470 | * @return an Iterator that separates adjacent changes. | 
|---|
| 471 | * @stable ICU 59 | 
|---|
| 472 | */ | 
|---|
| 473 | Iterator getFineIterator() const { | 
|---|
| 474 | return Iterator(array, length, false, false); | 
|---|
| 475 | } | 
|---|
| 476 |  | 
|---|
| 477 | /** | 
|---|
| 478 | * Merges the two input Edits and appends the result to this object. | 
|---|
| 479 | * | 
|---|
| 480 | * Consider two string transformations (for example, normalization and case mapping) | 
|---|
| 481 | * where each records Edits in addition to writing an output string.<br> | 
|---|
| 482 | * Edits ab reflect how substrings of input string a | 
|---|
| 483 | * map to substrings of intermediate string b.<br> | 
|---|
| 484 | * Edits bc reflect how substrings of intermediate string b | 
|---|
| 485 | * map to substrings of output string c.<br> | 
|---|
| 486 | * This function merges ab and bc such that the additional edits | 
|---|
| 487 | * recorded in this object reflect how substrings of input string a | 
|---|
| 488 | * map to substrings of output string c. | 
|---|
| 489 | * | 
|---|
| 490 | * If unrelated Edits are passed in where the output string of the first | 
|---|
| 491 | * has a different length than the input string of the second, | 
|---|
| 492 | * then a U_ILLEGAL_ARGUMENT_ERROR is reported. | 
|---|
| 493 | * | 
|---|
| 494 | * @param ab reflects how substrings of input string a | 
|---|
| 495 | *     map to substrings of intermediate string b. | 
|---|
| 496 | * @param bc reflects how substrings of intermediate string b | 
|---|
| 497 | *     map to substrings of output string c. | 
|---|
| 498 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | 
|---|
| 499 | *                  or else the function returns immediately. Check for U_FAILURE() | 
|---|
| 500 | *                  on output or use with function chaining. (See User Guide for details.) | 
|---|
| 501 | * @return *this, with the merged edits appended | 
|---|
| 502 | * @stable ICU 60 | 
|---|
| 503 | */ | 
|---|
| 504 | Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); | 
|---|
| 505 |  | 
|---|
| 506 | private: | 
|---|
| 507 | void releaseArray() noexcept; | 
|---|
| 508 | Edits ©Array(const Edits &other); | 
|---|
| 509 | Edits &moveArray(Edits &src) noexcept; | 
|---|
| 510 |  | 
|---|
| 511 | void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } | 
|---|
| 512 | int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } | 
|---|
| 513 |  | 
|---|
| 514 | void append(int32_t r); | 
|---|
| 515 | UBool growArray(); | 
|---|
| 516 |  | 
|---|
| 517 | static const int32_t STACK_CAPACITY = 100; | 
|---|
| 518 | uint16_t *array; | 
|---|
| 519 | int32_t capacity; | 
|---|
| 520 | int32_t length; | 
|---|
| 521 | int32_t delta; | 
|---|
| 522 | int32_t numChanges; | 
|---|
| 523 | UErrorCode errorCode_; | 
|---|
| 524 | uint16_t stackArray[STACK_CAPACITY]; | 
|---|
| 525 | }; | 
|---|
| 526 |  | 
|---|
| 527 | U_NAMESPACE_END | 
|---|
| 528 |  | 
|---|
| 529 | #endif /* U_SHOW_CPLUSPLUS_API */ | 
|---|
| 530 |  | 
|---|
| 531 | #endif  // __EDITS_H__ | 
|---|
| 532 |  | 
|---|