| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ************************************************************************* | 
|---|
| 5 | * COPYRIGHT: | 
|---|
| 6 | * Copyright (c) 1996-2012, International Business Machines Corporation and | 
|---|
| 7 | * others. All Rights Reserved. | 
|---|
| 8 | ************************************************************************* | 
|---|
| 9 | */ | 
|---|
| 10 |  | 
|---|
| 11 | #include "unicode/utypes.h" | 
|---|
| 12 |  | 
|---|
| 13 | #if !UCONFIG_NO_NORMALIZATION | 
|---|
| 14 |  | 
|---|
| 15 | #include "unicode/uniset.h" | 
|---|
| 16 | #include "unicode/unistr.h" | 
|---|
| 17 | #include "unicode/chariter.h" | 
|---|
| 18 | #include "unicode/schriter.h" | 
|---|
| 19 | #include "unicode/uchriter.h" | 
|---|
| 20 | #include "unicode/normlzr.h" | 
|---|
| 21 | #include "unicode/utf16.h" | 
|---|
| 22 | #include "cmemory.h" | 
|---|
| 23 | #include "normalizer2impl.h" | 
|---|
| 24 | #include "uprops.h"  // for uniset_getUnicode32Instance() | 
|---|
| 25 |  | 
|---|
| 26 | #if defined(move32) | 
|---|
| 27 | // System can define move32 intrinsics, but the char iters define move32 method | 
|---|
| 28 | // using same undef trick in headers, so undef here to re-enable the method. | 
|---|
| 29 | #undef move32 | 
|---|
| 30 | #endif | 
|---|
| 31 |  | 
|---|
| 32 | U_NAMESPACE_BEGIN | 
|---|
| 33 |  | 
|---|
| 34 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer) | 
|---|
| 35 |  | 
|---|
| 36 | //------------------------------------------------------------------------- | 
|---|
| 37 | // Constructors and other boilerplate | 
|---|
| 38 | //------------------------------------------------------------------------- | 
|---|
| 39 |  | 
|---|
| 40 | Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : | 
|---|
| 41 | UObject(), fFilteredNorm2(nullptr), fNorm2(nullptr), fUMode(mode), fOptions(0), | 
|---|
| 42 | text(new StringCharacterIterator(str)), | 
|---|
| 43 | currentIndex(0), nextIndex(0), | 
|---|
| 44 | buffer(), bufferPos(0) | 
|---|
| 45 | { | 
|---|
| 46 | init(); | 
|---|
| 47 | } | 
|---|
| 48 |  | 
|---|
| 49 | Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) : | 
|---|
| 50 | UObject(), fFilteredNorm2(nullptr), fNorm2(nullptr), fUMode(mode), fOptions(0), | 
|---|
| 51 | text(new UCharCharacterIterator(str, length)), | 
|---|
| 52 | currentIndex(0), nextIndex(0), | 
|---|
| 53 | buffer(), bufferPos(0) | 
|---|
| 54 | { | 
|---|
| 55 | init(); | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 | Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : | 
|---|
| 59 | UObject(), fFilteredNorm2(nullptr), fNorm2(nullptr), fUMode(mode), fOptions(0), | 
|---|
| 60 | text(iter.clone()), | 
|---|
| 61 | currentIndex(0), nextIndex(0), | 
|---|
| 62 | buffer(), bufferPos(0) | 
|---|
| 63 | { | 
|---|
| 64 | init(); | 
|---|
| 65 | } | 
|---|
| 66 |  | 
|---|
| 67 | Normalizer::Normalizer(const Normalizer ©) : | 
|---|
| 68 | UObject(copy), fFilteredNorm2(nullptr), fNorm2(nullptr), fUMode(copy.fUMode), fOptions(copy.fOptions), | 
|---|
| 69 | text(copy.text->clone()), | 
|---|
| 70 | currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), | 
|---|
| 71 | buffer(copy.buffer), bufferPos(copy.bufferPos) | 
|---|
| 72 | { | 
|---|
| 73 | init(); | 
|---|
| 74 | } | 
|---|
| 75 |  | 
|---|
| 76 | void | 
|---|
| 77 | Normalizer::init() { | 
|---|
| 78 | UErrorCode errorCode=U_ZERO_ERROR; | 
|---|
| 79 | fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); | 
|---|
| 80 | if(fOptions&UNORM_UNICODE_3_2) { | 
|---|
| 81 | delete fFilteredNorm2; | 
|---|
| 82 | fNorm2=fFilteredNorm2= | 
|---|
| 83 | new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); | 
|---|
| 84 | } | 
|---|
| 85 | if(U_FAILURE(errorCode)) { | 
|---|
| 86 | errorCode=U_ZERO_ERROR; | 
|---|
| 87 | fNorm2=Normalizer2Factory::getNoopInstance(errorCode); | 
|---|
| 88 | } | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | Normalizer::~Normalizer() | 
|---|
| 92 | { | 
|---|
| 93 | delete fFilteredNorm2; | 
|---|
| 94 | delete text; | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | Normalizer* | 
|---|
| 98 | Normalizer::clone() const | 
|---|
| 99 | { | 
|---|
| 100 | return new Normalizer(*this); | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | /** | 
|---|
| 104 | * Generates a hash code for this iterator. | 
|---|
| 105 | */ | 
|---|
| 106 | int32_t Normalizer::hashCode() const | 
|---|
| 107 | { | 
|---|
| 108 | return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | bool Normalizer::operator==(const Normalizer& that) const | 
|---|
| 112 | { | 
|---|
| 113 | return | 
|---|
| 114 | this==&that || | 
|---|
| 115 | (fUMode==that.fUMode && | 
|---|
| 116 | fOptions==that.fOptions && | 
|---|
| 117 | *text==*that.text && | 
|---|
| 118 | buffer==that.buffer && | 
|---|
| 119 | bufferPos==that.bufferPos && | 
|---|
| 120 | nextIndex==that.nextIndex); | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | //------------------------------------------------------------------------- | 
|---|
| 124 | // Static utility methods | 
|---|
| 125 | //------------------------------------------------------------------------- | 
|---|
| 126 |  | 
|---|
| 127 | void U_EXPORT2 | 
|---|
| 128 | Normalizer::normalize(const UnicodeString& source, | 
|---|
| 129 | UNormalizationMode mode, int32_t options, | 
|---|
| 130 | UnicodeString& result, | 
|---|
| 131 | UErrorCode &status) { | 
|---|
| 132 | if(source.isBogus() || U_FAILURE(status)) { | 
|---|
| 133 | result.setToBogus(); | 
|---|
| 134 | if(U_SUCCESS(status)) { | 
|---|
| 135 | status=U_ILLEGAL_ARGUMENT_ERROR; | 
|---|
| 136 | } | 
|---|
| 137 | } else { | 
|---|
| 138 | UnicodeString localDest; | 
|---|
| 139 | UnicodeString *dest; | 
|---|
| 140 |  | 
|---|
| 141 | if(&source!=&result) { | 
|---|
| 142 | dest=&result; | 
|---|
| 143 | } else { | 
|---|
| 144 | // the source and result strings are the same object, use a temporary one | 
|---|
| 145 | dest=&localDest; | 
|---|
| 146 | } | 
|---|
| 147 | const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); | 
|---|
| 148 | if(U_SUCCESS(status)) { | 
|---|
| 149 | if(options&UNORM_UNICODE_3_2) { | 
|---|
| 150 | FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). | 
|---|
| 151 | normalize(source, *dest, status); | 
|---|
| 152 | } else { | 
|---|
| 153 | n2->normalize(source, *dest, status); | 
|---|
| 154 | } | 
|---|
| 155 | } | 
|---|
| 156 | if(dest==&localDest && U_SUCCESS(status)) { | 
|---|
| 157 | result=*dest; | 
|---|
| 158 | } | 
|---|
| 159 | } | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | void U_EXPORT2 | 
|---|
| 163 | Normalizer::compose(const UnicodeString& source, | 
|---|
| 164 | UBool compat, int32_t options, | 
|---|
| 165 | UnicodeString& result, | 
|---|
| 166 | UErrorCode &status) { | 
|---|
| 167 | normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | void U_EXPORT2 | 
|---|
| 171 | Normalizer::decompose(const UnicodeString& source, | 
|---|
| 172 | UBool compat, int32_t options, | 
|---|
| 173 | UnicodeString& result, | 
|---|
| 174 | UErrorCode &status) { | 
|---|
| 175 | normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status); | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | UNormalizationCheckResult | 
|---|
| 179 | Normalizer::quickCheck(const UnicodeString& source, | 
|---|
| 180 | UNormalizationMode mode, int32_t options, | 
|---|
| 181 | UErrorCode &status) { | 
|---|
| 182 | const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); | 
|---|
| 183 | if(U_SUCCESS(status)) { | 
|---|
| 184 | if(options&UNORM_UNICODE_3_2) { | 
|---|
| 185 | return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). | 
|---|
| 186 | quickCheck(source, status); | 
|---|
| 187 | } else { | 
|---|
| 188 | return n2->quickCheck(source, status); | 
|---|
| 189 | } | 
|---|
| 190 | } else { | 
|---|
| 191 | return UNORM_MAYBE; | 
|---|
| 192 | } | 
|---|
| 193 | } | 
|---|
| 194 |  | 
|---|
| 195 | UBool | 
|---|
| 196 | Normalizer::isNormalized(const UnicodeString& source, | 
|---|
| 197 | UNormalizationMode mode, int32_t options, | 
|---|
| 198 | UErrorCode &status) { | 
|---|
| 199 | const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); | 
|---|
| 200 | if(U_SUCCESS(status)) { | 
|---|
| 201 | if(options&UNORM_UNICODE_3_2) { | 
|---|
| 202 | return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). | 
|---|
| 203 | isNormalized(source, status); | 
|---|
| 204 | } else { | 
|---|
| 205 | return n2->isNormalized(source, status); | 
|---|
| 206 | } | 
|---|
| 207 | } else { | 
|---|
| 208 | return false; | 
|---|
| 209 | } | 
|---|
| 210 | } | 
|---|
| 211 |  | 
|---|
| 212 | UnicodeString & U_EXPORT2 | 
|---|
| 213 | Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right, | 
|---|
| 214 | UnicodeString &result, | 
|---|
| 215 | UNormalizationMode mode, int32_t options, | 
|---|
| 216 | UErrorCode &errorCode) { | 
|---|
| 217 | if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) { | 
|---|
| 218 | result.setToBogus(); | 
|---|
| 219 | if(U_SUCCESS(errorCode)) { | 
|---|
| 220 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
|---|
| 221 | } | 
|---|
| 222 | } else { | 
|---|
| 223 | UnicodeString localDest; | 
|---|
| 224 | UnicodeString *dest; | 
|---|
| 225 |  | 
|---|
| 226 | if(&right!=&result) { | 
|---|
| 227 | dest=&result; | 
|---|
| 228 | } else { | 
|---|
| 229 | // the right and result strings are the same object, use a temporary one | 
|---|
| 230 | dest=&localDest; | 
|---|
| 231 | } | 
|---|
| 232 | *dest=left; | 
|---|
| 233 | const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); | 
|---|
| 234 | if(U_SUCCESS(errorCode)) { | 
|---|
| 235 | if(options&UNORM_UNICODE_3_2) { | 
|---|
| 236 | FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). | 
|---|
| 237 | append(*dest, right, errorCode); | 
|---|
| 238 | } else { | 
|---|
| 239 | n2->append(*dest, right, errorCode); | 
|---|
| 240 | } | 
|---|
| 241 | } | 
|---|
| 242 | if(dest==&localDest && U_SUCCESS(errorCode)) { | 
|---|
| 243 | result=*dest; | 
|---|
| 244 | } | 
|---|
| 245 | } | 
|---|
| 246 | return result; | 
|---|
| 247 | } | 
|---|
| 248 |  | 
|---|
| 249 | //------------------------------------------------------------------------- | 
|---|
| 250 | // Iteration API | 
|---|
| 251 | //------------------------------------------------------------------------- | 
|---|
| 252 |  | 
|---|
| 253 | /** | 
|---|
| 254 | * Return the current character in the normalized text. | 
|---|
| 255 | */ | 
|---|
| 256 | UChar32 Normalizer::current() { | 
|---|
| 257 | if(bufferPos<buffer.length() || nextNormalize()) { | 
|---|
| 258 | return buffer.char32At(bufferPos); | 
|---|
| 259 | } else { | 
|---|
| 260 | return DONE; | 
|---|
| 261 | } | 
|---|
| 262 | } | 
|---|
| 263 |  | 
|---|
| 264 | /** | 
|---|
| 265 | * Return the next character in the normalized text and advance | 
|---|
| 266 | * the iteration position by one.  If the end | 
|---|
| 267 | * of the text has already been reached, {@link #DONE} is returned. | 
|---|
| 268 | */ | 
|---|
| 269 | UChar32 Normalizer::next() { | 
|---|
| 270 | if(bufferPos<buffer.length() ||  nextNormalize()) { | 
|---|
| 271 | UChar32 c=buffer.char32At(bufferPos); | 
|---|
| 272 | bufferPos+=U16_LENGTH(c); | 
|---|
| 273 | return c; | 
|---|
| 274 | } else { | 
|---|
| 275 | return DONE; | 
|---|
| 276 | } | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | /** | 
|---|
| 280 | * Return the previous character in the normalized text and decrement | 
|---|
| 281 | * the iteration position by one.  If the beginning | 
|---|
| 282 | * of the text has already been reached, {@link #DONE} is returned. | 
|---|
| 283 | */ | 
|---|
| 284 | UChar32 Normalizer::previous() { | 
|---|
| 285 | if(bufferPos>0 || previousNormalize()) { | 
|---|
| 286 | UChar32 c=buffer.char32At(bufferPos-1); | 
|---|
| 287 | bufferPos-=U16_LENGTH(c); | 
|---|
| 288 | return c; | 
|---|
| 289 | } else { | 
|---|
| 290 | return DONE; | 
|---|
| 291 | } | 
|---|
| 292 | } | 
|---|
| 293 |  | 
|---|
| 294 | void Normalizer::reset() { | 
|---|
| 295 | currentIndex=nextIndex=text->setToStart(); | 
|---|
| 296 | clearBuffer(); | 
|---|
| 297 | } | 
|---|
| 298 |  | 
|---|
| 299 | void | 
|---|
| 300 | Normalizer::setIndexOnly(int32_t index) { | 
|---|
| 301 | text->setIndex(index);  // pins index | 
|---|
| 302 | currentIndex=nextIndex=text->getIndex(); | 
|---|
| 303 | clearBuffer(); | 
|---|
| 304 | } | 
|---|
| 305 |  | 
|---|
| 306 | /** | 
|---|
| 307 | * Return the first character in the normalized text.  This resets | 
|---|
| 308 | * the <tt>Normalizer's</tt> position to the beginning of the text. | 
|---|
| 309 | */ | 
|---|
| 310 | UChar32 Normalizer::first() { | 
|---|
| 311 | reset(); | 
|---|
| 312 | return next(); | 
|---|
| 313 | } | 
|---|
| 314 |  | 
|---|
| 315 | /** | 
|---|
| 316 | * Return the last character in the normalized text.  This resets | 
|---|
| 317 | * the <tt>Normalizer's</tt> position to be just before the | 
|---|
| 318 | * the input text corresponding to that normalized character. | 
|---|
| 319 | */ | 
|---|
| 320 | UChar32 Normalizer::last() { | 
|---|
| 321 | currentIndex=nextIndex=text->setToEnd(); | 
|---|
| 322 | clearBuffer(); | 
|---|
| 323 | return previous(); | 
|---|
| 324 | } | 
|---|
| 325 |  | 
|---|
| 326 | /** | 
|---|
| 327 | * Retrieve the current iteration position in the input text that is | 
|---|
| 328 | * being normalized.  This method is useful in applications such as | 
|---|
| 329 | * searching, where you need to be able to determine the position in | 
|---|
| 330 | * the input text that corresponds to a given normalized output character. | 
|---|
| 331 | * <p> | 
|---|
| 332 | * <b>Note:</b> This method sets the position in the <em>input</em>, while | 
|---|
| 333 | * {@link #next} and {@link #previous} iterate through characters in the | 
|---|
| 334 | * <em>output</em>.  This means that there is not necessarily a one-to-one | 
|---|
| 335 | * correspondence between characters returned by <tt>next</tt> and | 
|---|
| 336 | * <tt>previous</tt> and the indices passed to and returned from | 
|---|
| 337 | * <tt>setIndex</tt> and {@link #getIndex}. | 
|---|
| 338 | * | 
|---|
| 339 | */ | 
|---|
| 340 | int32_t Normalizer::getIndex() const { | 
|---|
| 341 | if(bufferPos<buffer.length()) { | 
|---|
| 342 | return currentIndex; | 
|---|
| 343 | } else { | 
|---|
| 344 | return nextIndex; | 
|---|
| 345 | } | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|
| 348 | /** | 
|---|
| 349 | * Retrieve the index of the start of the input text.  This is the begin index | 
|---|
| 350 | * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt> | 
|---|
| 351 | * over which this <tt>Normalizer</tt> is iterating | 
|---|
| 352 | */ | 
|---|
| 353 | int32_t Normalizer::startIndex() const { | 
|---|
| 354 | return text->startIndex(); | 
|---|
| 355 | } | 
|---|
| 356 |  | 
|---|
| 357 | /** | 
|---|
| 358 | * Retrieve the index of the end of the input text.  This is the end index | 
|---|
| 359 | * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> | 
|---|
| 360 | * over which this <tt>Normalizer</tt> is iterating | 
|---|
| 361 | */ | 
|---|
| 362 | int32_t Normalizer::endIndex() const { | 
|---|
| 363 | return text->endIndex(); | 
|---|
| 364 | } | 
|---|
| 365 |  | 
|---|
| 366 | //------------------------------------------------------------------------- | 
|---|
| 367 | // Property access methods | 
|---|
| 368 | //------------------------------------------------------------------------- | 
|---|
| 369 |  | 
|---|
| 370 | void | 
|---|
| 371 | Normalizer::setMode(UNormalizationMode newMode) | 
|---|
| 372 | { | 
|---|
| 373 | fUMode = newMode; | 
|---|
| 374 | init(); | 
|---|
| 375 | } | 
|---|
| 376 |  | 
|---|
| 377 | UNormalizationMode | 
|---|
| 378 | Normalizer::getUMode() const | 
|---|
| 379 | { | 
|---|
| 380 | return fUMode; | 
|---|
| 381 | } | 
|---|
| 382 |  | 
|---|
| 383 | void | 
|---|
| 384 | Normalizer::setOption(int32_t option, | 
|---|
| 385 | UBool value) | 
|---|
| 386 | { | 
|---|
| 387 | if (value) { | 
|---|
| 388 | fOptions |= option; | 
|---|
| 389 | } else { | 
|---|
| 390 | fOptions &= (~option); | 
|---|
| 391 | } | 
|---|
| 392 | init(); | 
|---|
| 393 | } | 
|---|
| 394 |  | 
|---|
| 395 | UBool | 
|---|
| 396 | Normalizer::getOption(int32_t option) const | 
|---|
| 397 | { | 
|---|
| 398 | return (fOptions & option) != 0; | 
|---|
| 399 | } | 
|---|
| 400 |  | 
|---|
| 401 | /** | 
|---|
| 402 | * Set the input text over which this <tt>Normalizer</tt> will iterate. | 
|---|
| 403 | * The iteration position is set to the beginning of the input text. | 
|---|
| 404 | */ | 
|---|
| 405 | void | 
|---|
| 406 | Normalizer::setText(const UnicodeString& newText, | 
|---|
| 407 | UErrorCode &status) | 
|---|
| 408 | { | 
|---|
| 409 | if (U_FAILURE(status)) { | 
|---|
| 410 | return; | 
|---|
| 411 | } | 
|---|
| 412 | CharacterIterator *newIter = new StringCharacterIterator(newText); | 
|---|
| 413 | if (newIter == nullptr) { | 
|---|
| 414 | status = U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 415 | return; | 
|---|
| 416 | } | 
|---|
| 417 | delete text; | 
|---|
| 418 | text = newIter; | 
|---|
| 419 | reset(); | 
|---|
| 420 | } | 
|---|
| 421 |  | 
|---|
| 422 | /** | 
|---|
| 423 | * Set the input text over which this <tt>Normalizer</tt> will iterate. | 
|---|
| 424 | * The iteration position is set to the beginning of the string. | 
|---|
| 425 | */ | 
|---|
| 426 | void | 
|---|
| 427 | Normalizer::setText(const CharacterIterator& newText, | 
|---|
| 428 | UErrorCode &status) | 
|---|
| 429 | { | 
|---|
| 430 | if (U_FAILURE(status)) { | 
|---|
| 431 | return; | 
|---|
| 432 | } | 
|---|
| 433 | CharacterIterator *newIter = newText.clone(); | 
|---|
| 434 | if (newIter == nullptr) { | 
|---|
| 435 | status = U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 436 | return; | 
|---|
| 437 | } | 
|---|
| 438 | delete text; | 
|---|
| 439 | text = newIter; | 
|---|
| 440 | reset(); | 
|---|
| 441 | } | 
|---|
| 442 |  | 
|---|
| 443 | void | 
|---|
| 444 | Normalizer::setText(ConstChar16Ptr newText, | 
|---|
| 445 | int32_t length, | 
|---|
| 446 | UErrorCode &status) | 
|---|
| 447 | { | 
|---|
| 448 | if (U_FAILURE(status)) { | 
|---|
| 449 | return; | 
|---|
| 450 | } | 
|---|
| 451 | CharacterIterator *newIter = new UCharCharacterIterator(newText, length); | 
|---|
| 452 | if (newIter == nullptr) { | 
|---|
| 453 | status = U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 454 | return; | 
|---|
| 455 | } | 
|---|
| 456 | delete text; | 
|---|
| 457 | text = newIter; | 
|---|
| 458 | reset(); | 
|---|
| 459 | } | 
|---|
| 460 |  | 
|---|
| 461 | /** | 
|---|
| 462 | * Copies the text under iteration into the UnicodeString referred to by "result". | 
|---|
| 463 | * @param result Receives a copy of the text under iteration. | 
|---|
| 464 | */ | 
|---|
| 465 | void | 
|---|
| 466 | Normalizer::getText(UnicodeString&  result) | 
|---|
| 467 | { | 
|---|
| 468 | text->getText(result); | 
|---|
| 469 | } | 
|---|
| 470 |  | 
|---|
| 471 | //------------------------------------------------------------------------- | 
|---|
| 472 | // Private utility methods | 
|---|
| 473 | //------------------------------------------------------------------------- | 
|---|
| 474 |  | 
|---|
| 475 | void Normalizer::clearBuffer() { | 
|---|
| 476 | buffer.remove(); | 
|---|
| 477 | bufferPos=0; | 
|---|
| 478 | } | 
|---|
| 479 |  | 
|---|
| 480 | UBool | 
|---|
| 481 | Normalizer::nextNormalize() { | 
|---|
| 482 | clearBuffer(); | 
|---|
| 483 | currentIndex=nextIndex; | 
|---|
| 484 | text->setIndex(nextIndex); | 
|---|
| 485 | if(!text->hasNext()) { | 
|---|
| 486 | return false; | 
|---|
| 487 | } | 
|---|
| 488 | // Skip at least one character so we make progress. | 
|---|
| 489 | UnicodeString segment(text->next32PostInc()); | 
|---|
| 490 | while(text->hasNext()) { | 
|---|
| 491 | UChar32 c; | 
|---|
| 492 | if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { | 
|---|
| 493 | text->move32(-1, CharacterIterator::kCurrent); | 
|---|
| 494 | break; | 
|---|
| 495 | } | 
|---|
| 496 | segment.append(c); | 
|---|
| 497 | } | 
|---|
| 498 | nextIndex=text->getIndex(); | 
|---|
| 499 | UErrorCode errorCode=U_ZERO_ERROR; | 
|---|
| 500 | fNorm2->normalize(segment, buffer, errorCode); | 
|---|
| 501 | return U_SUCCESS(errorCode) && !buffer.isEmpty(); | 
|---|
| 502 | } | 
|---|
| 503 |  | 
|---|
| 504 | UBool | 
|---|
| 505 | Normalizer::previousNormalize() { | 
|---|
| 506 | clearBuffer(); | 
|---|
| 507 | nextIndex=currentIndex; | 
|---|
| 508 | text->setIndex(currentIndex); | 
|---|
| 509 | if(!text->hasPrevious()) { | 
|---|
| 510 | return false; | 
|---|
| 511 | } | 
|---|
| 512 | UnicodeString segment; | 
|---|
| 513 | while(text->hasPrevious()) { | 
|---|
| 514 | UChar32 c=text->previous32(); | 
|---|
| 515 | segment.insert(0, c); | 
|---|
| 516 | if(fNorm2->hasBoundaryBefore(c)) { | 
|---|
| 517 | break; | 
|---|
| 518 | } | 
|---|
| 519 | } | 
|---|
| 520 | currentIndex=text->getIndex(); | 
|---|
| 521 | UErrorCode errorCode=U_ZERO_ERROR; | 
|---|
| 522 | fNorm2->normalize(segment, buffer, errorCode); | 
|---|
| 523 | bufferPos=buffer.length(); | 
|---|
| 524 | return U_SUCCESS(errorCode) && !buffer.isEmpty(); | 
|---|
| 525 | } | 
|---|
| 526 |  | 
|---|
| 527 | U_NAMESPACE_END | 
|---|
| 528 |  | 
|---|
| 529 | #endif /* #if !UCONFIG_NO_NORMALIZATION */ | 
|---|
| 530 |  | 
|---|