| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | // | 
|---|
| 4 | //   Copyright (C) 2012 International Business Machines Corporation | 
|---|
| 5 | //   and others. All rights reserved. | 
|---|
| 6 | // | 
|---|
| 7 | //   file:  regeximp.cpp | 
|---|
| 8 | // | 
|---|
| 9 | //           ICU Regular Expressions, | 
|---|
| 10 | //             miscellaneous implementation functions. | 
|---|
| 11 | // | 
|---|
| 12 |  | 
|---|
| 13 | #include "unicode/utypes.h" | 
|---|
| 14 |  | 
|---|
| 15 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 
|---|
| 16 | #include "regeximp.h" | 
|---|
| 17 | #include "unicode/utf16.h" | 
|---|
| 18 |  | 
|---|
| 19 | U_NAMESPACE_BEGIN | 
|---|
| 20 |  | 
|---|
| 21 | CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) : | 
|---|
| 22 | fUText(text), fFoldChars(NULL), fFoldLength(0) { | 
|---|
| 23 | } | 
|---|
| 24 |  | 
|---|
| 25 | CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {} | 
|---|
| 26 |  | 
|---|
| 27 | UChar32 CaseFoldingUTextIterator::next() { | 
|---|
| 28 | UChar32  foldedC; | 
|---|
| 29 | UChar32  originalC; | 
|---|
| 30 | if (fFoldChars == NULL) { | 
|---|
| 31 | // We are not in a string folding of an earlier character. | 
|---|
| 32 | // Start handling the next char from the input UText. | 
|---|
| 33 | originalC = UTEXT_NEXT32(&fUText); | 
|---|
| 34 | if (originalC == U_SENTINEL) { | 
|---|
| 35 | return originalC; | 
|---|
| 36 | } | 
|---|
| 37 | fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); | 
|---|
| 38 | if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { | 
|---|
| 39 | // input code point folds to a single code point, possibly itself. | 
|---|
| 40 | // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. | 
|---|
| 41 | if (fFoldLength < 0) { | 
|---|
| 42 | fFoldLength = ~fFoldLength; | 
|---|
| 43 | } | 
|---|
| 44 | foldedC = (UChar32)fFoldLength; | 
|---|
| 45 | fFoldChars = NULL; | 
|---|
| 46 | return foldedC; | 
|---|
| 47 | } | 
|---|
| 48 | // String foldings fall through here. | 
|---|
| 49 | fFoldIndex = 0; | 
|---|
| 50 | } | 
|---|
| 51 |  | 
|---|
| 52 | U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); | 
|---|
| 53 | if (fFoldIndex >= fFoldLength) { | 
|---|
| 54 | fFoldChars = NULL; | 
|---|
| 55 | } | 
|---|
| 56 | return foldedC; | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 |  | 
|---|
| 60 | UBool CaseFoldingUTextIterator::inExpansion() { | 
|---|
| 61 | return fFoldChars != NULL; | 
|---|
| 62 | } | 
|---|
| 63 |  | 
|---|
| 64 |  | 
|---|
| 65 |  | 
|---|
| 66 | CaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) : | 
|---|
| 67 | fChars(chars), fIndex(start), fLimit(limit), fFoldChars(NULL), fFoldLength(0) { | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 |  | 
|---|
| 71 | CaseFoldingUCharIterator::~CaseFoldingUCharIterator() {} | 
|---|
| 72 |  | 
|---|
| 73 |  | 
|---|
| 74 | UChar32 CaseFoldingUCharIterator::next() { | 
|---|
| 75 | UChar32  foldedC; | 
|---|
| 76 | UChar32  originalC; | 
|---|
| 77 | if (fFoldChars == NULL) { | 
|---|
| 78 | // We are not in a string folding of an earlier character. | 
|---|
| 79 | // Start handling the next char from the input UText. | 
|---|
| 80 | if (fIndex >= fLimit) { | 
|---|
| 81 | return U_SENTINEL; | 
|---|
| 82 | } | 
|---|
| 83 | U16_NEXT(fChars, fIndex, fLimit, originalC); | 
|---|
| 84 |  | 
|---|
| 85 | fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); | 
|---|
| 86 | if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { | 
|---|
| 87 | // input code point folds to a single code point, possibly itself. | 
|---|
| 88 | // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. | 
|---|
| 89 | if (fFoldLength < 0) { | 
|---|
| 90 | fFoldLength = ~fFoldLength; | 
|---|
| 91 | } | 
|---|
| 92 | foldedC = (UChar32)fFoldLength; | 
|---|
| 93 | fFoldChars = NULL; | 
|---|
| 94 | return foldedC; | 
|---|
| 95 | } | 
|---|
| 96 | // String foldings fall through here. | 
|---|
| 97 | fFoldIndex = 0; | 
|---|
| 98 | } | 
|---|
| 99 |  | 
|---|
| 100 | U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC); | 
|---|
| 101 | if (fFoldIndex >= fFoldLength) { | 
|---|
| 102 | fFoldChars = NULL; | 
|---|
| 103 | } | 
|---|
| 104 | return foldedC; | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 |  | 
|---|
| 108 | UBool CaseFoldingUCharIterator::inExpansion() { | 
|---|
| 109 | return fFoldChars != NULL; | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | int64_t CaseFoldingUCharIterator::getIndex() { | 
|---|
| 113 | return fIndex; | 
|---|
| 114 | } | 
|---|
| 115 |  | 
|---|
| 116 |  | 
|---|
| 117 | U_NAMESPACE_END | 
|---|
| 118 |  | 
|---|
| 119 | #endif | 
|---|
| 120 |  | 
|---|
| 121 |  | 
|---|