| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (C) 2001-2012, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | * Date Name Description |
| 9 | * 07/26/01 aliu Creation. |
| 10 | ********************************************************************** |
| 11 | */ |
| 12 | |
| 13 | #include "unicode/utypes.h" |
| 14 | |
| 15 | #if !UCONFIG_NO_TRANSLITERATION |
| 16 | |
| 17 | #include "quant.h" |
| 18 | #include "unicode/unistr.h" |
| 19 | #include "util.h" |
| 20 | |
| 21 | U_NAMESPACE_BEGIN |
| 22 | |
| 23 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Quantifier) |
| 24 | |
| 25 | Quantifier::Quantifier(UnicodeFunctor *adoptedMatcher, |
| 26 | uint32_t _minCount, uint32_t _maxCount) { |
| 27 | // assert(adopted != 0); |
| 28 | // assert(minCount <= maxCount); |
| 29 | matcher = adoptedMatcher; |
| 30 | this->minCount = _minCount; |
| 31 | this->maxCount = _maxCount; |
| 32 | } |
| 33 | |
| 34 | Quantifier::Quantifier(const Quantifier& o) : |
| 35 | UnicodeFunctor(o), |
| 36 | UnicodeMatcher(o), |
| 37 | matcher(o.matcher->clone()), |
| 38 | minCount(o.minCount), |
| 39 | maxCount(o.maxCount) |
| 40 | { |
| 41 | } |
| 42 | |
| 43 | Quantifier::~Quantifier() { |
| 44 | delete matcher; |
| 45 | } |
| 46 | |
| 47 | /** |
| 48 | * Implement UnicodeFunctor |
| 49 | */ |
| 50 | Quantifier* Quantifier::clone() const { |
| 51 | return new Quantifier(*this); |
| 52 | } |
| 53 | |
| 54 | /** |
| 55 | * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer |
| 56 | * and return the pointer. |
| 57 | */ |
| 58 | UnicodeMatcher* Quantifier::toMatcher() const { |
| 59 | Quantifier *nonconst_this = const_cast<Quantifier *>(this); |
| 60 | UnicodeMatcher *nonconst_base = static_cast<UnicodeMatcher *>(nonconst_this); |
| 61 | |
| 62 | return nonconst_base; |
| 63 | } |
| 64 | |
| 65 | UMatchDegree Quantifier::matches(const Replaceable& text, |
| 66 | int32_t& offset, |
| 67 | int32_t limit, |
| 68 | UBool incremental) { |
| 69 | int32_t start = offset; |
| 70 | uint32_t count = 0; |
| 71 | while (count < maxCount) { |
| 72 | int32_t pos = offset; |
| 73 | UMatchDegree m = matcher->toMatcher()->matches(text, offset, limit, incremental); |
| 74 | if (m == U_MATCH) { |
| 75 | ++count; |
| 76 | if (pos == offset) { |
| 77 | // If offset has not moved we have a zero-width match. |
| 78 | // Don't keep matching it infinitely. |
| 79 | break; |
| 80 | } |
| 81 | } else if (incremental && m == U_PARTIAL_MATCH) { |
| 82 | return U_PARTIAL_MATCH; |
| 83 | } else { |
| 84 | break; |
| 85 | } |
| 86 | } |
| 87 | if (incremental && offset == limit) { |
| 88 | return U_PARTIAL_MATCH; |
| 89 | } |
| 90 | if (count >= minCount) { |
| 91 | return U_MATCH; |
| 92 | } |
| 93 | offset = start; |
| 94 | return U_MISMATCH; |
| 95 | } |
| 96 | |
| 97 | /** |
| 98 | * Implement UnicodeMatcher |
| 99 | */ |
| 100 | UnicodeString& Quantifier::toPattern(UnicodeString& result, |
| 101 | UBool escapeUnprintable) const { |
| 102 | result.truncate(0); |
| 103 | matcher->toMatcher()->toPattern(result, escapeUnprintable); |
| 104 | if (minCount == 0) { |
| 105 | if (maxCount == 1) { |
| 106 | return result.append((UChar)63); /*?*/ |
| 107 | } else if (maxCount == MAX) { |
| 108 | return result.append((UChar)42); /***/ |
| 109 | } |
| 110 | // else fall through |
| 111 | } else if (minCount == 1 && maxCount == MAX) { |
| 112 | return result.append((UChar)43); /*+*/ |
| 113 | } |
| 114 | result.append((UChar)123); /*{*/ |
| 115 | ICU_Utility::appendNumber(result, minCount); |
| 116 | result.append((UChar)44); /*,*/ |
| 117 | if (maxCount != MAX) { |
| 118 | ICU_Utility::appendNumber(result, maxCount); |
| 119 | } |
| 120 | result.append((UChar)125); /*}*/ |
| 121 | return result; |
| 122 | } |
| 123 | |
| 124 | /** |
| 125 | * Implement UnicodeMatcher |
| 126 | */ |
| 127 | UBool Quantifier::matchesIndexValue(uint8_t v) const { |
| 128 | return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v); |
| 129 | } |
| 130 | |
| 131 | /** |
| 132 | * Implement UnicodeMatcher |
| 133 | */ |
| 134 | void Quantifier::addMatchSetTo(UnicodeSet& toUnionTo) const { |
| 135 | if (maxCount > 0) { |
| 136 | matcher->toMatcher()->addMatchSetTo(toUnionTo); |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | /** |
| 141 | * Implement UnicodeFunctor |
| 142 | */ |
| 143 | void Quantifier::setData(const TransliterationRuleData* d) { |
| 144 | matcher->setData(d); |
| 145 | } |
| 146 | |
| 147 | U_NAMESPACE_END |
| 148 | |
| 149 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
| 150 | |
| 151 | //eof |
| 152 | |