| 1 | // © 2018 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | |
| 4 | #include "unicode/utypes.h" |
| 5 | |
| 6 | #if !UCONFIG_NO_FORMATTING |
| 7 | |
| 8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: |
| 9 | // Helpful in toString methods and elsewhere. |
| 10 | #define UNISTR_FROM_STRING_EXPLICIT |
| 11 | |
| 12 | #include "numparse_types.h" |
| 13 | #include "numparse_symbols.h" |
| 14 | #include "numparse_utils.h" |
| 15 | #include "string_segment.h" |
| 16 | |
| 17 | using namespace icu; |
| 18 | using namespace icu::numparse; |
| 19 | using namespace icu::numparse::impl; |
| 20 | |
| 21 | |
| 22 | SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key) { |
| 23 | fUniSet = unisets::get(key); |
| 24 | if (fUniSet->contains(symbolString)) { |
| 25 | fString.setToBogus(); |
| 26 | } else { |
| 27 | fString = symbolString; |
| 28 | } |
| 29 | } |
| 30 | |
| 31 | const UnicodeSet* SymbolMatcher::getSet() const { |
| 32 | return fUniSet; |
| 33 | } |
| 34 | |
| 35 | bool SymbolMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const { |
| 36 | // Smoke test first; this matcher might be disabled. |
| 37 | if (isDisabled(result)) { |
| 38 | return false; |
| 39 | } |
| 40 | |
| 41 | // Test the string first in order to consume trailing chars greedily. |
| 42 | int overlap = 0; |
| 43 | if (!fString.isEmpty()) { |
| 44 | overlap = segment.getCommonPrefixLength(fString); |
| 45 | if (overlap == fString.length()) { |
| 46 | segment.adjustOffset(fString.length()); |
| 47 | accept(segment, result); |
| 48 | return false; |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | int cp = segment.getCodePoint(); |
| 53 | if (cp != -1 && fUniSet->contains(cp)) { |
| 54 | segment.adjustOffset(U16_LENGTH(cp)); |
| 55 | accept(segment, result); |
| 56 | return false; |
| 57 | } |
| 58 | |
| 59 | return overlap == segment.length(); |
| 60 | } |
| 61 | |
| 62 | bool SymbolMatcher::smokeTest(const StringSegment& segment) const { |
| 63 | return segment.startsWith(*fUniSet) || segment.startsWith(fString); |
| 64 | } |
| 65 | |
| 66 | UnicodeString SymbolMatcher::toString() const { |
| 67 | // TODO: Customize output for each symbol |
| 68 | return u"<Symbol>" ; |
| 69 | } |
| 70 | |
| 71 | |
| 72 | IgnorablesMatcher::IgnorablesMatcher(parse_flags_t parseFlags) : |
| 73 | SymbolMatcher( |
| 74 | {}, |
| 75 | (0 != (parseFlags & PARSE_FLAG_STRICT_IGNORABLES)) ? |
| 76 | unisets::STRICT_IGNORABLES : |
| 77 | unisets::DEFAULT_IGNORABLES) { |
| 78 | } |
| 79 | |
| 80 | bool IgnorablesMatcher::isFlexible() const { |
| 81 | return true; |
| 82 | } |
| 83 | |
| 84 | UnicodeString IgnorablesMatcher::toString() const { |
| 85 | return u"<Ignorables>" ; |
| 86 | } |
| 87 | |
| 88 | bool IgnorablesMatcher::isDisabled(const ParsedNumber&) const { |
| 89 | return false; |
| 90 | } |
| 91 | |
| 92 | void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const { |
| 93 | // No-op |
| 94 | } |
| 95 | |
| 96 | |
| 97 | InfinityMatcher::InfinityMatcher(const DecimalFormatSymbols& dfs) |
| 98 | : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY_SIGN) { |
| 99 | } |
| 100 | |
| 101 | bool InfinityMatcher::isDisabled(const ParsedNumber& result) const { |
| 102 | return 0 != (result.flags & FLAG_INFINITY); |
| 103 | } |
| 104 | |
| 105 | void InfinityMatcher::accept(StringSegment& segment, ParsedNumber& result) const { |
| 106 | result.flags |= FLAG_INFINITY; |
| 107 | result.setCharsConsumed(segment); |
| 108 | } |
| 109 | |
| 110 | |
| 111 | MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) |
| 112 | : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), unisets::MINUS_SIGN), |
| 113 | fAllowTrailing(allowTrailing) { |
| 114 | } |
| 115 | |
| 116 | bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const { |
| 117 | return !fAllowTrailing && result.seenNumber(); |
| 118 | } |
| 119 | |
| 120 | void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const { |
| 121 | result.flags |= FLAG_NEGATIVE; |
| 122 | result.setCharsConsumed(segment); |
| 123 | } |
| 124 | |
| 125 | |
| 126 | NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs) |
| 127 | : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::EMPTY) { |
| 128 | } |
| 129 | |
| 130 | bool NanMatcher::isDisabled(const ParsedNumber& result) const { |
| 131 | return result.seenNumber(); |
| 132 | } |
| 133 | |
| 134 | void NanMatcher::accept(StringSegment& segment, ParsedNumber& result) const { |
| 135 | result.flags |= FLAG_NAN; |
| 136 | result.setCharsConsumed(segment); |
| 137 | } |
| 138 | |
| 139 | |
| 140 | PaddingMatcher::PaddingMatcher(const UnicodeString& padString) |
| 141 | : SymbolMatcher(padString, unisets::EMPTY) {} |
| 142 | |
| 143 | bool PaddingMatcher::isFlexible() const { |
| 144 | return true; |
| 145 | } |
| 146 | |
| 147 | bool PaddingMatcher::isDisabled(const ParsedNumber&) const { |
| 148 | return false; |
| 149 | } |
| 150 | |
| 151 | void PaddingMatcher::accept(StringSegment&, ParsedNumber&) const { |
| 152 | // No-op |
| 153 | } |
| 154 | |
| 155 | |
| 156 | PercentMatcher::PercentMatcher(const DecimalFormatSymbols& dfs) |
| 157 | : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol), unisets::PERCENT_SIGN) { |
| 158 | } |
| 159 | |
| 160 | bool PercentMatcher::isDisabled(const ParsedNumber& result) const { |
| 161 | return 0 != (result.flags & FLAG_PERCENT); |
| 162 | } |
| 163 | |
| 164 | void PercentMatcher::accept(StringSegment& segment, ParsedNumber& result) const { |
| 165 | result.flags |= FLAG_PERCENT; |
| 166 | result.setCharsConsumed(segment); |
| 167 | } |
| 168 | |
| 169 | |
| 170 | PermilleMatcher::PermilleMatcher(const DecimalFormatSymbols& dfs) |
| 171 | : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol), unisets::PERMILLE_SIGN) { |
| 172 | } |
| 173 | |
| 174 | bool PermilleMatcher::isDisabled(const ParsedNumber& result) const { |
| 175 | return 0 != (result.flags & FLAG_PERMILLE); |
| 176 | } |
| 177 | |
| 178 | void PermilleMatcher::accept(StringSegment& segment, ParsedNumber& result) const { |
| 179 | result.flags |= FLAG_PERMILLE; |
| 180 | result.setCharsConsumed(segment); |
| 181 | } |
| 182 | |
| 183 | |
| 184 | PlusSignMatcher::PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) |
| 185 | : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol), unisets::PLUS_SIGN), |
| 186 | fAllowTrailing(allowTrailing) { |
| 187 | } |
| 188 | |
| 189 | bool PlusSignMatcher::isDisabled(const ParsedNumber& result) const { |
| 190 | return !fAllowTrailing && result.seenNumber(); |
| 191 | } |
| 192 | |
| 193 | void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const { |
| 194 | result.setCharsConsumed(segment); |
| 195 | } |
| 196 | |
| 197 | |
| 198 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
| 199 | |