| 1 | // © 2018 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | |
| 4 | #include "unicode/utypes.h" |
| 5 | |
| 6 | #if !UCONFIG_NO_FORMATTING |
| 7 | |
| 8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: |
| 9 | // Helpful in toString methods and elsewhere. |
| 10 | #define UNISTR_FROM_STRING_EXPLICIT |
| 11 | |
| 12 | #include "numparse_types.h" |
| 13 | #include "numparse_currency.h" |
| 14 | #include "ucurrimp.h" |
| 15 | #include "unicode/errorcode.h" |
| 16 | #include "numparse_utils.h" |
| 17 | #include "string_segment.h" |
| 18 | |
| 19 | using namespace icu; |
| 20 | using namespace icu::numparse; |
| 21 | using namespace icu::numparse::impl; |
| 22 | |
| 23 | |
| 24 | CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, |
| 25 | parse_flags_t parseFlags, UErrorCode& status) |
| 26 | : fCurrency1(currencySymbols.getCurrencySymbol(status)), |
| 27 | fCurrency2(currencySymbols.getIntlCurrencySymbol(status)), |
| 28 | fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)), |
| 29 | afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)), |
| 30 | beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)), |
| 31 | fLocaleName(dfs.getLocale().getName(), -1, status) { |
| 32 | utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode()); |
| 33 | |
| 34 | // Pre-load the long names for the current locale and currency |
| 35 | // if we are parsing without the full currency data. |
| 36 | if (!fUseFullCurrencyData) { |
| 37 | for (int32_t i=0; i<StandardPlural::COUNT; i++) { |
| 38 | auto plural = static_cast<StandardPlural::Form>(i); |
| 39 | fLocalLongNames[i] = currencySymbols.getPluralName(plural, status); |
| 40 | } |
| 41 | } |
| 42 | |
| 43 | // TODO: Figure out how to make this faster and re-enable. |
| 44 | // Computing the "lead code points" set for fastpathing is too slow to use in production. |
| 45 | // See http://bugs.icu-project.org/trac/ticket/13584 |
| 46 | // // Compute the full set of characters that could be the first in a currency to allow for |
| 47 | // // efficient smoke test. |
| 48 | // fLeadCodePoints.add(fCurrency1.char32At(0)); |
| 49 | // fLeadCodePoints.add(fCurrency2.char32At(0)); |
| 50 | // fLeadCodePoints.add(beforeSuffixInsert.char32At(0)); |
| 51 | // uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status); |
| 52 | // // Always apply case mapping closure for currencies |
| 53 | // fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS); |
| 54 | // fLeadCodePoints.freeze(); |
| 55 | } |
| 56 | |
| 57 | bool |
| 58 | CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { |
| 59 | if (result.currencyCode[0] != 0) { |
| 60 | return false; |
| 61 | } |
| 62 | |
| 63 | // Try to match a currency spacing separator. |
| 64 | int32_t initialOffset = segment.getOffset(); |
| 65 | bool maybeMore = false; |
| 66 | if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) { |
| 67 | int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert); |
| 68 | if (overlap == beforeSuffixInsert.length()) { |
| 69 | segment.adjustOffset(overlap); |
| 70 | // Note: let currency spacing be a weak match. Don't update chars consumed. |
| 71 | } |
| 72 | maybeMore = maybeMore || overlap == segment.length(); |
| 73 | } |
| 74 | |
| 75 | // Match the currency string, and reset if we didn't find one. |
| 76 | maybeMore = maybeMore || matchCurrency(segment, result, status); |
| 77 | if (result.currencyCode[0] == 0) { |
| 78 | segment.setOffset(initialOffset); |
| 79 | return maybeMore; |
| 80 | } |
| 81 | |
| 82 | // Try to match a currency spacing separator. |
| 83 | if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) { |
| 84 | int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert); |
| 85 | if (overlap == afterPrefixInsert.length()) { |
| 86 | segment.adjustOffset(overlap); |
| 87 | // Note: let currency spacing be a weak match. Don't update chars consumed. |
| 88 | } |
| 89 | maybeMore = maybeMore || overlap == segment.length(); |
| 90 | } |
| 91 | |
| 92 | return maybeMore; |
| 93 | } |
| 94 | |
| 95 | bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result, |
| 96 | UErrorCode& status) const { |
| 97 | bool maybeMore = false; |
| 98 | |
| 99 | int32_t overlap1; |
| 100 | if (!fCurrency1.isEmpty()) { |
| 101 | overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1); |
| 102 | } else { |
| 103 | overlap1 = -1; |
| 104 | } |
| 105 | maybeMore = maybeMore || overlap1 == segment.length(); |
| 106 | if (overlap1 == fCurrency1.length()) { |
| 107 | utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); |
| 108 | segment.adjustOffset(overlap1); |
| 109 | result.setCharsConsumed(segment); |
| 110 | return maybeMore; |
| 111 | } |
| 112 | |
| 113 | int32_t overlap2; |
| 114 | if (!fCurrency2.isEmpty()) { |
| 115 | // ISO codes should be accepted case-insensitive. |
| 116 | // https://unicode-org.atlassian.net/browse/ICU-13696 |
| 117 | overlap2 = segment.getCommonPrefixLength(fCurrency2); |
| 118 | } else { |
| 119 | overlap2 = -1; |
| 120 | } |
| 121 | maybeMore = maybeMore || overlap2 == segment.length(); |
| 122 | if (overlap2 == fCurrency2.length()) { |
| 123 | utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); |
| 124 | segment.adjustOffset(overlap2); |
| 125 | result.setCharsConsumed(segment); |
| 126 | return maybeMore; |
| 127 | } |
| 128 | |
| 129 | if (fUseFullCurrencyData) { |
| 130 | // Use the full currency data. |
| 131 | // NOTE: This call site should be improved with #13584. |
| 132 | const UnicodeString segmentString = segment.toTempUnicodeString(); |
| 133 | |
| 134 | // Try to parse the currency |
| 135 | ParsePosition ppos(0); |
| 136 | int32_t partialMatchLen = 0; |
| 137 | uprv_parseCurrency( |
| 138 | fLocaleName.data(), |
| 139 | segmentString, |
| 140 | ppos, |
| 141 | UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME |
| 142 | &partialMatchLen, |
| 143 | result.currencyCode, |
| 144 | status); |
| 145 | maybeMore = maybeMore || partialMatchLen == segment.length(); |
| 146 | |
| 147 | if (U_SUCCESS(status) && ppos.getIndex() != 0) { |
| 148 | // Complete match. |
| 149 | // NOTE: The currency code should already be saved in the ParsedNumber. |
| 150 | segment.adjustOffset(ppos.getIndex()); |
| 151 | result.setCharsConsumed(segment); |
| 152 | return maybeMore; |
| 153 | } |
| 154 | |
| 155 | } else { |
| 156 | // Use the locale long names. |
| 157 | int32_t longestFullMatch = 0; |
| 158 | for (int32_t i=0; i<StandardPlural::COUNT; i++) { |
| 159 | const UnicodeString& name = fLocalLongNames[i]; |
| 160 | int32_t overlap = segment.getCommonPrefixLength(name); |
| 161 | if (overlap == name.length() && name.length() > longestFullMatch) { |
| 162 | longestFullMatch = name.length(); |
| 163 | } |
| 164 | maybeMore = maybeMore || overlap > 0; |
| 165 | } |
| 166 | if (longestFullMatch > 0) { |
| 167 | utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); |
| 168 | segment.adjustOffset(longestFullMatch); |
| 169 | result.setCharsConsumed(segment); |
| 170 | return maybeMore; |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | // No match found. |
| 175 | return maybeMore; |
| 176 | } |
| 177 | |
| 178 | bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const { |
| 179 | // TODO: See constructor |
| 180 | return true; |
| 181 | //return segment.startsWith(fLeadCodePoints); |
| 182 | } |
| 183 | |
| 184 | UnicodeString CombinedCurrencyMatcher::toString() const { |
| 185 | return u"<CombinedCurrencyMatcher>" ; |
| 186 | } |
| 187 | |
| 188 | |
| 189 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
| 190 | |