numparse_currency.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/numparse_currency.cpp]

1	// © 2018 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3
4	#include "unicode/utypes.h"
5
6	#if !UCONFIG_NO_FORMATTING
7
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
10	#define UNISTR_FROM_STRING_EXPLICIT
11
12	#include "numparse_types.h"
13	#include "numparse_currency.h"
14	#include "ucurrimp.h"
15	#include "unicode/errorcode.h"
16	#include "numparse_utils.h"
17	#include "string_segment.h"
18
19	using namespace icu;
20	using namespace icu::numparse;
21	using namespace icu::numparse::impl;
22
23
24	CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
25	parse_flags_t parseFlags, UErrorCode& status)
26	: fCurrency1(currencySymbols.getCurrencySymbol(status)),
27	fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
28	fUseFullCurrencyData(`0` == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)),
29	afterPrefixInsert (dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
30	beforeSuffixInsert (dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
31	fLocaleName (dfs.getLocale().getName(), -`1`, status) {
32	utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
33
34	// Pre-load the long names for the current locale and currency
35	// if we are parsing without the full currency data.
36	if (!fUseFullCurrencyData) {
37	for (int32_t i=`0`; i<StandardPlural::COUNT; i++) {
38	auto plural = static_cast<StandardPlural::Form>(i);
39	fLocalLongNames[i] = currencySymbols.getPluralName(plural, status);
40	}
41	}
42
43	// TODO: Figure out how to make this faster and re-enable.
44	// Computing the "lead code points" set for fastpathing is too slow to use in production.
45	// See http://bugs.icu-project.org/trac/ticket/13584
46	// // Compute the full set of characters that could be the first in a currency to allow for
47	// // efficient smoke test.
48	// fLeadCodePoints.add(fCurrency1.char32At(0));
49	// fLeadCodePoints.add(fCurrency2.char32At(0));
50	// fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
51	// uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
52	// // Always apply case mapping closure for currencies
53	// fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
54	// fLeadCodePoints.freeze();
55	}
56
57	bool
58	CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
59	if (result.currencyCode[`0`] != `0`) {
60	return false;
61	}
62
63	// Try to match a currency spacing separator.
64	int32_t initialOffset = segment.getOffset();
65	bool maybeMore = false;
66	if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
67	int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
68	if (overlap == beforeSuffixInsert.length()) {
69	segment.adjustOffset(overlap);
70	// Note: let currency spacing be a weak match. Don't update chars consumed.
71	}
72	maybeMore = maybeMore \|\| overlap == segment.length();
73	}
74
75	// Match the currency string, and reset if we didn't find one.
76	maybeMore = maybeMore \|\| matchCurrency(segment, result, status);
77	if (result.currencyCode[`0`] == `0`) {
78	segment.setOffset(initialOffset);
79	return maybeMore;
80	}
81
82	// Try to match a currency spacing separator.
83	if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
84	int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
85	if (overlap == afterPrefixInsert.length()) {
86	segment.adjustOffset(overlap);
87	// Note: let currency spacing be a weak match. Don't update chars consumed.
88	}
89	maybeMore = maybeMore \|\| overlap == segment.length();
90	}
91
92	return maybeMore;
93	}
94
95	bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
96	UErrorCode& status) const {
97	bool maybeMore = false;
98
99	int32_t overlap1;
100	if (!fCurrency1.isEmpty()) {
101	overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
102	} else {
103	overlap1 = -`1`;
104	}
105	maybeMore = maybeMore \|\| overlap1 == segment.length();
106	if (overlap1 == fCurrency1.length()) {
107	utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
108	segment.adjustOffset(overlap1);
109	result.setCharsConsumed(segment);
110	return maybeMore;
111	}
112
113	int32_t overlap2;
114	if (!fCurrency2.isEmpty()) {
115	// ISO codes should be accepted case-insensitive.
116	// https://unicode-org.atlassian.net/browse/ICU-13696
117	overlap2 = segment.getCommonPrefixLength(fCurrency2);
118	} else {
119	overlap2 = -`1`;
120	}
121	maybeMore = maybeMore \|\| overlap2 == segment.length();
122	if (overlap2 == fCurrency2.length()) {
123	utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
124	segment.adjustOffset(overlap2);
125	result.setCharsConsumed(segment);
126	return maybeMore;
127	}
128
129	if (fUseFullCurrencyData) {
130	// Use the full currency data.
131	// NOTE: This call site should be improved with #13584.
132	const UnicodeString segmentString = segment.toTempUnicodeString();
133
134	// Try to parse the currency
135	ParsePosition ppos(`0`);
136	int32_t partialMatchLen = `0`;
137	uprv_parseCurrency(
138	fLocaleName.data(),
139	segmentString,
140	ppos,
141	UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
142	&partialMatchLen,
143	result.currencyCode,
144	status);
145	maybeMore = maybeMore \|\| partialMatchLen == segment.length();
146
147	if (U_SUCCESS(status) && ppos.getIndex() != `0`) {
148	// Complete match.
149	// NOTE: The currency code should already be saved in the ParsedNumber.
150	segment.adjustOffset(ppos.getIndex());
151	result.setCharsConsumed(segment);
152	return maybeMore;
153	}
154
155	} else {
156	// Use the locale long names.
157	int32_t longestFullMatch = `0`;
158	for (int32_t i=`0`; i<StandardPlural::COUNT; i++) {
159	const UnicodeString& name = fLocalLongNames[i];
160	int32_t overlap = segment.getCommonPrefixLength(name);
161	if (overlap == name.length() && name.length() > longestFullMatch) {
162	longestFullMatch = name.length();
163	}
164	maybeMore = maybeMore \|\| overlap > `0`;
165	}
166	if (longestFullMatch > `0`) {
167	utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
168	segment.adjustOffset(longestFullMatch);
169	result.setCharsConsumed(segment);
170	return maybeMore;
171	}
172	}
173
174	// No match found.
175	return maybeMore;
176	}
177
178	bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const {
179	// TODO: See constructor
180	return true;
181	//return segment.startsWith(fLeadCodePoints);
182	}
183
184	UnicodeString CombinedCurrencyMatcher::toString() const {
185	return u"<CombinedCurrencyMatcher>";
186	}
187
188
189	#endif /* #if !UCONFIG_NO_FORMATTING */
190

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/numparse_currency.cpp