// number_compact.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/number_compact.cpp]

1	// © 2017 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3
4	#include "unicode/utypes.h"
5
6	#if !UCONFIG_NO_FORMATTING
7
8	#include "unicode/ustring.h"
9	#include "unicode/ures.h"
10	#include "cstring.h"
11	#include "charstr.h"
12	#include "resource.h"
13	#include "number_compact.h"
14	#include "number_microprops.h"
15	#include "uresimp.h"
16
17	using namespace icu;
18	using namespace icu::number;
19	using namespace icu::number::impl;
20
21	namespace {
22
23	// A dummy object used when a "0" compact decimal entry is encountered. This is necessary
24	// in order to prevent falling back to root. Object equality ("==") is intended.
25	const UChar *USE_FALLBACK = u"<USE FALLBACK>";
26
27	/* Produces a string like "NumberElements/latn/patternsShort/decimalFormat". /
28	void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType,
29	CharString &sb, UErrorCode &status) {
30	sb.clear();
31	sb.append("NumberElements/", status);
32	sb.append(nsName, status);
33	sb.append(compactStyle == CompactStyle::UNUM_SHORT ? "/patternsShort" : "/patternsLong", status);
34	sb.append(compactType == CompactType::TYPE_DECIMAL ? "/decimalFormat" : "/currencyFormat", status);
35	}
36
37	int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) {
38	return magnitude * StandardPlural::COUNT + plural;
39	}
40
41	int32_t countZeros(const UChar *patternString, int32_t patternLength) {
42	// NOTE: This strategy for computing the number of zeros is a hack for efficiency.
43	// It could break if there are any 0s that aren't part of the main pattern.
44	int32_t numZeros = `0`;
45	for (int32_t i = `0`; i < patternLength; i++) {
46	if (patternString[i] == u`'0'`) {
47	numZeros++;
48	} else if (numZeros > `0`) {
49	break; // zeros should always be contiguous
50	}
51	}
52	return numZeros;
53	}
54
55	} // namespace
56
57	// NOTE: patterns and multipliers both get zero-initialized.
58	CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(`0`), isEmpty(TRUE) {
59	}
60
61	void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle,
62	CompactType compactType, UErrorCode &status) {
63	CompactDataSink sink(*this);
64	LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status));
65	if (U_FAILURE(status)) { return; }
66
67	bool nsIsLatn = strcmp(nsName, "latn") == `0`;
68	bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT;
69
70	// Fall back to latn numbering system and/or short compact style.
71	CharString resourceKey;
72	getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status);
73	UErrorCode localStatus = U_ZERO_ERROR;
74	ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
75	if (isEmpty && !nsIsLatn) {
76	getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status);
77	localStatus = U_ZERO_ERROR;
78	ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
79	}
80	if (isEmpty && !compactIsShort) {
81	getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
82	localStatus = U_ZERO_ERROR;
83	ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
84	}
85	if (isEmpty && !nsIsLatn && !compactIsShort) {
86	getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
87	localStatus = U_ZERO_ERROR;
88	ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
89	}
90
91	// The last fallback should be guaranteed to return data.
92	if (isEmpty) {
93	status = U_INTERNAL_PROGRAM_ERROR;
94	}
95	}
96
97	int32_t CompactData::getMultiplier(int32_t magnitude) const {
98	if (magnitude < `0`) {
99	return `0`;
100	}
101	if (magnitude > largestMagnitude) {
102	magnitude = largestMagnitude;
103	}
104	return multipliers[magnitude];
105	}
106
107	const UChar CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const* {
108	if (magnitude < `0`) {
109	return nullptr;
110	}
111	if (magnitude > largestMagnitude) {
112	magnitude = largestMagnitude;
113	}
114	const UChar *patternString = patterns[getIndex(magnitude, plural)];
115	if (patternString == nullptr && plural != StandardPlural::OTHER) {
116	// Fall back to "other" plural variant
117	patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)];
118	}
119	if (patternString == USE_FALLBACK) { // == is intended
120	// Return null if USE_FALLBACK is present
121	patternString = nullptr;
122	}
123	return patternString;
124	}
125
126	void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const {
127	U_ASSERT(output.isEmpty());
128	// NOTE: In C++, this is done more manually with a UVector.
129	// In Java, we can take advantage of JDK HashSet.
130	for (auto pattern : patterns) {
131	if (pattern == nullptr \|\| pattern == USE_FALLBACK) {
132	continue;
133	}
134
135	// Insert pattern into the UVector if the UVector does not already contain the pattern.
136	// Search the UVector from the end since identical patterns are likely to be adjacent.
137	for (int32_t i = output.size() - `1`; i >= `0`; i--) {
138	if (u_strcmp(pattern, static_cast<const UChar *>(output [i])) == `0`) {
139	goto continue_outer;
140	}
141	}
142
143	// The string was not found; add it to the UVector.
144	// ANDY: This requires a const_cast. Why?
145	output.addElement(const_cast<UChar *>(pattern), status);
146
147	continue_outer:
148	continue;
149	}
150	}
151
152	void CompactData::CompactDataSink::put(const char key, ResourceValue &value, UBool /noFallback/*,
153	UErrorCode &status) {
154	// traverse into the table of powers of ten
155	ResourceTable powersOfTenTable = value.getTable(status);
156	if (U_FAILURE(status)) { return; }
157	for (int i3 = `0`; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
158
159	// Assumes that the keys are always of the form "10000" where the magnitude is the
160	// length of the key minus one. We expect magnitudes to be less than MAX_DIGITS.
161	auto magnitude = static_cast<int8_t> (strlen(key) - `1`);
162	int8_t multiplier = data.multipliers[magnitude];
163	U_ASSERT(magnitude < COMPACT_MAX_DIGITS);
164
165	// Iterate over the plural variants ("one", "other", etc)
166	ResourceTable pluralVariantsTable = value.getTable(status);
167	if (U_FAILURE(status)) { return; }
168	for (int i4 = `0`; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
169
170	// Skip this magnitude/plural if we already have it from a child locale.
171	// Note: This also skips USE_FALLBACK entries.
172	StandardPlural::Form plural = StandardPlural::fromString(key, status);
173	if (U_FAILURE(status)) { return; }
174	if (data.patterns[getIndex(magnitude, plural)] != nullptr) {
175	continue;
176	}
177
178	// The value "0" means that we need to use the default pattern and not fall back
179	// to parent locales. Example locale where this is relevant: 'it'.
180	int32_t patternLength;
181	const UChar *patternString = value.getString(patternLength, status);
182	if (U_FAILURE(status)) { return; }
183	if (u_strcmp(patternString, u"0") == `0`) {
184	patternString = USE_FALLBACK;
185	patternLength = `0`;
186	}
187
188	// Save the pattern string. We will parse it lazily.
189	data.patterns[getIndex(magnitude, plural)] = patternString;
190
191	// If necessary, compute the multiplier: the difference between the magnitude
192	// and the number of zeros in the pattern.
193	if (multiplier == `0`) {
194	int32_t numZeros = countZeros(patternString, patternLength);
195	if (numZeros > `0`) { // numZeros==0 in certain cases, like Somali "Kun"
196	multiplier = static_cast<int8_t> (numZeros - magnitude - `1`);
197	}
198	}
199	}
200
201	// Save the multiplier.
202	if (data.multipliers[magnitude] == `0`) {
203	data.multipliers[magnitude] = multiplier;
204	if (magnitude > data.largestMagnitude) {
205	data.largestMagnitude = magnitude;
206	}
207	data.isEmpty = false;
208	} else {
209	U_ASSERT(data.multipliers[magnitude] == multiplier);
210	}
211	}
212	}
213
214	///////////////////////////////////////////////////////////
215	/// END OF CompactData.java; BEGIN CompactNotation.java ///
216	///////////////////////////////////////////////////////////
217
218	CompactHandler::CompactHandler(
219	CompactStyle compactStyle,
220	const Locale &locale,
221	const char *nsName,
222	CompactType compactType,
223	const PluralRules *rules,
224	MutablePatternModifier *buildReference,
225	bool safe,
226	const MicroPropsGenerator *parent,
227	UErrorCode &status)
228	: rules(rules), parent(parent), safe(safe) {
229	data.populate(locale, nsName, compactStyle, compactType, status);
230	if (safe) {
231	// Safe code path
232	precomputeAllModifiers(*buildReference, status);
233	} else {
234	// Unsafe code path
235	// Store the MutablePatternModifier reference.
236	unsafePatternModifier = buildReference;
237	}
238	}
239
240	CompactHandler::~CompactHandler() {
241	for (int32_t i = `0`; i < precomputedModsLength; i++) {
242	delete precomputedMods [i].mod;
243	}
244	}
245
246	void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) {
247	if (U_FAILURE(status)) { return; }
248
249	// Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T
250	UVector allPatterns(`12`, status);
251	if (U_FAILURE(status)) { return; }
252	data.getUniquePatterns(allPatterns, status);
253	if (U_FAILURE(status)) { return; }
254
255	// C++ only: ensure that precomputedMods has room.
256	precomputedModsLength = allPatterns.size();
257	if (precomputedMods.getCapacity() < precomputedModsLength) {
258	precomputedMods.resize(allPatterns.size(), status);
259	if (U_FAILURE(status)) { return; }
260	}
261
262	for (int32_t i = `0`; i < precomputedModsLength; i++) {
263	auto patternString = static_cast<const UChar *>(allPatterns [i]);
264	UnicodeString hello(patternString);
265	CompactModInfo &info = precomputedMods [i];
266	ParsedPatternInfo patternInfo;
267	PatternParser::parseToPatternInfo(UnicodeString (patternString), patternInfo, status);
268	if (U_FAILURE(status)) { return; }
269	buildReference.setPatternInfo(&patternInfo, UNUM_COMPACT_FIELD);
270	info.mod = buildReference.createImmutable(status);
271	if (U_FAILURE(status)) { return; }
272	info.patternString = patternString;
273	}
274	}
275
276	void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
277	UErrorCode &status) const {
278	parent->processQuantity(quantity, micros, status);
279	if (U_FAILURE(status)) { return; }
280
281	// Treat zero, NaN, and infinity as if they had magnitude 0
282	int32_t magnitude;
283	if (quantity.isZeroish()) {
284	magnitude = `0`;
285	micros.rounder.apply(quantity, status);
286	} else {
287	// TODO: Revisit chooseMultiplierAndApply
288	int32_t multiplier = micros.rounder.chooseMultiplierAndApply(quantity, data, status);
289	magnitude = quantity.isZeroish() ? `0` : quantity.getMagnitude();
290	magnitude -= multiplier;
291	}
292
293	StandardPlural::Form plural = utils::getStandardPlural(rules, quantity);
294	const UChar *patternString = data.getPattern(magnitude, plural);
295	if (patternString == nullptr) {
296	// Use the default (non-compact) modifier.
297	// No need to take any action.
298	} else if (safe) {
299	// Safe code path.
300	// Java uses a hash set here for O(1) lookup. C++ uses a linear search.
301	// TODO: Benchmark this and maybe change to a binary search or hash table.
302	int32_t i = `0`;
303	for (; i < precomputedModsLength; i++) {
304	const CompactModInfo &info = precomputedMods [i];
305	if (u_strcmp(patternString, info.patternString) == `0`) {
306	info.mod->applyToMicros(micros, quantity, status);
307	break;
308	}
309	}
310	// It should be guaranteed that we found the entry.
311	U_ASSERT(i < precomputedModsLength);
312	} else {
313	// Unsafe code path.
314	// Overwrite the PatternInfo in the existing modMiddle.
315	// C++ Note: Use unsafePatternInfo for proper lifecycle.
316	ParsedPatternInfo &patternInfo = const_cast<CompactHandler >(this*)->unsafePatternInfo;
317	PatternParser::parseToPatternInfo(UnicodeString (patternString), patternInfo, status);
318	unsafePatternModifier->setPatternInfo(&unsafePatternInfo, UNUM_COMPACT_FIELD);
319	unsafePatternModifier->setNumberProperties(quantity.signum(), StandardPlural::Form::COUNT);
320	micros.modMiddle = unsafePatternModifier;
321	}
322
323	// We already performed rounding. Do not perform it again.
324	micros.rounder = RoundingImpl::passThrough();
325	}
326
327	#endif /* #if !UCONFIG_NO_FORMATTING */
328

// Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/number_compact.cpp