numrange_impl.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/numrange_impl.cpp]

1	// © 2018 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3
4	#include "unicode/utypes.h"
5
6	#if !UCONFIG_NO_FORMATTING
7
8	// Allow implicit conversion from char16_t* to UnicodeString for this file:
9	// Helpful in toString methods and elsewhere.
10	#define UNISTR_FROM_STRING_EXPLICIT
11
12	#include "unicode/numberrangeformatter.h"
13	#include "numrange_impl.h"
14	#include "patternprops.h"
15	#include "uresimp.h"
16	#include "util.h"
17
18	using namespace icu;
19	using namespace icu::number;
20	using namespace icu::number::impl;
21
22	namespace {
23
24	// Helper function for 2-dimensional switch statement
25	constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
26	return static_cast<int8_t>(a) \| (static_cast<int8_t>(b) << `4`);
27	}
28
29
30	struct NumberRangeData {
31	SimpleFormatter rangePattern;
32	SimpleFormatter approximatelyPattern;
33	};
34
35	class NumberRangeDataSink : public ResourceSink {
36	public:
37	NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
38
39	void put(const char* key, ResourceValue& value, UBool /noFallback/, UErrorCode& status) U_OVERRIDE {
40	ResourceTable miscTable = value.getTable(status);
41	if (U_FAILURE(status)) { return; }
42	for (int i = `0`; miscTable.getKeyAndValue(i, key, value); i++) {
43	if (uprv_strcmp(key, "range") == `0`) {
44	if (hasRangeData()) {
45	continue; // have already seen this pattern
46	}
47	fData.rangePattern = {value.getUnicodeString(status), status};
48	} else if (uprv_strcmp(key, "approximately") == `0`) {
49	if (hasApproxData()) {
50	continue; // have already seen this pattern
51	}
52	fData.approximatelyPattern = {value.getUnicodeString(status), status};
53	}
54	}
55	}
56
57	bool hasRangeData() {
58	return fData.rangePattern.getArgumentLimit() != `0`;
59	}
60
61	bool hasApproxData() {
62	return fData.approximatelyPattern.getArgumentLimit() != `0`;
63	}
64
65	bool isComplete() {
66	return hasRangeData() && hasApproxData();
67	}
68
69	void fillInDefaults(UErrorCode& status) {
70	if (!hasRangeData()) {
71	fData.rangePattern = {u"{0}–{1}", status};
72	}
73	if (!hasApproxData()) {
74	fData.approximatelyPattern = {u"~{0}", status};
75	}
76	}
77
78	private:
79	NumberRangeData& fData;
80	};
81
82	void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
83	if (U_FAILURE(status)) { return; }
84	LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
85	if (U_FAILURE(status)) { return; }
86	NumberRangeDataSink sink(data);
87
88	CharString dataPath;
89	dataPath.append("NumberElements/", -`1`, status);
90	dataPath.append(nsName, -`1`, status);
91	dataPath.append("/miscPatterns", -`1`, status);
92	if (U_FAILURE(status)) { return; }
93
94	UErrorCode localStatus = U_ZERO_ERROR;
95	ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
96	if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
97	status = localStatus;
98	return;
99	}
100
101	// Fall back to latn if necessary
102	if (!sink.isComplete()) {
103	ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
104	}
105
106	sink.fillInDefaults(status);
107	}
108
109	class PluralRangesDataSink : public ResourceSink {
110	public:
111	PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {}
112
113	void put(const char* /key/, ResourceValue& value, UBool /noFallback/, UErrorCode& status) U_OVERRIDE {
114	ResourceArray entriesArray = value.getArray(status);
115	if (U_FAILURE(status)) { return; }
116	fOutput.setCapacity(entriesArray.getSize());
117	for (int i = `0`; entriesArray.getValue(i, value); i++) {
118	ResourceArray pluralFormsArray = value.getArray(status);
119	if (U_FAILURE(status)) { return; }
120	pluralFormsArray.getValue(`0`, value);
121	StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status);
122	if (U_FAILURE(status)) { return; }
123	pluralFormsArray.getValue(`1`, value);
124	StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status);
125	if (U_FAILURE(status)) { return; }
126	pluralFormsArray.getValue(`2`, value);
127	StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status);
128	if (U_FAILURE(status)) { return; }
129	fOutput.addPluralRange(first, second, result);
130	}
131	}
132
133	private:
134	StandardPluralRanges& fOutput;
135	};
136
137	void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) {
138	if (U_FAILURE(status)) { return; }
139	LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status));
140	if (U_FAILURE(status)) { return; }
141
142	CharString dataPath;
143	dataPath.append("locales/", -`1`, status);
144	dataPath.append(locale.getLanguage(), -`1`, status);
145	if (U_FAILURE(status)) { return; }
146	int32_t setLen;
147	// Not all languages are covered: fail gracefully
148	UErrorCode internalStatus = U_ZERO_ERROR;
149	const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus);
150	if (U_FAILURE(internalStatus)) { return; }
151
152	dataPath.clear();
153	dataPath.append("rules/", -`1`, status);
154	dataPath.appendInvariantChars(set, setLen, status);
155	if (U_FAILURE(status)) { return; }
156	PluralRangesDataSink sink(output);
157	ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
158	if (U_FAILURE(status)) { return; }
159	}
160
161	} // namespace
162
163
164	void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) {
165	getPluralRangesData(locale, *this, status);
166	}
167
168	void StandardPluralRanges::addPluralRange(
169	StandardPlural::Form first,
170	StandardPlural::Form second,
171	StandardPlural::Form result) {
172	U_ASSERT(fTriplesLen < fTriples.getCapacity());
173	fTriples [fTriplesLen] = {first, second, result};
174	fTriplesLen++;
175	}
176
177	void StandardPluralRanges::setCapacity(int32_t length) {
178	if (length > fTriples.getCapacity()) {
179	fTriples.resize(length, `0`);
180	}
181	}
182
183	StandardPlural::Form
184	StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const {
185	for (int32_t i=`0`; i<fTriplesLen; i++) {
186	const auto& triple = fTriples [i];
187	if (triple.first == first && triple.second == second) {
188	return triple.result;
189	}
190	}
191	// Default fallback
192	return StandardPlural::OTHER;
193	}
194
195
196	NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
197	: formatterImpl1 (macros.formatter1.fMacros, status),
198	formatterImpl2 (macros.formatter2.fMacros, status),
199	fSameFormatters(macros.singleFormatter),
200	fCollapse(macros.collapse),
201	fIdentityFallback(macros.identityFallback) {
202
203	const char* nsName = formatterImpl1.getRawMicroProps().nsName;
204	if (uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != `0`) {
205	status = U_ILLEGAL_ARGUMENT_ERROR;
206	return;
207	}
208
209	NumberRangeData data;
210	getNumberRangeData(macros.locale.getName(), nsName, data, status);
211	if (U_FAILURE(status)) { return; }
212	fRangeFormatter = data.rangePattern;
213	fApproximatelyModifier = {data.approximatelyPattern, UNUM_FIELD_COUNT, false};
214
215	// TODO: Get locale from PluralRules instead?
216	fPluralRanges.initialize(macros.locale, status);
217	if (U_FAILURE(status)) { return; }
218	}
219
220	void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
221	if (U_FAILURE(status)) {
222	return;
223	}
224
225	MicroProps micros1;
226	MicroProps micros2;
227	formatterImpl1.preProcess(data.quantity1, micros1, status);
228	if (fSameFormatters) {
229	formatterImpl1.preProcess(data.quantity2, micros2, status);
230	} else {
231	formatterImpl2.preProcess(data.quantity2, micros2, status);
232	}
233	if (U_FAILURE(status)) {
234	return;
235	}
236
237	// If any of the affixes are different, an identity is not possible
238	// and we must use formatRange().
239	// TODO: Write this as MicroProps operator==() ?
240	// TODO: Avoid the redundancy of these equality operations with the
241	// ones in formatRange?
242	if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
243	\|\| !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
244	\|\| !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
245	formatRange(data, micros1, micros2, status);
246	data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
247	return;
248	}
249
250	// Check for identity
251	if (equalBeforeRounding) {
252	data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
253	} else if (data.quantity1 == data.quantity2) {
254	data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
255	} else {
256	data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
257	}
258
259	switch (identity2d(fIdentityFallback, data.identityResult)) {
260	case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
261	UNUM_IDENTITY_RESULT_NOT_EQUAL):
262	case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
263	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
264	case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
265	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
266	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
267	UNUM_IDENTITY_RESULT_NOT_EQUAL):
268	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
269	UNUM_IDENTITY_RESULT_NOT_EQUAL):
270	case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
271	UNUM_IDENTITY_RESULT_NOT_EQUAL):
272	formatRange(data, micros1, micros2, status);
273	break;
274
275	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
276	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
277	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
278	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
279	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
280	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
281	formatApproximately(data, micros1, micros2, status);
282	break;
283
284	case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
285	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
286	case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
287	UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
288	case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
289	UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
290	formatSingleValue(data, micros1, micros2, status);
291	break;
292
293	default:
294	UPRV_UNREACHABLE;
295	}
296	}
297
298
299	void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
300	MicroProps& micros1, MicroProps& micros2,
301	UErrorCode& status) const {
302	if (U_FAILURE(status)) { return; }
303	if (fSameFormatters) {
304	int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), `0`, status);
305	NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), `0`, length, status);
306	} else {
307	formatRange(data, micros1, micros2, status);
308	}
309	}
310
311
312	void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
313	MicroProps& micros1, MicroProps& micros2,
314	UErrorCode& status) const {
315	if (U_FAILURE(status)) { return; }
316	if (fSameFormatters) {
317	int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), `0`, status);
318	// HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
319	length += micros1.modInner->apply(data.getStringRef(), `0`, length, status);
320	length += micros1.modMiddle->apply(data.getStringRef(), `0`, length, status);
321	length += fApproximatelyModifier.apply(data.getStringRef(), `0`, length, status);
322	micros1.modOuter->apply(data.getStringRef(), `0`, length, status);
323	} else {
324	formatRange(data, micros1, micros2, status);
325	}
326	}
327
328
329	void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
330	MicroProps& micros1, MicroProps& micros2,
331	UErrorCode& status) const {
332	if (U_FAILURE(status)) { return; }
333
334	// modInner is always notation (scientific); collapsable in ALL.
335	// modOuter is always units; collapsable in ALL, AUTO, and UNIT.
336	// modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
337	// Never collapse an outer mod but not an inner mod.
338	bool collapseOuter, collapseMiddle, collapseInner;
339	switch (fCollapse) {
340	case UNUM_RANGE_COLLAPSE_ALL:
341	case UNUM_RANGE_COLLAPSE_AUTO:
342	case UNUM_RANGE_COLLAPSE_UNIT:
343	{
344	// OUTER MODIFIER
345	collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
346
347	if (!collapseOuter) {
348	// Never collapse inner mods if outer mods are not collapsable
349	collapseMiddle = false;
350	collapseInner = false;
351	break;
352	}
353
354	// MIDDLE MODIFIER
355	collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
356
357	if (!collapseMiddle) {
358	// Never collapse inner mods if outer mods are not collapsable
359	collapseInner = false;
360	break;
361	}
362
363	// MIDDLE MODIFIER HEURISTICS
364	// (could disable collapsing of the middle modifier)
365	// The modifiers are equal by this point, so we can look at just one of them.
366	const Modifier* mm = micros1.modMiddle;
367	if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
368	// Only collapse if the modifier is a unit.
369	// TODO: Make a better way to check for a unit?
370	// TODO: Handle case where the modifier has both notation and unit (compact currency)?
371	if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) {
372	collapseMiddle = false;
373	}
374	} else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
375	// Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
376	if (mm->getCodePointCount() <= `1`) {
377	collapseMiddle = false;
378	}
379	}
380
381	if (!collapseMiddle \|\| fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
382	collapseInner = false;
383	break;
384	}
385
386	// INNER MODIFIER
387	collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
388
389	// All done checking for collapsability.
390	break;
391	}
392
393	default:
394	collapseOuter = false;
395	collapseMiddle = false;
396	collapseInner = false;
397	break;
398	}
399
400	FormattedStringBuilder& string = data.getStringRef();
401	int32_t lengthPrefix = `0`;
402	int32_t length1 = `0`;
403	int32_t lengthInfix = `0`;
404	int32_t length2 = `0`;
405	int32_t lengthSuffix = `0`;
406
407	// Use #define so that these are evaluated at the call site.
408	#define UPRV_INDEX_0 (lengthPrefix)
409	#define UPRV_INDEX_1 (lengthPrefix + length1)
410	#define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
411	#define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
412
413	int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
414	fRangeFormatter,
415	string,
416	`0`,
417	&lengthPrefix,
418	&lengthSuffix,
419	UNUM_FIELD_COUNT,
420	status);
421	if (U_FAILURE(status)) { return; }
422	lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
423	U_ASSERT(lengthInfix > `0`);
424
425	// SPACING HEURISTIC
426	// Add spacing unless all modifiers are collapsed.
427	// TODO: add API to control this?
428	// TODO: Use a data-driven heuristic like currency spacing?
429	// TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
430	{
431	bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > `0`;
432	bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > `0`;
433	bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > `0`;
434	if (repeatInner \|\| repeatMiddle \|\| repeatOuter) {
435	// Add spacing if there is not already spacing
436	if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
437	lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u`'\u0020'`, UNUM_FIELD_COUNT, status);
438	}
439	if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - `1`))) {
440	lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u`'\u0020'`, UNUM_FIELD_COUNT, status);
441	}
442	}
443	}
444
445	length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
446	length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);
447
448	// TODO: Support padding?
449
450	if (collapseInner) {
451	// Note: this is actually a mix of prefix and suffix, but adding to infix length works
452	const Modifier& mod = resolveModifierPlurals(micros1.modInner, micros2.modInner);
453	lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
454	} else {
455	length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
456	length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
457	}
458
459	if (collapseMiddle) {
460	// Note: this is actually a mix of prefix and suffix, but adding to infix length works
461	const Modifier& mod = resolveModifierPlurals(micros1.modMiddle, micros2.modMiddle);
462	lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
463	} else {
464	length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
465	length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
466	}
467
468	if (collapseOuter) {
469	// Note: this is actually a mix of prefix and suffix, but adding to infix length works
470	const Modifier& mod = resolveModifierPlurals(micros1.modOuter, micros2.modOuter);
471	lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
472	} else {
473	length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
474	length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
475	}
476	}
477
478
479	const Modifier&
480	NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
481	Modifier::Parameters parameters;
482	first.getParameters(parameters);
483	if (parameters.obj == nullptr) {
484	// No plural form; return a fallback (e.g., the first)
485	return first;
486	}
487	StandardPlural::Form firstPlural = parameters.plural;
488
489	second.getParameters(parameters);
490	if (parameters.obj == nullptr) {
491	// No plural form; return a fallback (e.g., the first)
492	return first;
493	}
494	StandardPlural::Form secondPlural = parameters.plural;
495
496	// Get the required plural form from data
497	StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
498
499	// Get and return the new Modifier
500	const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
501	U_ASSERT(mod != nullptr);
502	return *mod;
503	}
504
505
506
507	#endif /* #if !UCONFIG_NO_FORMATTING */
508

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/numrange_impl.cpp