1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8// Allow implicit conversion from char16_t* to UnicodeString for this file:
9// Helpful in toString methods and elsewhere.
10#define UNISTR_FROM_STRING_EXPLICIT
11
12#include "unicode/numberrangeformatter.h"
13#include "numrange_impl.h"
14#include "patternprops.h"
15#include "uresimp.h"
16#include "util.h"
17
18using namespace icu;
19using namespace icu::number;
20using namespace icu::number::impl;
21
22namespace {
23
24// Helper function for 2-dimensional switch statement
25constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
26 return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
27}
28
29
// Holds the two patterns loaded for range formatting. Both are filled in by
// getNumberRangeData() (via NumberRangeDataSink) in this file.
struct NumberRangeData {
    // Pattern read from the "range" key; the built-in default is u"{0}–{1}".
    SimpleFormatter rangePattern;
    // Pattern read from the "approximately" key; the built-in default is u"~{0}".
    SimpleFormatter approximatelyPattern;
};
34
35class NumberRangeDataSink : public ResourceSink {
36 public:
37 NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
38
39 void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
40 ResourceTable miscTable = value.getTable(status);
41 if (U_FAILURE(status)) { return; }
42 for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
43 if (uprv_strcmp(key, "range") == 0) {
44 if (hasRangeData()) {
45 continue; // have already seen this pattern
46 }
47 fData.rangePattern = {value.getUnicodeString(status), status};
48 } else if (uprv_strcmp(key, "approximately") == 0) {
49 if (hasApproxData()) {
50 continue; // have already seen this pattern
51 }
52 fData.approximatelyPattern = {value.getUnicodeString(status), status};
53 }
54 }
55 }
56
57 bool hasRangeData() {
58 return fData.rangePattern.getArgumentLimit() != 0;
59 }
60
61 bool hasApproxData() {
62 return fData.approximatelyPattern.getArgumentLimit() != 0;
63 }
64
65 bool isComplete() {
66 return hasRangeData() && hasApproxData();
67 }
68
69 void fillInDefaults(UErrorCode& status) {
70 if (!hasRangeData()) {
71 fData.rangePattern = {u"{0}–{1}", status};
72 }
73 if (!hasApproxData()) {
74 fData.approximatelyPattern = {u"~{0}", status};
75 }
76 }
77
78 private:
79 NumberRangeData& fData;
80};
81
82void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
83 if (U_FAILURE(status)) { return; }
84 LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
85 if (U_FAILURE(status)) { return; }
86 NumberRangeDataSink sink(data);
87
88 CharString dataPath;
89 dataPath.append("NumberElements/", -1, status);
90 dataPath.append(nsName, -1, status);
91 dataPath.append("/miscPatterns", -1, status);
92 if (U_FAILURE(status)) { return; }
93
94 UErrorCode localStatus = U_ZERO_ERROR;
95 ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
96 if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
97 status = localStatus;
98 return;
99 }
100
101 // Fall back to latn if necessary
102 if (!sink.isComplete()) {
103 ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
104 }
105
106 sink.fillInDefaults(status);
107}
108
109class PluralRangesDataSink : public ResourceSink {
110 public:
111 PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {}
112
113 void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
114 ResourceArray entriesArray = value.getArray(status);
115 if (U_FAILURE(status)) { return; }
116 fOutput.setCapacity(entriesArray.getSize());
117 for (int i = 0; entriesArray.getValue(i, value); i++) {
118 ResourceArray pluralFormsArray = value.getArray(status);
119 if (U_FAILURE(status)) { return; }
120 pluralFormsArray.getValue(0, value);
121 StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status);
122 if (U_FAILURE(status)) { return; }
123 pluralFormsArray.getValue(1, value);
124 StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status);
125 if (U_FAILURE(status)) { return; }
126 pluralFormsArray.getValue(2, value);
127 StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status);
128 if (U_FAILURE(status)) { return; }
129 fOutput.addPluralRange(first, second, result);
130 }
131 }
132
133 private:
134 StandardPluralRanges& fOutput;
135};
136
137void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) {
138 if (U_FAILURE(status)) { return; }
139 LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status));
140 if (U_FAILURE(status)) { return; }
141
142 CharString dataPath;
143 dataPath.append("locales/", -1, status);
144 dataPath.append(locale.getLanguage(), -1, status);
145 if (U_FAILURE(status)) { return; }
146 int32_t setLen;
147 // Not all languages are covered: fail gracefully
148 UErrorCode internalStatus = U_ZERO_ERROR;
149 const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus);
150 if (U_FAILURE(internalStatus)) { return; }
151
152 dataPath.clear();
153 dataPath.append("rules/", -1, status);
154 dataPath.appendInvariantChars(set, setLen, status);
155 if (U_FAILURE(status)) { return; }
156 PluralRangesDataSink sink(output);
157 ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
158 if (U_FAILURE(status)) { return; }
159}
160
161} // namespace
162
163
164void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) {
165 getPluralRangesData(locale, *this, status);
166}
167
168void StandardPluralRanges::addPluralRange(
169 StandardPlural::Form first,
170 StandardPlural::Form second,
171 StandardPlural::Form result) {
172 U_ASSERT(fTriplesLen < fTriples.getCapacity());
173 fTriples[fTriplesLen] = {first, second, result};
174 fTriplesLen++;
175}
176
177void StandardPluralRanges::setCapacity(int32_t length) {
178 if (length > fTriples.getCapacity()) {
179 fTriples.resize(length, 0);
180 }
181}
182
183StandardPlural::Form
184StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const {
185 for (int32_t i=0; i<fTriplesLen; i++) {
186 const auto& triple = fTriples[i];
187 if (triple.first == first && triple.second == second) {
188 return triple.result;
189 }
190 }
191 // Default fallback
192 return StandardPlural::OTHER;
193}
194
195
196NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
197 : formatterImpl1(macros.formatter1.fMacros, status),
198 formatterImpl2(macros.formatter2.fMacros, status),
199 fSameFormatters(macros.singleFormatter),
200 fCollapse(macros.collapse),
201 fIdentityFallback(macros.identityFallback) {
202
203 const char* nsName = formatterImpl1.getRawMicroProps().nsName;
204 if (uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
205 status = U_ILLEGAL_ARGUMENT_ERROR;
206 return;
207 }
208
209 NumberRangeData data;
210 getNumberRangeData(macros.locale.getName(), nsName, data, status);
211 if (U_FAILURE(status)) { return; }
212 fRangeFormatter = data.rangePattern;
213 fApproximatelyModifier = {data.approximatelyPattern, kUndefinedField, false};
214
215 // TODO: Get locale from PluralRules instead?
216 fPluralRanges.initialize(macros.locale, status);
217 if (U_FAILURE(status)) { return; }
218}
219
220void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
221 if (U_FAILURE(status)) {
222 return;
223 }
224
225 MicroProps micros1;
226 MicroProps micros2;
227 formatterImpl1.preProcess(data.quantity1, micros1, status);
228 if (fSameFormatters) {
229 formatterImpl1.preProcess(data.quantity2, micros2, status);
230 } else {
231 formatterImpl2.preProcess(data.quantity2, micros2, status);
232 }
233 if (U_FAILURE(status)) {
234 return;
235 }
236
237 // If any of the affixes are different, an identity is not possible
238 // and we must use formatRange().
239 // TODO: Write this as MicroProps operator==() ?
240 // TODO: Avoid the redundancy of these equality operations with the
241 // ones in formatRange?
242 if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
243 || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
244 || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
245 formatRange(data, micros1, micros2, status);
246 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
247 return;
248 }
249
250 // Check for identity
251 if (equalBeforeRounding) {
252 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
253 } else if (data.quantity1 == data.quantity2) {
254 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
255 } else {
256 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
257 }
258
259 switch (identity2d(fIdentityFallback, data.identityResult)) {
260 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
261 UNUM_IDENTITY_RESULT_NOT_EQUAL):
262 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
263 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
264 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
265 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
266 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
267 UNUM_IDENTITY_RESULT_NOT_EQUAL):
268 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
269 UNUM_IDENTITY_RESULT_NOT_EQUAL):
270 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
271 UNUM_IDENTITY_RESULT_NOT_EQUAL):
272 formatRange(data, micros1, micros2, status);
273 break;
274
275 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
276 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
277 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
278 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
279 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
280 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
281 formatApproximately(data, micros1, micros2, status);
282 break;
283
284 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
285 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
286 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
287 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
288 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
289 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
290 formatSingleValue(data, micros1, micros2, status);
291 break;
292
293 default:
294 UPRV_UNREACHABLE;
295 }
296}
297
298
299void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
300 MicroProps& micros1, MicroProps& micros2,
301 UErrorCode& status) const {
302 if (U_FAILURE(status)) { return; }
303 if (fSameFormatters) {
304 int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
305 NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
306 } else {
307 formatRange(data, micros1, micros2, status);
308 }
309}
310
311
312void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
313 MicroProps& micros1, MicroProps& micros2,
314 UErrorCode& status) const {
315 if (U_FAILURE(status)) { return; }
316 if (fSameFormatters) {
317 int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
318 // HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
319 length += micros1.modInner->apply(data.getStringRef(), 0, length, status);
320 length += micros1.modMiddle->apply(data.getStringRef(), 0, length, status);
321 length += fApproximatelyModifier.apply(data.getStringRef(), 0, length, status);
322 micros1.modOuter->apply(data.getStringRef(), 0, length, status);
323 } else {
324 formatRange(data, micros1, micros2, status);
325 }
326}
327
328
// Writes a full range ("<first><infix><second>") into the string builder of `data`.
//
// Overview:
//   1. Decide, for each of the three modifier layers (outer, middle, inner),
//      whether it is "collapsed" (applied once around the whole range) or
//      repeated on each endpoint. The decision depends on fCollapse and on
//      whether the two endpoints' modifiers are semantically equivalent.
//   2. Insert the range pattern and measure its prefix, infix and suffix.
//   3. If any non-empty modifier will be repeated, make sure the infix is
//      surrounded by spaces.
//   4. Write both numbers, then apply the modifiers from the inside out.
//
// The string is tracked as five consecutive segments whose lengths are kept in
// lengthPrefix, length1, lengthInfix, length2 and lengthSuffix. The
// UPRV_INDEX_* macros compute the segment boundaries from those variables at
// each use, so they stay correct as the lengths change.
void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
                                           MicroProps& micros1, MicroProps& micros2,
                                           UErrorCode& status) const {
    if (U_FAILURE(status)) { return; }

    // modInner is always notation (scientific); collapsable in ALL.
    // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
    // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
    // Never collapse an outer mod but not an inner mod.
    bool collapseOuter, collapseMiddle, collapseInner;
    switch (fCollapse) {
        case UNUM_RANGE_COLLAPSE_ALL:
        case UNUM_RANGE_COLLAPSE_AUTO:
        case UNUM_RANGE_COLLAPSE_UNIT:
        {
            // OUTER MODIFIER
            collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);

            if (!collapseOuter) {
                // Never collapse inner mods if outer mods are not collapsable
                collapseMiddle = false;
                collapseInner = false;
                break;
            }

            // MIDDLE MODIFIER
            collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);

            if (!collapseMiddle) {
                // Never collapse inner mods if middle mods are not collapsable
                collapseInner = false;
                break;
            }

            // MIDDLE MODIFIER HEURISTICS
            // (could disable collapsing of the middle modifier)
            // The modifiers are equal by this point, so we can look at just one of them.
            const Modifier* mm = micros1.modMiddle;
            if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
                // Only collapse if the modifier is a unit.
                // TODO: Make a better way to check for a unit?
                // TODO: Handle case where the modifier has both notation and unit (compact currency)?
                if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD})
                        && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) {
                    collapseMiddle = false;
                }
            } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
                // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
                if (mm->getCodePointCount() <= 1) {
                    collapseMiddle = false;
                }
            }

            // The inner modifier is only considered for collapsing in ALL mode,
            // and only when the middle modifier is itself being collapsed.
            if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
                collapseInner = false;
                break;
            }

            // INNER MODIFIER
            collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);

            // All done checking for collapsability.
            break;
        }

        default:
            // Any other collapse setting: repeat every modifier on both endpoints.
            collapseOuter = false;
            collapseMiddle = false;
            collapseInner = false;
            break;
    }

    FormattedStringBuilder& string = data.getStringRef();
    int32_t lengthPrefix = 0;
    int32_t length1 = 0;
    int32_t lengthInfix = 0;
    int32_t length2 = 0;
    int32_t lengthSuffix = 0;

    // Use #define so that these are evaluated at the call site.
    // INDEX_0: start of the first number; INDEX_1: end of the first number / start of the infix;
    // INDEX_2: end of the infix / start of the second number; INDEX_3: end of the second number.
    #define UPRV_INDEX_0 (lengthPrefix)
    #define UPRV_INDEX_1 (lengthPrefix + length1)
    #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
    #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)

    // Insert the range pattern at index 0. The call reports the prefix and
    // suffix lengths; whatever remains of the returned total is the infix.
    int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
        fRangeFormatter,
        string,
        0,
        &lengthPrefix,
        &lengthSuffix,
        kUndefinedField,
        status);
    if (U_FAILURE(status)) { return; }
    lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
    U_ASSERT(lengthInfix > 0);

    // SPACING HEURISTIC
    // Add spacing unless all modifiers are collapsed.
    // TODO: add API to control this?
    // TODO: Use a data-driven heuristic like currency spacing?
    // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
    {
        // A layer counts as "repeated" when it is not collapsed and the first
        // endpoint's modifier for that layer is non-empty.
        bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
        bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
        bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
        if (repeatInner || repeatMiddle || repeatOuter) {
            // Add spacing if there is not already spacing
            // (first at the start of the infix, then at its end)
            if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
                lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status);
            }
            if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
                lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status);
            }
        }
    }

    // Write the two numbers on either side of the infix. The second index is
    // evaluated after length1 has been updated by the first call.
    length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
    length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);

    // TODO: Support padding?

    // Apply the modifiers from the innermost layer outward. A collapsed layer
    // is applied once around the whole range; otherwise each endpoint gets its
    // own modifier and the corresponding endpoint length grows.
    if (collapseInner) {
        // Note: this is actually a mix of prefix and suffix, but adding to infix length works
        const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
        lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
    } else {
        length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
        length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
    }

    if (collapseMiddle) {
        // Note: this is actually a mix of prefix and suffix, but adding to infix length works
        const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
        lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
    } else {
        length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
        length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
    }

    if (collapseOuter) {
        // Note: this is actually a mix of prefix and suffix, but adding to infix length works
        const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
        lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
    } else {
        length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
        length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
    }
}
478
479
480const Modifier&
481NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
482 Modifier::Parameters parameters;
483 first.getParameters(parameters);
484 if (parameters.obj == nullptr) {
485 // No plural form; return a fallback (e.g., the first)
486 return first;
487 }
488 StandardPlural::Form firstPlural = parameters.plural;
489
490 second.getParameters(parameters);
491 if (parameters.obj == nullptr) {
492 // No plural form; return a fallback (e.g., the first)
493 return first;
494 }
495 StandardPlural::Form secondPlural = parameters.plural;
496
497 // Get the required plural form from data
498 StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
499
500 // Get and return the new Modifier
501 const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
502 U_ASSERT(mod != nullptr);
503 return *mod;
504}
505
506
507
508#endif /* #if !UCONFIG_NO_FORMATTING */
509