1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2009-2015, International Business Machines Corporation and |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * |
9 | * File PLURFMT.CPP |
10 | ******************************************************************************* |
11 | */ |
12 | |
13 | #include "unicode/decimfmt.h" |
14 | #include "unicode/messagepattern.h" |
15 | #include "unicode/plurfmt.h" |
16 | #include "unicode/plurrule.h" |
17 | #include "unicode/utypes.h" |
18 | #include "cmemory.h" |
19 | #include "messageimpl.h" |
20 | #include "nfrule.h" |
21 | #include "plurrule_impl.h" |
22 | #include "uassert.h" |
23 | #include "uhash.h" |
24 | #include "number_decimalquantity.h" |
25 | #include "number_utils.h" |
26 | #include "number_utypes.h" |
27 | |
28 | #if !UCONFIG_NO_FORMATTING |
29 | |
30 | U_NAMESPACE_BEGIN |
31 | |
32 | using number::impl::DecimalQuantity; |
33 | |
34 | static const UChar OTHER_STRING[] = { |
35 | 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" |
36 | }; |
37 | |
38 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) |
39 | |
40 | PluralFormat::PluralFormat(UErrorCode& status) |
41 | : locale(Locale::getDefault()), |
42 | msgPattern(status), |
43 | numberFormat(NULL), |
44 | offset(0) { |
45 | init(NULL, UPLURAL_TYPE_CARDINAL, status); |
46 | } |
47 | |
48 | PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) |
49 | : locale(loc), |
50 | msgPattern(status), |
51 | numberFormat(NULL), |
52 | offset(0) { |
53 | init(NULL, UPLURAL_TYPE_CARDINAL, status); |
54 | } |
55 | |
56 | PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) |
57 | : locale(Locale::getDefault()), |
58 | msgPattern(status), |
59 | numberFormat(NULL), |
60 | offset(0) { |
61 | init(&rules, UPLURAL_TYPE_COUNT, status); |
62 | } |
63 | |
64 | PluralFormat::PluralFormat(const Locale& loc, |
65 | const PluralRules& rules, |
66 | UErrorCode& status) |
67 | : locale(loc), |
68 | msgPattern(status), |
69 | numberFormat(NULL), |
70 | offset(0) { |
71 | init(&rules, UPLURAL_TYPE_COUNT, status); |
72 | } |
73 | |
74 | PluralFormat::PluralFormat(const Locale& loc, |
75 | UPluralType type, |
76 | UErrorCode& status) |
77 | : locale(loc), |
78 | msgPattern(status), |
79 | numberFormat(NULL), |
80 | offset(0) { |
81 | init(NULL, type, status); |
82 | } |
83 | |
84 | PluralFormat::PluralFormat(const UnicodeString& pat, |
85 | UErrorCode& status) |
86 | : locale(Locale::getDefault()), |
87 | msgPattern(status), |
88 | numberFormat(NULL), |
89 | offset(0) { |
90 | init(NULL, UPLURAL_TYPE_CARDINAL, status); |
91 | applyPattern(pat, status); |
92 | } |
93 | |
94 | PluralFormat::PluralFormat(const Locale& loc, |
95 | const UnicodeString& pat, |
96 | UErrorCode& status) |
97 | : locale(loc), |
98 | msgPattern(status), |
99 | numberFormat(NULL), |
100 | offset(0) { |
101 | init(NULL, UPLURAL_TYPE_CARDINAL, status); |
102 | applyPattern(pat, status); |
103 | } |
104 | |
105 | PluralFormat::PluralFormat(const PluralRules& rules, |
106 | const UnicodeString& pat, |
107 | UErrorCode& status) |
108 | : locale(Locale::getDefault()), |
109 | msgPattern(status), |
110 | numberFormat(NULL), |
111 | offset(0) { |
112 | init(&rules, UPLURAL_TYPE_COUNT, status); |
113 | applyPattern(pat, status); |
114 | } |
115 | |
116 | PluralFormat::PluralFormat(const Locale& loc, |
117 | const PluralRules& rules, |
118 | const UnicodeString& pat, |
119 | UErrorCode& status) |
120 | : locale(loc), |
121 | msgPattern(status), |
122 | numberFormat(NULL), |
123 | offset(0) { |
124 | init(&rules, UPLURAL_TYPE_COUNT, status); |
125 | applyPattern(pat, status); |
126 | } |
127 | |
128 | PluralFormat::PluralFormat(const Locale& loc, |
129 | UPluralType type, |
130 | const UnicodeString& pat, |
131 | UErrorCode& status) |
132 | : locale(loc), |
133 | msgPattern(status), |
134 | numberFormat(NULL), |
135 | offset(0) { |
136 | init(NULL, type, status); |
137 | applyPattern(pat, status); |
138 | } |
139 | |
140 | PluralFormat::PluralFormat(const PluralFormat& other) |
141 | : Format(other), |
142 | locale(other.locale), |
143 | msgPattern(other.msgPattern), |
144 | numberFormat(NULL), |
145 | offset(other.offset) { |
146 | copyObjects(other); |
147 | } |
148 | |
149 | void |
150 | PluralFormat::copyObjects(const PluralFormat& other) { |
151 | UErrorCode status = U_ZERO_ERROR; |
152 | if (numberFormat != NULL) { |
153 | delete numberFormat; |
154 | } |
155 | if (pluralRulesWrapper.pluralRules != NULL) { |
156 | delete pluralRulesWrapper.pluralRules; |
157 | } |
158 | |
159 | if (other.numberFormat == NULL) { |
160 | numberFormat = NumberFormat::createInstance(locale, status); |
161 | } else { |
162 | numberFormat = other.numberFormat->clone(); |
163 | } |
164 | if (other.pluralRulesWrapper.pluralRules == NULL) { |
165 | pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); |
166 | } else { |
167 | pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); |
168 | } |
169 | } |
170 | |
171 | |
172 | PluralFormat::~PluralFormat() { |
173 | delete numberFormat; |
174 | } |
175 | |
176 | void |
177 | PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { |
178 | if (U_FAILURE(status)) { |
179 | return; |
180 | } |
181 | |
182 | if (rules==NULL) { |
183 | pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); |
184 | } else { |
185 | pluralRulesWrapper.pluralRules = rules->clone(); |
186 | if (pluralRulesWrapper.pluralRules == NULL) { |
187 | status = U_MEMORY_ALLOCATION_ERROR; |
188 | return; |
189 | } |
190 | } |
191 | |
192 | numberFormat= NumberFormat::createInstance(locale, status); |
193 | } |
194 | |
195 | void |
196 | PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { |
197 | msgPattern.parsePluralStyle(newPattern, NULL, status); |
198 | if (U_FAILURE(status)) { |
199 | msgPattern.clear(); |
200 | offset = 0; |
201 | return; |
202 | } |
203 | offset = msgPattern.getPluralOffset(0); |
204 | } |
205 | |
206 | UnicodeString& |
207 | PluralFormat::format(const Formattable& obj, |
208 | UnicodeString& appendTo, |
209 | FieldPosition& pos, |
210 | UErrorCode& status) const |
211 | { |
212 | if (U_FAILURE(status)) return appendTo; |
213 | |
214 | if (obj.isNumeric()) { |
215 | return format(obj, obj.getDouble(), appendTo, pos, status); |
216 | } else { |
217 | status = U_ILLEGAL_ARGUMENT_ERROR; |
218 | return appendTo; |
219 | } |
220 | } |
221 | |
222 | UnicodeString |
223 | PluralFormat::format(int32_t number, UErrorCode& status) const { |
224 | FieldPosition fpos(FieldPosition::DONT_CARE); |
225 | UnicodeString result; |
226 | return format(Formattable(number), number, result, fpos, status); |
227 | } |
228 | |
229 | UnicodeString |
230 | PluralFormat::format(double number, UErrorCode& status) const { |
231 | FieldPosition fpos(FieldPosition::DONT_CARE); |
232 | UnicodeString result; |
233 | return format(Formattable(number), number, result, fpos, status); |
234 | } |
235 | |
236 | |
237 | UnicodeString& |
238 | PluralFormat::format(int32_t number, |
239 | UnicodeString& appendTo, |
240 | FieldPosition& pos, |
241 | UErrorCode& status) const { |
242 | return format(Formattable(number), (double)number, appendTo, pos, status); |
243 | } |
244 | |
245 | UnicodeString& |
246 | PluralFormat::format(double number, |
247 | UnicodeString& appendTo, |
248 | FieldPosition& pos, |
249 | UErrorCode& status) const { |
250 | return format(Formattable(number), (double)number, appendTo, pos, status); |
251 | } |
252 | |
253 | UnicodeString& |
254 | PluralFormat::format(const Formattable& numberObject, double number, |
255 | UnicodeString& appendTo, |
256 | FieldPosition& pos, |
257 | UErrorCode& status) const { |
258 | if (U_FAILURE(status)) { |
259 | return appendTo; |
260 | } |
261 | if (msgPattern.countParts() == 0) { |
262 | return numberFormat->format(numberObject, appendTo, pos, status); |
263 | } |
264 | |
265 | // Get the appropriate sub-message. |
266 | // Select it based on the formatted number-offset. |
267 | double numberMinusOffset = number - offset; |
268 | // Call NumberFormatter to get both the DecimalQuantity and the string. |
269 | // This call site needs to use more internal APIs than the Java equivalent. |
270 | number::impl::UFormattedNumberData data; |
271 | if (offset == 0) { |
272 | // could be BigDecimal etc. |
273 | numberObject.populateDecimalQuantity(data.quantity, status); |
274 | } else { |
275 | data.quantity.setToDouble(numberMinusOffset); |
276 | } |
277 | UnicodeString numberString; |
278 | auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); |
279 | if(decFmt != nullptr) { |
280 | const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status); |
281 | if (U_FAILURE(status)) { |
282 | return appendTo; |
283 | } |
284 | lnf->formatImpl(&data, status); // mutates &data |
285 | if (U_FAILURE(status)) { |
286 | return appendTo; |
287 | } |
288 | numberString = data.getStringRef().toUnicodeString(); |
289 | } else { |
290 | if (offset == 0) { |
291 | numberFormat->format(numberObject, numberString, status); |
292 | } else { |
293 | numberFormat->format(numberMinusOffset, numberString, status); |
294 | } |
295 | } |
296 | |
297 | int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status); |
298 | if (U_FAILURE(status)) { return appendTo; } |
299 | // Replace syntactic # signs in the top level of this sub-message |
300 | // (not in nested arguments) with the formatted number-offset. |
301 | const UnicodeString& pattern = msgPattern.getPatternString(); |
302 | int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); |
303 | for (;;) { |
304 | const MessagePattern::Part& part = msgPattern.getPart(++partIndex); |
305 | const UMessagePatternPartType type = part.getType(); |
306 | int32_t index = part.getIndex(); |
307 | if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { |
308 | return appendTo.append(pattern, prevIndex, index - prevIndex); |
309 | } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || |
310 | (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { |
311 | appendTo.append(pattern, prevIndex, index - prevIndex); |
312 | if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { |
313 | appendTo.append(numberString); |
314 | } |
315 | prevIndex = part.getLimit(); |
316 | } else if (type == UMSGPAT_PART_TYPE_ARG_START) { |
317 | appendTo.append(pattern, prevIndex, index - prevIndex); |
318 | prevIndex = index; |
319 | partIndex = msgPattern.getLimitPartIndex(partIndex); |
320 | index = msgPattern.getPart(partIndex).getLimit(); |
321 | MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); |
322 | prevIndex = index; |
323 | } |
324 | } |
325 | } |
326 | |
327 | UnicodeString& |
328 | PluralFormat::toPattern(UnicodeString& appendTo) { |
329 | if (0 == msgPattern.countParts()) { |
330 | appendTo.setToBogus(); |
331 | } else { |
332 | appendTo.append(msgPattern.getPatternString()); |
333 | } |
334 | return appendTo; |
335 | } |
336 | |
337 | void |
338 | PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { |
339 | if (U_FAILURE(status)) { |
340 | return; |
341 | } |
342 | locale = loc; |
343 | msgPattern.clear(); |
344 | delete numberFormat; |
345 | offset = 0; |
346 | numberFormat = NULL; |
347 | pluralRulesWrapper.reset(); |
348 | init(NULL, UPLURAL_TYPE_CARDINAL, status); |
349 | } |
350 | |
351 | void |
352 | PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { |
353 | if (U_FAILURE(status)) { |
354 | return; |
355 | } |
356 | NumberFormat* nf = format->clone(); |
357 | if (nf != NULL) { |
358 | delete numberFormat; |
359 | numberFormat = nf; |
360 | } else { |
361 | status = U_MEMORY_ALLOCATION_ERROR; |
362 | } |
363 | } |
364 | |
365 | PluralFormat* |
366 | PluralFormat::clone() const |
367 | { |
368 | return new PluralFormat(*this); |
369 | } |
370 | |
371 | |
372 | PluralFormat& |
373 | PluralFormat::operator=(const PluralFormat& other) { |
374 | if (this != &other) { |
375 | locale = other.locale; |
376 | msgPattern = other.msgPattern; |
377 | offset = other.offset; |
378 | copyObjects(other); |
379 | } |
380 | |
381 | return *this; |
382 | } |
383 | |
384 | UBool |
385 | PluralFormat::operator==(const Format& other) const { |
386 | if (this == &other) { |
387 | return TRUE; |
388 | } |
389 | if (!Format::operator==(other)) { |
390 | return FALSE; |
391 | } |
392 | const PluralFormat& o = (const PluralFormat&)other; |
393 | return |
394 | locale == o.locale && |
395 | msgPattern == o.msgPattern && // implies same offset |
396 | (numberFormat == NULL) == (o.numberFormat == NULL) && |
397 | (numberFormat == NULL || *numberFormat == *o.numberFormat) && |
398 | (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) && |
399 | (pluralRulesWrapper.pluralRules == NULL || |
400 | *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); |
401 | } |
402 | |
403 | UBool |
404 | PluralFormat::operator!=(const Format& other) const { |
405 | return !operator==(other); |
406 | } |
407 | |
408 | void |
409 | PluralFormat::parseObject(const UnicodeString& /*source*/, |
410 | Formattable& /*result*/, |
411 | ParsePosition& pos) const |
412 | { |
413 | // Parsing not supported. |
414 | pos.setErrorIndex(pos.getIndex()); |
415 | } |
416 | |
417 | int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, |
418 | const PluralSelector& selector, void *context, |
419 | double number, UErrorCode& ec) { |
420 | if (U_FAILURE(ec)) { |
421 | return 0; |
422 | } |
423 | int32_t count=pattern.countParts(); |
424 | double offset; |
425 | const MessagePattern::Part* part=&pattern.getPart(partIndex); |
426 | if (MessagePattern::Part::hasNumericValue(part->getType())) { |
427 | offset=pattern.getNumericValue(*part); |
428 | ++partIndex; |
429 | } else { |
430 | offset=0; |
431 | } |
432 | // The keyword is empty until we need to match against a non-explicit, not-"other" value. |
433 | // Then we get the keyword from the selector. |
434 | // (In other words, we never call the selector if we match against an explicit value, |
435 | // or if the only non-explicit keyword is "other".) |
436 | UnicodeString keyword; |
437 | UnicodeString other(FALSE, OTHER_STRING, 5); |
438 | // When we find a match, we set msgStart>0 and also set this boolean to true |
439 | // to avoid matching the keyword again (duplicates are allowed) |
440 | // while we continue to look for an explicit-value match. |
441 | UBool haveKeywordMatch=FALSE; |
442 | // msgStart is 0 until we find any appropriate sub-message. |
443 | // We remember the first "other" sub-message if we have not seen any |
444 | // appropriate sub-message before. |
445 | // We remember the first matching-keyword sub-message if we have not seen |
446 | // one of those before. |
447 | // (The parser allows [does not check for] duplicate keywords. |
448 | // We just have to make sure to take the first one.) |
449 | // We avoid matching the keyword twice by also setting haveKeywordMatch=true |
450 | // at the first keyword match. |
451 | // We keep going until we find an explicit-value match or reach the end of the plural style. |
452 | int32_t msgStart=0; |
453 | // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples |
454 | // until ARG_LIMIT or end of plural-only pattern. |
455 | do { |
456 | part=&pattern.getPart(partIndex++); |
457 | const UMessagePatternPartType type = part->getType(); |
458 | if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { |
459 | break; |
460 | } |
461 | U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); |
462 | // part is an ARG_SELECTOR followed by an optional explicit value, and then a message |
463 | if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { |
464 | // explicit value like "=2" |
465 | part=&pattern.getPart(partIndex++); |
466 | if(number==pattern.getNumericValue(*part)) { |
467 | // matches explicit value |
468 | return partIndex; |
469 | } |
470 | } else if(!haveKeywordMatch) { |
471 | // plural keyword like "few" or "other" |
472 | // Compare "other" first and call the selector if this is not "other". |
473 | if(pattern.partSubstringMatches(*part, other)) { |
474 | if(msgStart==0) { |
475 | msgStart=partIndex; |
476 | if(0 == keyword.compare(other)) { |
477 | // This is the first "other" sub-message, |
478 | // and the selected keyword is also "other". |
479 | // Do not match "other" again. |
480 | haveKeywordMatch=TRUE; |
481 | } |
482 | } |
483 | } else { |
484 | if(keyword.isEmpty()) { |
485 | keyword=selector.select(context, number-offset, ec); |
486 | if(msgStart!=0 && (0 == keyword.compare(other))) { |
487 | // We have already seen an "other" sub-message. |
488 | // Do not match "other" again. |
489 | haveKeywordMatch=TRUE; |
490 | // Skip keyword matching but do getLimitPartIndex(). |
491 | } |
492 | } |
493 | if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { |
494 | // keyword matches |
495 | msgStart=partIndex; |
496 | // Do not match this keyword again. |
497 | haveKeywordMatch=TRUE; |
498 | } |
499 | } |
500 | } |
501 | partIndex=pattern.getLimitPartIndex(partIndex); |
502 | } while(++partIndex<count); |
503 | return msgStart; |
504 | } |
505 | |
506 | void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { |
507 | // If no pattern was applied, return null. |
508 | if (msgPattern.countParts() == 0) { |
509 | pos.setBeginIndex(-1); |
510 | pos.setEndIndex(-1); |
511 | return; |
512 | } |
513 | int partIndex = 0; |
514 | int currMatchIndex; |
515 | int count=msgPattern.countParts(); |
516 | int startingAt = pos.getBeginIndex(); |
517 | if (startingAt < 0) { |
518 | startingAt = 0; |
519 | } |
520 | |
521 | // The keyword is null until we need to match against a non-explicit, not-"other" value. |
522 | // Then we get the keyword from the selector. |
523 | // (In other words, we never call the selector if we match against an explicit value, |
524 | // or if the only non-explicit keyword is "other".) |
525 | UnicodeString keyword; |
526 | UnicodeString matchedWord; |
527 | const UnicodeString& pattern = msgPattern.getPatternString(); |
528 | int matchedIndex = -1; |
529 | // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples |
530 | // until the end of the plural-only pattern. |
531 | while (partIndex < count) { |
532 | const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); |
533 | if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { |
534 | // Bad format |
535 | continue; |
536 | } |
537 | |
538 | const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); |
539 | if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { |
540 | // Bad format |
541 | continue; |
542 | } |
543 | |
544 | const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); |
545 | if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { |
546 | // Bad format |
547 | continue; |
548 | } |
549 | |
550 | UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); |
551 | if (rbnfLenientScanner != NULL) { |
552 | // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. |
553 | int32_t length = -1; |
554 | currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); |
555 | } |
556 | else { |
557 | currMatchIndex = source.indexOf(currArg, startingAt); |
558 | } |
559 | if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { |
560 | matchedIndex = currMatchIndex; |
561 | matchedWord = currArg; |
562 | keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); |
563 | } |
564 | } |
565 | if (matchedIndex >= 0) { |
566 | pos.setBeginIndex(matchedIndex); |
567 | pos.setEndIndex(matchedIndex + matchedWord.length()); |
568 | result.setString(keyword); |
569 | return; |
570 | } |
571 | |
572 | // Not found! |
573 | pos.setBeginIndex(-1); |
574 | pos.setEndIndex(-1); |
575 | } |
576 | |
577 | PluralFormat::PluralSelector::~PluralSelector() {} |
578 | |
579 | PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { |
580 | delete pluralRules; |
581 | } |
582 | |
583 | UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, |
584 | UErrorCode& /*ec*/) const { |
585 | (void)number; // unused except in the assertion |
586 | IFixedDecimal *dec=static_cast<IFixedDecimal *>(context); |
587 | return pluralRules->select(*dec); |
588 | } |
589 | |
590 | void PluralFormat::PluralSelectorAdapter::reset() { |
591 | delete pluralRules; |
592 | pluralRules = NULL; |
593 | } |
594 | |
595 | |
596 | U_NAMESPACE_END |
597 | |
598 | |
599 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
600 | |
601 | //eof |
602 | |