1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * |
9 | * File plurrule.cpp |
10 | */ |
11 | |
12 | #include <math.h> |
13 | #include <stdio.h> |
14 | |
15 | #include "unicode/utypes.h" |
16 | #include "unicode/localpointer.h" |
17 | #include "unicode/plurrule.h" |
18 | #include "unicode/upluralrules.h" |
19 | #include "unicode/ures.h" |
20 | #include "unicode/numfmt.h" |
21 | #include "unicode/decimfmt.h" |
22 | #include "charstr.h" |
23 | #include "cmemory.h" |
24 | #include "cstring.h" |
25 | #include "hash.h" |
26 | #include "locutil.h" |
27 | #include "mutex.h" |
28 | #include "patternprops.h" |
29 | #include "plurrule_impl.h" |
30 | #include "putilimp.h" |
31 | #include "ucln_in.h" |
32 | #include "ustrfmt.h" |
33 | #include "uassert.h" |
34 | #include "uvectr32.h" |
35 | #include "sharedpluralrules.h" |
36 | #include "unifiedcache.h" |
37 | #include "number_decimalquantity.h" |
38 | #include "util.h" |
39 | |
40 | #if !UCONFIG_NO_FORMATTING |
41 | |
42 | U_NAMESPACE_BEGIN |
43 | |
44 | using namespace icu::pluralimpl; |
45 | using icu::number::impl::DecimalQuantity; |
46 | |
// NUL-terminated UChar strings used by the plural-rule code in this file.
// Each array is spelled out from the single-character constants (LOW_x, COLON,
// SPACE) declared elsewhere; presumably those are the matching lowercase ASCII
// letters and punctuation, so each PK_* array spells the word in its name.

// The "other" keyword (compared/returned below with an explicit length of 5).
static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
// The default rule description: the "other" keyword, COLON, SPACE, then N.
static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
// Words of the rule syntax.
static const UChar PK_IN[]={LOW_I,LOW_N,0};
static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
static const UChar PK_IS[]={LOW_I,LOW_S,0};
static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
static const UChar PK_OR[]={LOW_O,LOW_R,0};
// Single-letter operand variable names.
static const UChar PK_VAR_N[]={LOW_N,0};
static const UChar PK_VAR_I[]={LOW_I,0};
static const UChar PK_VAR_F[]={LOW_F,0};
static const UChar PK_VAR_T[]={LOW_T,0};
static const UChar PK_VAR_V[]={LOW_V,0};
static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
// Sample-section markers.
static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};
63 | |
// Expand the UObject RTTI implementation macro for the two classes below.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
66 | |
67 | PluralRules::PluralRules(UErrorCode& /*status*/) |
68 | : UObject(), |
69 | mRules(nullptr), |
70 | mInternalStatus(U_ZERO_ERROR) |
71 | { |
72 | } |
73 | |
74 | PluralRules::PluralRules(const PluralRules& other) |
75 | : UObject(other), |
76 | mRules(nullptr), |
77 | mInternalStatus(U_ZERO_ERROR) |
78 | { |
79 | *this=other; |
80 | } |
81 | |
82 | PluralRules::~PluralRules() { |
83 | delete mRules; |
84 | } |
85 | |
86 | SharedPluralRules::~SharedPluralRules() { |
87 | delete ptr; |
88 | } |
89 | |
90 | PluralRules* |
91 | PluralRules::clone() const { |
92 | PluralRules* newObj = new PluralRules(*this); |
93 | // Since clone doesn't have a 'status' parameter, the best we can do is return nullptr if |
94 | // the newly created object was not fully constructed properly (an error occurred). |
95 | if (newObj != nullptr && U_FAILURE(newObj->mInternalStatus)) { |
96 | delete newObj; |
97 | newObj = nullptr; |
98 | } |
99 | return newObj; |
100 | } |
101 | |
102 | PluralRules& |
103 | PluralRules::operator=(const PluralRules& other) { |
104 | if (this != &other) { |
105 | delete mRules; |
106 | mRules = nullptr; |
107 | mInternalStatus = other.mInternalStatus; |
108 | if (U_FAILURE(mInternalStatus)) { |
109 | // bail out early if the object we were copying from was already 'invalid'. |
110 | return *this; |
111 | } |
112 | if (other.mRules != nullptr) { |
113 | mRules = new RuleChain(*other.mRules); |
114 | if (mRules == nullptr) { |
115 | mInternalStatus = U_MEMORY_ALLOCATION_ERROR; |
116 | } |
117 | else if (U_FAILURE(mRules->fInternalStatus)) { |
118 | // If the RuleChain wasn't fully copied, then set our status to failure as well. |
119 | mInternalStatus = mRules->fInternalStatus; |
120 | } |
121 | } |
122 | } |
123 | return *this; |
124 | } |
125 | |
126 | StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) { |
127 | if (U_FAILURE(status)) { |
128 | return nullptr; |
129 | } |
130 | LocalPointer<StringEnumeration> result(new PluralAvailableLocalesEnumeration(status), status); |
131 | if (U_FAILURE(status)) { |
132 | return nullptr; |
133 | } |
134 | return result.orphan(); |
135 | } |
136 | |
137 | |
138 | PluralRules* U_EXPORT2 |
139 | PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { |
140 | if (U_FAILURE(status)) { |
141 | return nullptr; |
142 | } |
143 | PluralRuleParser parser; |
144 | LocalPointer<PluralRules> newRules(new PluralRules(status), status); |
145 | if (U_FAILURE(status)) { |
146 | return nullptr; |
147 | } |
148 | parser.parse(description, newRules.getAlias(), status); |
149 | if (U_FAILURE(status)) { |
150 | newRules.adoptInstead(nullptr); |
151 | } |
152 | return newRules.orphan(); |
153 | } |
154 | |
155 | |
156 | PluralRules* U_EXPORT2 |
157 | PluralRules::createDefaultRules(UErrorCode& status) { |
158 | return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status); |
159 | } |
160 | |
161 | /******************************************************************************/ |
162 | /* Create PluralRules cache */ |
163 | |
164 | template<> U_I18N_API |
165 | const SharedPluralRules *LocaleCacheKey<SharedPluralRules>::createObject( |
166 | const void * /*unused*/, UErrorCode &status) const { |
167 | const char *localeId = fLoc.getName(); |
168 | LocalPointer<PluralRules> pr(PluralRules::internalForLocale(localeId, UPLURAL_TYPE_CARDINAL, status), status); |
169 | if (U_FAILURE(status)) { |
170 | return nullptr; |
171 | } |
172 | LocalPointer<SharedPluralRules> result(new SharedPluralRules(pr.getAlias()), status); |
173 | if (U_FAILURE(status)) { |
174 | return nullptr; |
175 | } |
176 | pr.orphan(); // result was successfully created so it nows pr. |
177 | result->addRef(); |
178 | return result.orphan(); |
179 | } |
180 | |
181 | /* end plural rules cache */ |
182 | /******************************************************************************/ |
183 | |
184 | const SharedPluralRules* U_EXPORT2 |
185 | PluralRules::createSharedInstance( |
186 | const Locale& locale, UPluralType type, UErrorCode& status) { |
187 | if (U_FAILURE(status)) { |
188 | return nullptr; |
189 | } |
190 | if (type != UPLURAL_TYPE_CARDINAL) { |
191 | status = U_UNSUPPORTED_ERROR; |
192 | return nullptr; |
193 | } |
194 | const SharedPluralRules *result = nullptr; |
195 | UnifiedCache::getByLocale(locale, result, status); |
196 | return result; |
197 | } |
198 | |
199 | PluralRules* U_EXPORT2 |
200 | PluralRules::forLocale(const Locale& locale, UErrorCode& status) { |
201 | return forLocale(locale, UPLURAL_TYPE_CARDINAL, status); |
202 | } |
203 | |
204 | PluralRules* U_EXPORT2 |
205 | PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) { |
206 | if (type != UPLURAL_TYPE_CARDINAL) { |
207 | return internalForLocale(locale, type, status); |
208 | } |
209 | const SharedPluralRules *shared = createSharedInstance( |
210 | locale, type, status); |
211 | if (U_FAILURE(status)) { |
212 | return nullptr; |
213 | } |
214 | PluralRules *result = (*shared)->clone(); |
215 | shared->removeRef(); |
216 | if (result == nullptr) { |
217 | status = U_MEMORY_ALLOCATION_ERROR; |
218 | } |
219 | return result; |
220 | } |
221 | |
222 | PluralRules* U_EXPORT2 |
223 | PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) { |
224 | if (U_FAILURE(status)) { |
225 | return nullptr; |
226 | } |
227 | if (type >= UPLURAL_TYPE_COUNT) { |
228 | status = U_ILLEGAL_ARGUMENT_ERROR; |
229 | return nullptr; |
230 | } |
231 | LocalPointer<PluralRules> newObj(new PluralRules(status), status); |
232 | if (U_FAILURE(status)) { |
233 | return nullptr; |
234 | } |
235 | UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); |
236 | // TODO: which other errors, if any, should be returned? |
237 | if (locRule.length() == 0) { |
238 | // If an out-of-memory error occurred, then stop and report the failure. |
239 | if (status == U_MEMORY_ALLOCATION_ERROR) { |
240 | return nullptr; |
241 | } |
242 | // Locales with no specific rules (all numbers have the "other" category |
243 | // will return a U_MISSING_RESOURCE_ERROR at this point. This is not |
244 | // an error. |
245 | locRule = UnicodeString(PLURAL_DEFAULT_RULE); |
246 | status = U_ZERO_ERROR; |
247 | } |
248 | PluralRuleParser parser; |
249 | parser.parse(locRule, newObj.getAlias(), status); |
250 | // TODO: should rule parse errors be returned, or |
251 | // should we silently use default rules? |
252 | // Original impl used default rules. |
253 | // Ask the question to ICU Core. |
254 | |
255 | return newObj.orphan(); |
256 | } |
257 | |
258 | UnicodeString |
259 | PluralRules::select(int32_t number) const { |
260 | return select(FixedDecimal(number)); |
261 | } |
262 | |
263 | UnicodeString |
264 | PluralRules::select(double number) const { |
265 | return select(FixedDecimal(number)); |
266 | } |
267 | |
268 | UnicodeString |
269 | PluralRules::select(const number::FormattedNumber& number, UErrorCode& status) const { |
270 | DecimalQuantity dq; |
271 | number.getDecimalQuantity(dq, status); |
272 | if (U_FAILURE(status)) { |
273 | return ICU_Utility::makeBogusString(); |
274 | } |
275 | return select(dq); |
276 | } |
277 | |
278 | UnicodeString |
279 | PluralRules::select(const IFixedDecimal &number) const { |
280 | if (mRules == nullptr) { |
281 | return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); |
282 | } |
283 | else { |
284 | return mRules->select(number); |
285 | } |
286 | } |
287 | |
288 | |
289 | |
290 | StringEnumeration* |
291 | PluralRules::getKeywords(UErrorCode& status) const { |
292 | if (U_FAILURE(status)) { |
293 | return nullptr; |
294 | } |
295 | if (U_FAILURE(mInternalStatus)) { |
296 | status = mInternalStatus; |
297 | return nullptr; |
298 | } |
299 | LocalPointer<StringEnumeration> nameEnumerator(new PluralKeywordEnumeration(mRules, status), status); |
300 | if (U_FAILURE(status)) { |
301 | return nullptr; |
302 | } |
303 | return nameEnumerator.orphan(); |
304 | } |
305 | |
306 | double |
307 | PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) { |
308 | // Not Implemented. |
309 | return UPLRULES_NO_UNIQUE_VALUE; |
310 | } |
311 | |
312 | int32_t |
313 | PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */, |
314 | int32_t /* destCapacity */, UErrorCode& error) { |
315 | error = U_UNSUPPORTED_ERROR; |
316 | return 0; |
317 | } |
318 | |
319 | |
/**
 * Returns the smallest power of ten by which d must be multiplied (repeatedly,
 * in floating point) to make it an integer; 1.0 if d is already integral.
 *
 * The loop tests "d > floor(d)" instead of "d != floor(d)". The two are the
 * same for every ordinary number, because floor(d) is never greater than d,
 * but the former is false for NaN, so a NaN input returns 1.0 rather than
 * looping forever.
 */
static double scaleForInt(double d) {
    double scale = 1.0;
    while (d > floor(d)) {
        d *= 10.0;
        scale *= 10.0;
    }
    return scale;
}
328 | |
329 | static int32_t |
330 | getSamplesFromString(const UnicodeString &samples, double *dest, |
331 | int32_t destCapacity, UErrorCode& status) { |
332 | int32_t sampleCount = 0; |
333 | int32_t sampleStartIdx = 0; |
334 | int32_t sampleEndIdx = 0; |
335 | |
336 | //std::string ss; // TODO: debugging. |
337 | // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n"; |
338 | for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) { |
339 | sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx); |
340 | if (sampleEndIdx == -1) { |
341 | sampleEndIdx = samples.length(); |
342 | } |
343 | const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx); |
344 | // ss.erase(); |
345 | // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n"; |
346 | int32_t tildeIndex = sampleRange.indexOf(TILDE); |
347 | if (tildeIndex < 0) { |
348 | FixedDecimal fixed(sampleRange, status); |
349 | double sampleValue = fixed.source; |
350 | if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) { |
351 | dest[sampleCount++] = sampleValue; |
352 | } |
353 | } else { |
354 | |
355 | FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status); |
356 | FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status); |
357 | double rangeLo = fixedLo.source; |
358 | double rangeHi = fixedHi.source; |
359 | if (U_FAILURE(status)) { |
360 | break; |
361 | } |
362 | if (rangeHi < rangeLo) { |
363 | status = U_INVALID_FORMAT_ERROR; |
364 | break; |
365 | } |
366 | |
367 | // For ranges of samples with fraction decimal digits, scale the number up so that we |
368 | // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths. |
369 | |
370 | double scale = scaleForInt(rangeLo); |
371 | double t = scaleForInt(rangeHi); |
372 | if (t > scale) { |
373 | scale = t; |
374 | } |
375 | rangeLo *= scale; |
376 | rangeHi *= scale; |
377 | for (double n=rangeLo; n<=rangeHi; n+=1) { |
378 | // Hack Alert: don't return any decimal samples with integer values that |
379 | // originated from a format with trailing decimals. |
380 | // This API is returning doubles, which can't distinguish having displayed |
381 | // zeros to the right of the decimal. |
382 | // This results in test failures with values mapping back to a different keyword. |
383 | double sampleValue = n/scale; |
384 | if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) { |
385 | dest[sampleCount++] = sampleValue; |
386 | } |
387 | if (sampleCount >= destCapacity) { |
388 | break; |
389 | } |
390 | } |
391 | } |
392 | sampleStartIdx = sampleEndIdx + 1; |
393 | } |
394 | return sampleCount; |
395 | } |
396 | |
397 | |
398 | int32_t |
399 | PluralRules::getSamples(const UnicodeString &keyword, double *dest, |
400 | int32_t destCapacity, UErrorCode& status) { |
401 | if (destCapacity == 0 || U_FAILURE(status)) { |
402 | return 0; |
403 | } |
404 | if (U_FAILURE(mInternalStatus)) { |
405 | status = mInternalStatus; |
406 | return 0; |
407 | } |
408 | RuleChain *rc = rulesForKeyword(keyword); |
409 | if (rc == nullptr) { |
410 | return 0; |
411 | } |
412 | int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status); |
413 | if (numSamples == 0) { |
414 | numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status); |
415 | } |
416 | return numSamples; |
417 | } |
418 | |
419 | |
420 | RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const { |
421 | RuleChain *rc; |
422 | for (rc = mRules; rc != nullptr; rc = rc->fNext) { |
423 | if (rc->fKeyword == keyword) { |
424 | break; |
425 | } |
426 | } |
427 | return rc; |
428 | } |
429 | |
430 | |
431 | UBool |
432 | PluralRules::isKeyword(const UnicodeString& keyword) const { |
433 | if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
434 | return true; |
435 | } |
436 | return rulesForKeyword(keyword) != nullptr; |
437 | } |
438 | |
439 | UnicodeString |
440 | PluralRules::getKeywordOther() const { |
441 | return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
442 | } |
443 | |
444 | UBool |
445 | PluralRules::operator==(const PluralRules& other) const { |
446 | const UnicodeString *ptrKeyword; |
447 | UErrorCode status= U_ZERO_ERROR; |
448 | |
449 | if ( this == &other ) { |
450 | return TRUE; |
451 | } |
452 | LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); |
453 | LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); |
454 | if (U_FAILURE(status)) { |
455 | return FALSE; |
456 | } |
457 | |
458 | if (myKeywordList->count(status)!=otherKeywordList->count(status)) { |
459 | return FALSE; |
460 | } |
461 | myKeywordList->reset(status); |
462 | while ((ptrKeyword=myKeywordList->snext(status))!=nullptr) { |
463 | if (!other.isKeyword(*ptrKeyword)) { |
464 | return FALSE; |
465 | } |
466 | } |
467 | otherKeywordList->reset(status); |
468 | while ((ptrKeyword=otherKeywordList->snext(status))!=nullptr) { |
469 | if (!this->isKeyword(*ptrKeyword)) { |
470 | return FALSE; |
471 | } |
472 | } |
473 | if (U_FAILURE(status)) { |
474 | return FALSE; |
475 | } |
476 | |
477 | return TRUE; |
478 | } |
479 | |
480 | |
/**
 * Parses a rule description and builds the corresponding rule chains on prules.
 *
 * The text is consumed token by token: getNextToken() reads a token,
 * checkSyntax() validates it, and the switch below updates the structure under
 * construction. Parser state used here: currentChain (the chain of the most
 * recent keyword), curAndConstraint (the constraint currently being filled in)
 * and rangeLowIdx/rangeHiIdx (positions in its rangeList of the range being read).
 *
 * @param ruleData the rule description to parse.
 * @param prules   the object that receives the rule chains (via prules->mRules).
 * @param status   in/out error code. Parsing stops at the first failure;
 *                 chains already attached to prules are left in place.
 */
void
PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    U_ASSERT(ruleIndex == 0);    // Parsers are good for a single use only!
    ruleSrc = &ruleData;

    while (ruleIndex< ruleSrc->length()) {
        getNextToken(status);
        if (U_FAILURE(status)) {
            return;
        }
        checkSyntax(status);
        if (U_FAILURE(status)) {
            return;
        }
        switch (type) {
        case tAnd:
            // Chain a new, empty AndConstraint after the current one.
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint = curAndConstraint->add(status);
            break;
        case tOr:
            {
                // Append a new OrConstraint to the end of the current chain's
                // list and start its first AndConstraint.
                U_ASSERT(currentChain != nullptr);
                OrConstraint *orNode=currentChain->ruleHeader;
                while (orNode->next != nullptr) {
                    orNode = orNode->next;
                }
                orNode->next= new OrConstraint();
                if (orNode->next == nullptr) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }
                orNode=orNode->next;
                orNode->next=nullptr;
                curAndConstraint = orNode->add(status);
            }
            break;
        case tIs:
            // Nothing to record; only check that no value or range was set yet.
            U_ASSERT(curAndConstraint != nullptr);
            U_ASSERT(curAndConstraint->value == -1);
            U_ASSERT(curAndConstraint->rangeList == nullptr);
            break;
        case tNot:
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint->negated=TRUE;
            break;

        case tNotEqual:
            // Mark as negated, then handle exactly like tEqual.
            // NOTE(review): curAndConstraint is dereferenced here before the
            // assertion in the shared code below.
            curAndConstraint->negated=TRUE;
            U_FALLTHROUGH;
        case tIn:
        case tWithin:
        case tEqual:
            {
                // Start a range list holding one (low, high) pair; -1 marks a
                // bound that has not been read yet.
                U_ASSERT(curAndConstraint != nullptr);
                LocalPointer<UVector32> newRangeList(new UVector32(status), status);
                if (U_FAILURE(status)) {
                    break;
                }
                curAndConstraint->rangeList = newRangeList.orphan();
                curAndConstraint->rangeList->addElement(-1, status);  // range Low
                curAndConstraint->rangeList->addElement(-1, status);  // range Hi
                rangeLowIdx = 0;
                rangeHiIdx  = 1;
                curAndConstraint->value=PLURAL_RANGE_HIGH;
                // Every operator except 'within' restricts matches to integers.
                curAndConstraint->integerOnly = (type != tWithin);
            }
            break;
        case tNumber:
            U_ASSERT(curAndConstraint != nullptr);
            if ( (curAndConstraint->op==AndConstraint::MOD)&&
                 (curAndConstraint->opNum == -1 ) ) {
                // First number after 'mod': it is the modulus.
                curAndConstraint->opNum=getNumberValue(token);
            }
            else {
                if (curAndConstraint->rangeList == nullptr) {
                    // this is for an 'is' rule
                    curAndConstraint->value = getNumberValue(token);
                } else {
                    // this is for an 'in' or 'within' rule
                    if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
                        // First number of the pair: set both bounds to it, so
                        // a single value becomes the range [v, v].
                        curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
                        curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
                    }
                    else {
                        // Second number of the pair: the upper bound.
                        curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
                        if (curAndConstraint->rangeList->elementAti(rangeLowIdx) >
                                curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
                            // Range Lower bound > Range Upper bound.
                            // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
                            // used for all plural rule parse errors.
                            status = U_UNEXPECTED_TOKEN;
                            break;
                        }
                    }
                }
            }
            break;
        case tComma:
            // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
            //       Catch cases like "n mod 10, is 1" here instead.
            if (curAndConstraint == nullptr || curAndConstraint->rangeList == nullptr) {
                status = U_UNEXPECTED_TOKEN;
                break;
            }
            // Open a new (low, high) pair at the end of the range list.
            U_ASSERT(curAndConstraint->rangeList->size() >= 2);
            rangeLowIdx = curAndConstraint->rangeList->size();
            curAndConstraint->rangeList->addElement(-1, status);  // range Low
            rangeHiIdx = curAndConstraint->rangeList->size();
            curAndConstraint->rangeList->addElement(-1, status);  // range Hi
            break;
        case tMod:
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint->op=AndConstraint::MOD;
            break;
        case tVariableN:
        case tVariableI:
        case tVariableF:
        case tVariableT:
        case tVariableV:
            // Record which operand variable the constraint tests.
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint->digitsType = type;
            break;
        case tKeyword:
            {
            // A keyword starts a new rule chain.
            RuleChain *newChain = new RuleChain;
            if (newChain == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            newChain->fKeyword = token;
            if (prules->mRules == nullptr) {
                prules->mRules = newChain;
            } else {
                // The new rule chain goes at the end of the linked list of rule chains,
                //   unless there is an "other" keyword & chain. "other" must remain last.
                RuleChain *insertAfter = prules->mRules;
                while (insertAfter->fNext!=nullptr &&
                       insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
                    insertAfter=insertAfter->fNext;
                }
                newChain->fNext = insertAfter->fNext;
                insertAfter->fNext = newChain;
            }
            // The chain is already linked into prules at this point, so it is
            // not lost if the allocation below fails.
            OrConstraint *orNode = new OrConstraint();
            if (orNode == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            newChain->ruleHeader = orNode;
            curAndConstraint = orNode->add(status);
            currentChain = newChain;
            }
            break;

        case tInteger:
            // Collect the integer sample tokens up to the next ';', '@' or
            // end of input. An ellipsis is not appended; it only sets the
            // "unbounded" flag.
            for (;;) {
                getNextToken(status);
                if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
                    break;
                }
                if (type == tEllipsis) {
                    currentChain->fIntegerSamplesUnbounded = TRUE;
                    continue;
                }
                currentChain->fIntegerSamples.append(token);
            }
            break;

        case tDecimal:
            // Same as tInteger, for the decimal samples.
            for (;;) {
                getNextToken(status);
                if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
                    break;
                }
                if (type == tEllipsis) {
                    currentChain->fDecimalSamplesUnbounded = TRUE;
                    continue;
                }
                currentChain->fDecimalSamples.append(token);
            }
            break;

        default:
            break;
        }
        // Remember this token's type before reading the next one.
        prevType=type;
        if (U_FAILURE(status)) {
            break;
        }
    }
}
676 | |
677 | UnicodeString |
678 | PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) { |
679 | UnicodeString emptyStr; |
680 | |
681 | if (U_FAILURE(errCode)) { |
682 | return emptyStr; |
683 | } |
684 | LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals" , &errCode)); |
685 | if(U_FAILURE(errCode)) { |
686 | return emptyStr; |
687 | } |
688 | const char *typeKey; |
689 | switch (type) { |
690 | case UPLURAL_TYPE_CARDINAL: |
691 | typeKey = "locales" ; |
692 | break; |
693 | case UPLURAL_TYPE_ORDINAL: |
694 | typeKey = "locales_ordinals" ; |
695 | break; |
696 | default: |
697 | // Must not occur: The caller should have checked for valid types. |
698 | errCode = U_ILLEGAL_ARGUMENT_ERROR; |
699 | return emptyStr; |
700 | } |
701 | LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, nullptr, &errCode)); |
702 | if(U_FAILURE(errCode)) { |
703 | return emptyStr; |
704 | } |
705 | int32_t resLen=0; |
706 | const char *curLocaleName=locale.getBaseName(); |
707 | const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); |
708 | |
709 | if (s == nullptr) { |
710 | // Check parent locales. |
711 | UErrorCode status = U_ZERO_ERROR; |
712 | char parentLocaleName[ULOC_FULLNAME_CAPACITY]; |
713 | const char *curLocaleName2=locale.getBaseName(); |
714 | uprv_strcpy(parentLocaleName, curLocaleName2); |
715 | |
716 | while (uloc_getParent(parentLocaleName, parentLocaleName, |
717 | ULOC_FULLNAME_CAPACITY, &status) > 0) { |
718 | resLen=0; |
719 | s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); |
720 | if (s != nullptr) { |
721 | errCode = U_ZERO_ERROR; |
722 | break; |
723 | } |
724 | status = U_ZERO_ERROR; |
725 | } |
726 | } |
727 | if (s==nullptr) { |
728 | return emptyStr; |
729 | } |
730 | |
731 | char setKey[256]; |
732 | u_UCharsToChars(s, setKey, resLen + 1); |
733 | // printf("\n PluralRule: %s\n", setKey); |
734 | |
735 | LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules" , nullptr, &errCode)); |
736 | if(U_FAILURE(errCode)) { |
737 | return emptyStr; |
738 | } |
739 | LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, nullptr, &errCode)); |
740 | if (U_FAILURE(errCode)) { |
741 | return emptyStr; |
742 | } |
743 | |
744 | int32_t numberKeys = ures_getSize(setRes.getAlias()); |
745 | UnicodeString result; |
746 | const char *key=nullptr; |
747 | for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ... |
748 | UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode); |
749 | UnicodeString uKey(key, -1, US_INV); |
750 | result.append(uKey); |
751 | result.append(COLON); |
752 | result.append(rules); |
753 | result.append(SEMI_COLON); |
754 | } |
755 | return result; |
756 | } |
757 | |
758 | |
759 | UnicodeString |
760 | PluralRules::getRules() const { |
761 | UnicodeString rules; |
762 | if (mRules != nullptr) { |
763 | mRules->dumpRules(rules); |
764 | } |
765 | return rules; |
766 | } |
767 | |
768 | AndConstraint::AndConstraint(const AndConstraint& other) { |
769 | this->fInternalStatus = other.fInternalStatus; |
770 | if (U_FAILURE(fInternalStatus)) { |
771 | return; // stop early if the object we are copying from is invalid. |
772 | } |
773 | this->op = other.op; |
774 | this->opNum=other.opNum; |
775 | this->value=other.value; |
776 | if (other.rangeList != nullptr) { |
777 | LocalPointer<UVector32> newRangeList(new UVector32(fInternalStatus), fInternalStatus); |
778 | if (U_FAILURE(fInternalStatus)) { |
779 | return; |
780 | } |
781 | this->rangeList = newRangeList.orphan(); |
782 | this->rangeList->assign(*other.rangeList, fInternalStatus); |
783 | } |
784 | this->integerOnly=other.integerOnly; |
785 | this->negated=other.negated; |
786 | this->digitsType = other.digitsType; |
787 | if (other.next != nullptr) { |
788 | this->next = new AndConstraint(*other.next); |
789 | if (this->next == nullptr) { |
790 | fInternalStatus = U_MEMORY_ALLOCATION_ERROR; |
791 | } |
792 | } |
793 | } |
794 | |
795 | AndConstraint::~AndConstraint() { |
796 | delete rangeList; |
797 | rangeList = nullptr; |
798 | delete next; |
799 | next = nullptr; |
800 | } |
801 | |
802 | UBool |
803 | AndConstraint::isFulfilled(const IFixedDecimal &number) { |
804 | UBool result = TRUE; |
805 | if (digitsType == none) { |
806 | // An empty AndConstraint, created by a rule with a keyword but no following expression. |
807 | return TRUE; |
808 | } |
809 | |
810 | PluralOperand operand = tokenTypeToPluralOperand(digitsType); |
811 | double n = number.getPluralOperand(operand); // pulls n | i | v | f value for the number. |
812 | // Will always be positive. |
813 | // May be non-integer (n option only) |
814 | do { |
815 | if (integerOnly && n != uprv_floor(n)) { |
816 | result = FALSE; |
817 | break; |
818 | } |
819 | |
820 | if (op == MOD) { |
821 | n = fmod(n, opNum); |
822 | } |
823 | if (rangeList == nullptr) { |
824 | result = value == -1 || // empty rule |
825 | n == value; // 'is' rule |
826 | break; |
827 | } |
828 | result = FALSE; // 'in' or 'within' rule |
829 | for (int32_t r=0; r<rangeList->size(); r+=2) { |
830 | if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) { |
831 | result = TRUE; |
832 | break; |
833 | } |
834 | } |
835 | } while (FALSE); |
836 | |
837 | if (negated) { |
838 | result = !result; |
839 | } |
840 | return result; |
841 | } |
842 | |
843 | AndConstraint* |
844 | AndConstraint::add(UErrorCode& status) { |
845 | if (U_FAILURE(fInternalStatus)) { |
846 | status = fInternalStatus; |
847 | return nullptr; |
848 | } |
849 | this->next = new AndConstraint(); |
850 | if (this->next == nullptr) { |
851 | status = U_MEMORY_ALLOCATION_ERROR; |
852 | } |
853 | return this->next; |
854 | } |
855 | |
856 | |
857 | OrConstraint::OrConstraint(const OrConstraint& other) { |
858 | this->fInternalStatus = other.fInternalStatus; |
859 | if (U_FAILURE(fInternalStatus)) { |
860 | return; // stop early if the object we are copying from is invalid. |
861 | } |
862 | if ( other.childNode != nullptr ) { |
863 | this->childNode = new AndConstraint(*(other.childNode)); |
864 | if (this->childNode == nullptr) { |
865 | fInternalStatus = U_MEMORY_ALLOCATION_ERROR; |
866 | return; |
867 | } |
868 | } |
869 | if (other.next != nullptr ) { |
870 | this->next = new OrConstraint(*(other.next)); |
871 | if (this->next == nullptr) { |
872 | fInternalStatus = U_MEMORY_ALLOCATION_ERROR; |
873 | return; |
874 | } |
875 | if (U_FAILURE(this->next->fInternalStatus)) { |
876 | this->fInternalStatus = this->next->fInternalStatus; |
877 | } |
878 | } |
879 | } |
880 | |
881 | OrConstraint::~OrConstraint() { |
882 | delete childNode; |
883 | childNode = nullptr; |
884 | delete next; |
885 | next = nullptr; |
886 | } |
887 | |
888 | AndConstraint* |
889 | OrConstraint::add(UErrorCode& status) { |
890 | if (U_FAILURE(fInternalStatus)) { |
891 | status = fInternalStatus; |
892 | return nullptr; |
893 | } |
894 | OrConstraint *curOrConstraint=this; |
895 | { |
896 | while (curOrConstraint->next!=nullptr) { |
897 | curOrConstraint = curOrConstraint->next; |
898 | } |
899 | U_ASSERT(curOrConstraint->childNode == nullptr); |
900 | curOrConstraint->childNode = new AndConstraint(); |
901 | if (curOrConstraint->childNode == nullptr) { |
902 | status = U_MEMORY_ALLOCATION_ERROR; |
903 | } |
904 | } |
905 | return curOrConstraint->childNode; |
906 | } |
907 | |
908 | UBool |
909 | OrConstraint::isFulfilled(const IFixedDecimal &number) { |
910 | OrConstraint* orRule=this; |
911 | UBool result=FALSE; |
912 | |
913 | while (orRule!=nullptr && !result) { |
914 | result=TRUE; |
915 | AndConstraint* andRule = orRule->childNode; |
916 | while (andRule!=nullptr && result) { |
917 | result = andRule->isFulfilled(number); |
918 | andRule=andRule->next; |
919 | } |
920 | orRule = orRule->next; |
921 | } |
922 | |
923 | return result; |
924 | } |
925 | |
926 | |
927 | RuleChain::RuleChain(const RuleChain& other) : |
928 | fKeyword(other.fKeyword), fDecimalSamples(other.fDecimalSamples), |
929 | fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded), |
930 | fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded), fInternalStatus(other.fInternalStatus) { |
931 | if (U_FAILURE(this->fInternalStatus)) { |
932 | return; // stop early if the object we are copying from is invalid. |
933 | } |
934 | if (other.ruleHeader != nullptr) { |
935 | this->ruleHeader = new OrConstraint(*(other.ruleHeader)); |
936 | if (this->ruleHeader == nullptr) { |
937 | this->fInternalStatus = U_MEMORY_ALLOCATION_ERROR; |
938 | } |
939 | else if (U_FAILURE(this->ruleHeader->fInternalStatus)) { |
940 | // If the OrConstraint wasn't fully copied, then set our status to failure as well. |
941 | this->fInternalStatus = this->ruleHeader->fInternalStatus; |
942 | return; // exit early. |
943 | } |
944 | } |
945 | if (other.fNext != nullptr ) { |
946 | this->fNext = new RuleChain(*other.fNext); |
947 | if (this->fNext == nullptr) { |
948 | this->fInternalStatus = U_MEMORY_ALLOCATION_ERROR; |
949 | } |
950 | else if (U_FAILURE(this->fNext->fInternalStatus)) { |
951 | // If the RuleChain wasn't fully copied, then set our status to failure as well. |
952 | this->fInternalStatus = this->fNext->fInternalStatus; |
953 | } |
954 | } |
955 | } |
956 | |
957 | RuleChain::~RuleChain() { |
958 | delete fNext; |
959 | delete ruleHeader; |
960 | } |
961 | |
962 | UnicodeString |
963 | RuleChain::select(const IFixedDecimal &number) const { |
964 | if (!number.isNaN() && !number.isInfinite()) { |
965 | for (const RuleChain *rules = this; rules != nullptr; rules = rules->fNext) { |
966 | if (rules->ruleHeader->isFulfilled(number)) { |
967 | return rules->fKeyword; |
968 | } |
969 | } |
970 | } |
971 | return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
972 | } |
973 | |
974 | static UnicodeString tokenString(tokenType tok) { |
975 | UnicodeString s; |
976 | switch (tok) { |
977 | case tVariableN: |
978 | s.append(LOW_N); break; |
979 | case tVariableI: |
980 | s.append(LOW_I); break; |
981 | case tVariableF: |
982 | s.append(LOW_F); break; |
983 | case tVariableV: |
984 | s.append(LOW_V); break; |
985 | case tVariableT: |
986 | s.append(LOW_T); break; |
987 | default: |
988 | s.append(TILDE); |
989 | } |
990 | return s; |
991 | } |
992 | |
993 | void |
994 | RuleChain::dumpRules(UnicodeString& result) { |
995 | UChar digitString[16]; |
996 | |
997 | if ( ruleHeader != nullptr ) { |
998 | result += fKeyword; |
999 | result += COLON; |
1000 | result += SPACE; |
1001 | OrConstraint* orRule=ruleHeader; |
1002 | while ( orRule != nullptr ) { |
1003 | AndConstraint* andRule=orRule->childNode; |
1004 | while ( andRule != nullptr ) { |
1005 | if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) && (andRule->value == -1)) { |
1006 | // Empty Rules. |
1007 | } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) ) { |
1008 | result += tokenString(andRule->digitsType); |
1009 | result += UNICODE_STRING_SIMPLE(" is " ); |
1010 | if (andRule->negated) { |
1011 | result += UNICODE_STRING_SIMPLE("not " ); |
1012 | } |
1013 | uprv_itou(digitString,16, andRule->value,10,0); |
1014 | result += UnicodeString(digitString); |
1015 | } |
1016 | else { |
1017 | result += tokenString(andRule->digitsType); |
1018 | result += SPACE; |
1019 | if (andRule->op==AndConstraint::MOD) { |
1020 | result += UNICODE_STRING_SIMPLE("mod " ); |
1021 | uprv_itou(digitString,16, andRule->opNum,10,0); |
1022 | result += UnicodeString(digitString); |
1023 | } |
1024 | if (andRule->rangeList==nullptr) { |
1025 | if (andRule->negated) { |
1026 | result += UNICODE_STRING_SIMPLE(" is not " ); |
1027 | uprv_itou(digitString,16, andRule->value,10,0); |
1028 | result += UnicodeString(digitString); |
1029 | } |
1030 | else { |
1031 | result += UNICODE_STRING_SIMPLE(" is " ); |
1032 | uprv_itou(digitString,16, andRule->value,10,0); |
1033 | result += UnicodeString(digitString); |
1034 | } |
1035 | } |
1036 | else { |
1037 | if (andRule->negated) { |
1038 | if ( andRule->integerOnly ) { |
1039 | result += UNICODE_STRING_SIMPLE(" not in " ); |
1040 | } |
1041 | else { |
1042 | result += UNICODE_STRING_SIMPLE(" not within " ); |
1043 | } |
1044 | } |
1045 | else { |
1046 | if ( andRule->integerOnly ) { |
1047 | result += UNICODE_STRING_SIMPLE(" in " ); |
1048 | } |
1049 | else { |
1050 | result += UNICODE_STRING_SIMPLE(" within " ); |
1051 | } |
1052 | } |
1053 | for (int32_t r=0; r<andRule->rangeList->size(); r+=2) { |
1054 | int32_t rangeLo = andRule->rangeList->elementAti(r); |
1055 | int32_t rangeHi = andRule->rangeList->elementAti(r+1); |
1056 | uprv_itou(digitString,16, rangeLo, 10, 0); |
1057 | result += UnicodeString(digitString); |
1058 | result += UNICODE_STRING_SIMPLE(".." ); |
1059 | uprv_itou(digitString,16, rangeHi, 10,0); |
1060 | result += UnicodeString(digitString); |
1061 | if (r+2 < andRule->rangeList->size()) { |
1062 | result += UNICODE_STRING_SIMPLE(", " ); |
1063 | } |
1064 | } |
1065 | } |
1066 | } |
1067 | if ( (andRule=andRule->next) != nullptr) { |
1068 | result += UNICODE_STRING_SIMPLE(" and " ); |
1069 | } |
1070 | } |
1071 | if ( (orRule = orRule->next) != nullptr ) { |
1072 | result += UNICODE_STRING_SIMPLE(" or " ); |
1073 | } |
1074 | } |
1075 | } |
1076 | if ( fNext != nullptr ) { |
1077 | result += UNICODE_STRING_SIMPLE("; " ); |
1078 | fNext->dumpRules(result); |
1079 | } |
1080 | } |
1081 | |
1082 | |
1083 | UErrorCode |
1084 | RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { |
1085 | if (U_FAILURE(fInternalStatus)) { |
1086 | return fInternalStatus; |
1087 | } |
1088 | if ( arraySize < capacityOfKeywords-1 ) { |
1089 | keywords[arraySize++]=fKeyword; |
1090 | } |
1091 | else { |
1092 | return U_BUFFER_OVERFLOW_ERROR; |
1093 | } |
1094 | |
1095 | if ( fNext != nullptr ) { |
1096 | return fNext->getKeywords(capacityOfKeywords, keywords, arraySize); |
1097 | } |
1098 | else { |
1099 | return U_ZERO_ERROR; |
1100 | } |
1101 | } |
1102 | |
1103 | UBool |
1104 | RuleChain::isKeyword(const UnicodeString& keywordParam) const { |
1105 | if ( fKeyword == keywordParam ) { |
1106 | return TRUE; |
1107 | } |
1108 | |
1109 | if ( fNext != nullptr ) { |
1110 | return fNext->isKeyword(keywordParam); |
1111 | } |
1112 | else { |
1113 | return FALSE; |
1114 | } |
1115 | } |
1116 | |
1117 | |
1118 | PluralRuleParser::PluralRuleParser() : |
1119 | ruleIndex(0), token(), type(none), prevType(none), |
1120 | curAndConstraint(nullptr), currentChain(nullptr), rangeLowIdx(-1), rangeHiIdx(-1) |
1121 | { |
1122 | } |
1123 | |
1124 | PluralRuleParser::~PluralRuleParser() { |
1125 | } |
1126 | |
1127 | |
1128 | int32_t |
1129 | PluralRuleParser::getNumberValue(const UnicodeString& token) { |
1130 | int32_t i; |
1131 | char digits[128]; |
1132 | |
1133 | i = token.extract(0, token.length(), digits, UPRV_LENGTHOF(digits), US_INV); |
1134 | digits[i]='\0'; |
1135 | |
1136 | return((int32_t)atoi(digits)); |
1137 | } |
1138 | |
1139 | |
1140 | void |
1141 | PluralRuleParser::checkSyntax(UErrorCode &status) |
1142 | { |
1143 | if (U_FAILURE(status)) { |
1144 | return; |
1145 | } |
1146 | if (!(prevType==none || prevType==tSemiColon)) { |
1147 | type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word, |
1148 | // and we are not at the start of a rule, where a |
1149 | // keyword is expected. |
1150 | } |
1151 | |
1152 | switch(prevType) { |
1153 | case none: |
1154 | case tSemiColon: |
1155 | if (type!=tKeyword && type != tEOF) { |
1156 | status = U_UNEXPECTED_TOKEN; |
1157 | } |
1158 | break; |
1159 | case tVariableN: |
1160 | case tVariableI: |
1161 | case tVariableF: |
1162 | case tVariableT: |
1163 | case tVariableV: |
1164 | if (type != tIs && type != tMod && type != tIn && |
1165 | type != tNot && type != tWithin && type != tEqual && type != tNotEqual) { |
1166 | status = U_UNEXPECTED_TOKEN; |
1167 | } |
1168 | break; |
1169 | case tKeyword: |
1170 | if (type != tColon) { |
1171 | status = U_UNEXPECTED_TOKEN; |
1172 | } |
1173 | break; |
1174 | case tColon: |
1175 | if (!(type == tVariableN || |
1176 | type == tVariableI || |
1177 | type == tVariableF || |
1178 | type == tVariableT || |
1179 | type == tVariableV || |
1180 | type == tAt)) { |
1181 | status = U_UNEXPECTED_TOKEN; |
1182 | } |
1183 | break; |
1184 | case tIs: |
1185 | if ( type != tNumber && type != tNot) { |
1186 | status = U_UNEXPECTED_TOKEN; |
1187 | } |
1188 | break; |
1189 | case tNot: |
1190 | if (type != tNumber && type != tIn && type != tWithin) { |
1191 | status = U_UNEXPECTED_TOKEN; |
1192 | } |
1193 | break; |
1194 | case tMod: |
1195 | case tDot2: |
1196 | case tIn: |
1197 | case tWithin: |
1198 | case tEqual: |
1199 | case tNotEqual: |
1200 | if (type != tNumber) { |
1201 | status = U_UNEXPECTED_TOKEN; |
1202 | } |
1203 | break; |
1204 | case tAnd: |
1205 | case tOr: |
1206 | if ( type != tVariableN && |
1207 | type != tVariableI && |
1208 | type != tVariableF && |
1209 | type != tVariableT && |
1210 | type != tVariableV) { |
1211 | status = U_UNEXPECTED_TOKEN; |
1212 | } |
1213 | break; |
1214 | case tComma: |
1215 | if (type != tNumber) { |
1216 | status = U_UNEXPECTED_TOKEN; |
1217 | } |
1218 | break; |
1219 | case tNumber: |
1220 | if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot && |
1221 | type != tIn && type != tEqual && type != tNotEqual && type != tWithin && |
1222 | type != tAnd && type != tOr && type != tComma && type != tAt && |
1223 | type != tEOF) |
1224 | { |
1225 | status = U_UNEXPECTED_TOKEN; |
1226 | } |
1227 | // TODO: a comma following a number that is not part of a range will be allowed. |
1228 | // It's not the only case of this sort of thing. Parser needs a re-write. |
1229 | break; |
1230 | case tAt: |
1231 | if (type != tDecimal && type != tInteger) { |
1232 | status = U_UNEXPECTED_TOKEN; |
1233 | } |
1234 | break; |
1235 | default: |
1236 | status = U_UNEXPECTED_TOKEN; |
1237 | break; |
1238 | } |
1239 | } |
1240 | |
1241 | |
1242 | /* |
1243 | * Scan the next token from the input rules. |
1244 | * rules and returned token type are in the parser state variables. |
1245 | */ |
1246 | void |
1247 | PluralRuleParser::getNextToken(UErrorCode &status) |
1248 | { |
1249 | if (U_FAILURE(status)) { |
1250 | return; |
1251 | } |
1252 | |
1253 | UChar ch; |
1254 | while (ruleIndex < ruleSrc->length()) { |
1255 | ch = ruleSrc->charAt(ruleIndex); |
1256 | type = charType(ch); |
1257 | if (type != tSpace) { |
1258 | break; |
1259 | } |
1260 | ++(ruleIndex); |
1261 | } |
1262 | if (ruleIndex >= ruleSrc->length()) { |
1263 | type = tEOF; |
1264 | return; |
1265 | } |
1266 | int32_t curIndex= ruleIndex; |
1267 | |
1268 | switch (type) { |
1269 | case tColon: |
1270 | case tSemiColon: |
1271 | case tComma: |
1272 | case tEllipsis: |
1273 | case tTilde: // scanned '~' |
1274 | case tAt: // scanned '@' |
1275 | case tEqual: // scanned '=' |
1276 | case tMod: // scanned '%' |
1277 | // Single character tokens. |
1278 | ++curIndex; |
1279 | break; |
1280 | |
1281 | case tNotEqual: // scanned '!' |
1282 | if (ruleSrc->charAt(curIndex+1) == EQUALS) { |
1283 | curIndex += 2; |
1284 | } else { |
1285 | type = none; |
1286 | curIndex += 1; |
1287 | } |
1288 | break; |
1289 | |
1290 | case tKeyword: |
1291 | while (type == tKeyword && ++curIndex < ruleSrc->length()) { |
1292 | ch = ruleSrc->charAt(curIndex); |
1293 | type = charType(ch); |
1294 | } |
1295 | type = tKeyword; |
1296 | break; |
1297 | |
1298 | case tNumber: |
1299 | while (type == tNumber && ++curIndex < ruleSrc->length()) { |
1300 | ch = ruleSrc->charAt(curIndex); |
1301 | type = charType(ch); |
1302 | } |
1303 | type = tNumber; |
1304 | break; |
1305 | |
1306 | case tDot: |
1307 | // We could be looking at either ".." in a range, or "..." at the end of a sample. |
1308 | if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) { |
1309 | ++curIndex; |
1310 | break; // Single dot |
1311 | } |
1312 | if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) { |
1313 | curIndex += 2; |
1314 | type = tDot2; |
1315 | break; // double dot |
1316 | } |
1317 | type = tEllipsis; |
1318 | curIndex += 3; |
1319 | break; // triple dot |
1320 | |
1321 | default: |
1322 | status = U_UNEXPECTED_TOKEN; |
1323 | ++curIndex; |
1324 | break; |
1325 | } |
1326 | |
1327 | U_ASSERT(ruleIndex <= ruleSrc->length()); |
1328 | U_ASSERT(curIndex <= ruleSrc->length()); |
1329 | token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex); |
1330 | ruleIndex = curIndex; |
1331 | } |
1332 | |
1333 | tokenType |
1334 | PluralRuleParser::charType(UChar ch) { |
1335 | if ((ch>=U_ZERO) && (ch<=U_NINE)) { |
1336 | return tNumber; |
1337 | } |
1338 | if (ch>=LOW_A && ch<=LOW_Z) { |
1339 | return tKeyword; |
1340 | } |
1341 | switch (ch) { |
1342 | case COLON: |
1343 | return tColon; |
1344 | case SPACE: |
1345 | return tSpace; |
1346 | case SEMI_COLON: |
1347 | return tSemiColon; |
1348 | case DOT: |
1349 | return tDot; |
1350 | case COMMA: |
1351 | return tComma; |
1352 | case EXCLAMATION: |
1353 | return tNotEqual; |
1354 | case EQUALS: |
1355 | return tEqual; |
1356 | case PERCENT_SIGN: |
1357 | return tMod; |
1358 | case AT: |
1359 | return tAt; |
1360 | case ELLIPSIS: |
1361 | return tEllipsis; |
1362 | case TILDE: |
1363 | return tTilde; |
1364 | default : |
1365 | return none; |
1366 | } |
1367 | } |
1368 | |
1369 | |
1370 | // Set token type for reserved words in the Plural Rule syntax. |
1371 | |
1372 | tokenType |
1373 | PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType) |
1374 | { |
1375 | if (keyType != tKeyword) { |
1376 | return keyType; |
1377 | } |
1378 | |
1379 | if (0 == token.compare(PK_VAR_N, 1)) { |
1380 | keyType = tVariableN; |
1381 | } else if (0 == token.compare(PK_VAR_I, 1)) { |
1382 | keyType = tVariableI; |
1383 | } else if (0 == token.compare(PK_VAR_F, 1)) { |
1384 | keyType = tVariableF; |
1385 | } else if (0 == token.compare(PK_VAR_T, 1)) { |
1386 | keyType = tVariableT; |
1387 | } else if (0 == token.compare(PK_VAR_V, 1)) { |
1388 | keyType = tVariableV; |
1389 | } else if (0 == token.compare(PK_IS, 2)) { |
1390 | keyType = tIs; |
1391 | } else if (0 == token.compare(PK_AND, 3)) { |
1392 | keyType = tAnd; |
1393 | } else if (0 == token.compare(PK_IN, 2)) { |
1394 | keyType = tIn; |
1395 | } else if (0 == token.compare(PK_WITHIN, 6)) { |
1396 | keyType = tWithin; |
1397 | } else if (0 == token.compare(PK_NOT, 3)) { |
1398 | keyType = tNot; |
1399 | } else if (0 == token.compare(PK_MOD, 3)) { |
1400 | keyType = tMod; |
1401 | } else if (0 == token.compare(PK_OR, 2)) { |
1402 | keyType = tOr; |
1403 | } else if (0 == token.compare(PK_DECIMAL, 7)) { |
1404 | keyType = tDecimal; |
1405 | } else if (0 == token.compare(PK_INTEGER, 7)) { |
1406 | keyType = tInteger; |
1407 | } |
1408 | return keyType; |
1409 | } |
1410 | |
1411 | |
1412 | PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *, UErrorCode& status) |
1413 | : pos(0), fKeywordNames(status) { |
1414 | if (U_FAILURE(status)) { |
1415 | return; |
1416 | } |
1417 | fKeywordNames.setDeleter(uprv_deleteUObject); |
1418 | UBool addKeywordOther = TRUE; |
1419 | RuleChain *node = header; |
1420 | while (node != nullptr) { |
1421 | auto newElem = new UnicodeString(node->fKeyword); |
1422 | if (newElem == nullptr) { |
1423 | status = U_MEMORY_ALLOCATION_ERROR; |
1424 | return; |
1425 | } |
1426 | fKeywordNames.addElement(newElem, status); |
1427 | if (U_FAILURE(status)) { |
1428 | delete newElem; |
1429 | return; |
1430 | } |
1431 | if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
1432 | addKeywordOther = FALSE; |
1433 | } |
1434 | node = node->fNext; |
1435 | } |
1436 | |
1437 | if (addKeywordOther) { |
1438 | auto newElem = new UnicodeString(PLURAL_KEYWORD_OTHER); |
1439 | if (newElem == nullptr) { |
1440 | status = U_MEMORY_ALLOCATION_ERROR; |
1441 | return; |
1442 | } |
1443 | fKeywordNames.addElement(newElem, status); |
1444 | if (U_FAILURE(status)) { |
1445 | delete newElem; |
1446 | return; |
1447 | } |
1448 | } |
1449 | } |
1450 | |
1451 | const UnicodeString* |
1452 | PluralKeywordEnumeration::snext(UErrorCode& status) { |
1453 | if (U_SUCCESS(status) && pos < fKeywordNames.size()) { |
1454 | return (const UnicodeString*)fKeywordNames.elementAt(pos++); |
1455 | } |
1456 | return nullptr; |
1457 | } |
1458 | |
1459 | void |
1460 | PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { |
1461 | pos=0; |
1462 | } |
1463 | |
1464 | int32_t |
1465 | PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { |
1466 | return fKeywordNames.size(); |
1467 | } |
1468 | |
1469 | PluralKeywordEnumeration::~PluralKeywordEnumeration() { |
1470 | } |
1471 | |
1472 | PluralOperand tokenTypeToPluralOperand(tokenType tt) { |
1473 | switch(tt) { |
1474 | case tVariableN: |
1475 | return PLURAL_OPERAND_N; |
1476 | case tVariableI: |
1477 | return PLURAL_OPERAND_I; |
1478 | case tVariableF: |
1479 | return PLURAL_OPERAND_F; |
1480 | case tVariableV: |
1481 | return PLURAL_OPERAND_V; |
1482 | case tVariableT: |
1483 | return PLURAL_OPERAND_T; |
1484 | default: |
1485 | UPRV_UNREACHABLE; // unexpected. |
1486 | } |
1487 | } |
1488 | |
1489 | FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) { |
1490 | init(n, v, f); |
1491 | // check values. TODO make into unit test. |
1492 | // |
1493 | // long visiblePower = (int) Math.pow(10, v); |
1494 | // if (decimalDigits > visiblePower) { |
1495 | // throw new IllegalArgumentException(); |
1496 | // } |
1497 | // double fraction = intValue + (decimalDigits / (double) visiblePower); |
1498 | // if (fraction != source) { |
1499 | // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source)); |
1500 | // if (diff > 0.00000001d) { |
1501 | // throw new IllegalArgumentException(); |
1502 | // } |
1503 | // } |
1504 | } |
1505 | |
1506 | FixedDecimal::FixedDecimal(double n, int32_t v) { |
1507 | // Ugly, but for samples we don't care. |
1508 | init(n, v, getFractionalDigits(n, v)); |
1509 | } |
1510 | |
1511 | FixedDecimal::FixedDecimal(double n) { |
1512 | init(n); |
1513 | } |
1514 | |
1515 | FixedDecimal::FixedDecimal() { |
1516 | init(0, 0, 0); |
1517 | } |
1518 | |
1519 | |
1520 | // Create a FixedDecimal from a UnicodeString containing a number. |
1521 | // Inefficient, but only used for samples, so simplicity trumps efficiency. |
1522 | |
1523 | FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { |
1524 | CharString cs; |
1525 | cs.appendInvariantChars(num, status); |
1526 | DecimalQuantity dl; |
1527 | dl.setToDecNumber(cs.toStringPiece(), status); |
1528 | if (U_FAILURE(status)) { |
1529 | init(0, 0, 0); |
1530 | return; |
1531 | } |
1532 | int32_t decimalPoint = num.indexOf(DOT); |
1533 | double n = dl.toDouble(); |
1534 | if (decimalPoint == -1) { |
1535 | init(n, 0, 0); |
1536 | } else { |
1537 | int32_t v = num.length() - decimalPoint - 1; |
1538 | init(n, v, getFractionalDigits(n, v)); |
1539 | } |
1540 | } |
1541 | |
1542 | |
1543 | FixedDecimal::FixedDecimal(const FixedDecimal &other) { |
1544 | source = other.source; |
1545 | visibleDecimalDigitCount = other.visibleDecimalDigitCount; |
1546 | decimalDigits = other.decimalDigits; |
1547 | decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros; |
1548 | intValue = other.intValue; |
1549 | _hasIntegerValue = other._hasIntegerValue; |
1550 | isNegative = other.isNegative; |
1551 | _isNaN = other._isNaN; |
1552 | _isInfinite = other._isInfinite; |
1553 | } |
1554 | |
1555 | FixedDecimal::~FixedDecimal() = default; |
1556 | |
1557 | |
1558 | void FixedDecimal::init(double n) { |
1559 | int32_t numFractionDigits = decimals(n); |
1560 | init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); |
1561 | } |
1562 | |
1563 | |
1564 | void FixedDecimal::init(double n, int32_t v, int64_t f) { |
1565 | isNegative = n < 0.0; |
1566 | source = fabs(n); |
1567 | _isNaN = uprv_isNaN(source); |
1568 | _isInfinite = uprv_isInfinite(source); |
1569 | if (_isNaN || _isInfinite) { |
1570 | v = 0; |
1571 | f = 0; |
1572 | intValue = 0; |
1573 | _hasIntegerValue = FALSE; |
1574 | } else { |
1575 | intValue = (int64_t)source; |
1576 | _hasIntegerValue = (source == intValue); |
1577 | } |
1578 | |
1579 | visibleDecimalDigitCount = v; |
1580 | decimalDigits = f; |
1581 | if (f == 0) { |
1582 | decimalDigitsWithoutTrailingZeros = 0; |
1583 | } else { |
1584 | int64_t fdwtz = f; |
1585 | while ((fdwtz%10) == 0) { |
1586 | fdwtz /= 10; |
1587 | } |
1588 | decimalDigitsWithoutTrailingZeros = fdwtz; |
1589 | } |
1590 | } |
1591 | |
1592 | |
1593 | // Fast path only exact initialization. Return true if successful. |
1594 | // Note: Do not multiply by 10 each time through loop, rounding cruft can build |
1595 | // up that makes the check for an integer result fail. |
1596 | // A single multiply of the original number works more reliably. |
1597 | static int32_t p10[] = {1, 10, 100, 1000, 10000}; |
1598 | UBool FixedDecimal::quickInit(double n) { |
1599 | UBool success = FALSE; |
1600 | n = fabs(n); |
1601 | int32_t numFractionDigits; |
1602 | for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) { |
1603 | double scaledN = n * p10[numFractionDigits]; |
1604 | if (scaledN == floor(scaledN)) { |
1605 | success = TRUE; |
1606 | break; |
1607 | } |
1608 | } |
1609 | if (success) { |
1610 | init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); |
1611 | } |
1612 | return success; |
1613 | } |
1614 | |
1615 | |
1616 | |
1617 | int32_t FixedDecimal::decimals(double n) { |
1618 | // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros. |
1619 | // fastpath the common cases, integers or fractions with 3 or fewer digits |
1620 | n = fabs(n); |
1621 | for (int ndigits=0; ndigits<=3; ndigits++) { |
1622 | double scaledN = n * p10[ndigits]; |
1623 | if (scaledN == floor(scaledN)) { |
1624 | return ndigits; |
1625 | } |
1626 | } |
1627 | |
1628 | // Slow path, convert with sprintf, parse converted output. |
1629 | char buf[30] = {0}; |
1630 | sprintf(buf, "%1.15e" , n); |
1631 | // formatted number looks like this: 1.234567890123457e-01 |
1632 | int exponent = atoi(buf+18); |
1633 | int numFractionDigits = 15; |
1634 | for (int i=16; ; --i) { |
1635 | if (buf[i] != '0') { |
1636 | break; |
1637 | } |
1638 | --numFractionDigits; |
1639 | } |
1640 | numFractionDigits -= exponent; // Fraction part of fixed point representation. |
1641 | return numFractionDigits; |
1642 | } |
1643 | |
1644 | |
1645 | // Get the fraction digits of a double, represented as an integer. |
1646 | // v is the number of visible fraction digits in the displayed form of the number. |
1647 | // Example: n = 1001.234, v = 6, result = 234000 |
1648 | // TODO: need to think through how this is used in the plural rule context. |
1649 | // This function can easily encounter integer overflow, |
1650 | // and can easily return noise digits when the precision of a double is exceeded. |
1651 | |
1652 | int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) { |
1653 | if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) { |
1654 | return 0; |
1655 | } |
1656 | n = fabs(n); |
1657 | double fract = n - floor(n); |
1658 | switch (v) { |
1659 | case 1: return (int64_t)(fract*10.0 + 0.5); |
1660 | case 2: return (int64_t)(fract*100.0 + 0.5); |
1661 | case 3: return (int64_t)(fract*1000.0 + 0.5); |
1662 | default: |
1663 | double scaled = floor(fract * pow(10.0, (double)v) + 0.5); |
1664 | if (scaled > U_INT64_MAX) { |
1665 | return U_INT64_MAX; |
1666 | } else { |
1667 | return (int64_t)scaled; |
1668 | } |
1669 | } |
1670 | } |
1671 | |
1672 | |
1673 | void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) { |
1674 | int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount; |
1675 | if (numTrailingFractionZeros > 0) { |
1676 | for (int32_t i=0; i<numTrailingFractionZeros; i++) { |
1677 | // Do not let the decimalDigits value overflow if there are many trailing zeros. |
1678 | // Limit the value to 18 digits, the most that a 64 bit int can fully represent. |
1679 | if (decimalDigits >= 100000000000000000LL) { |
1680 | break; |
1681 | } |
1682 | decimalDigits *= 10; |
1683 | } |
1684 | visibleDecimalDigitCount += numTrailingFractionZeros; |
1685 | } |
1686 | } |
1687 | |
1688 | |
1689 | double FixedDecimal::getPluralOperand(PluralOperand operand) const { |
1690 | switch(operand) { |
1691 | case PLURAL_OPERAND_N: return source; |
1692 | case PLURAL_OPERAND_I: return static_cast<double>(intValue); |
1693 | case PLURAL_OPERAND_F: return static_cast<double>(decimalDigits); |
1694 | case PLURAL_OPERAND_T: return static_cast<double>(decimalDigitsWithoutTrailingZeros); |
1695 | case PLURAL_OPERAND_V: return visibleDecimalDigitCount; |
1696 | case PLURAL_OPERAND_E: return 0; |
1697 | default: |
1698 | UPRV_UNREACHABLE; // unexpected. |
1699 | } |
1700 | } |
1701 | |
1702 | bool FixedDecimal::isNaN() const { |
1703 | return _isNaN; |
1704 | } |
1705 | |
1706 | bool FixedDecimal::isInfinite() const { |
1707 | return _isInfinite; |
1708 | } |
1709 | |
1710 | bool FixedDecimal::hasIntegerValue() const { |
1711 | return _hasIntegerValue; |
1712 | } |
1713 | |
1714 | bool FixedDecimal::isNanOrInfinity() const { |
1715 | return _isNaN || _isInfinite; |
1716 | } |
1717 | |
1718 | int32_t FixedDecimal::getVisibleFractionDigitCount() const { |
1719 | return visibleDecimalDigitCount; |
1720 | } |
1721 | |
1722 | |
1723 | |
1724 | PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) { |
1725 | fOpenStatus = status; |
1726 | if (U_FAILURE(status)) { |
1727 | return; |
1728 | } |
1729 | fOpenStatus = U_ZERO_ERROR; // clear any warnings. |
1730 | LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals" , &fOpenStatus)); |
1731 | fLocales = ures_getByKey(rb.getAlias(), "locales" , nullptr, &fOpenStatus); |
1732 | } |
1733 | |
1734 | PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() { |
1735 | ures_close(fLocales); |
1736 | ures_close(fRes); |
1737 | fLocales = nullptr; |
1738 | fRes = nullptr; |
1739 | } |
1740 | |
1741 | const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) { |
1742 | if (U_FAILURE(status)) { |
1743 | return nullptr; |
1744 | } |
1745 | if (U_FAILURE(fOpenStatus)) { |
1746 | status = fOpenStatus; |
1747 | return nullptr; |
1748 | } |
1749 | fRes = ures_getNextResource(fLocales, fRes, &status); |
1750 | if (fRes == nullptr || U_FAILURE(status)) { |
1751 | if (status == U_INDEX_OUTOFBOUNDS_ERROR) { |
1752 | status = U_ZERO_ERROR; |
1753 | } |
1754 | return nullptr; |
1755 | } |
1756 | const char *result = ures_getKey(fRes); |
1757 | if (resultLength != nullptr) { |
1758 | *resultLength = static_cast<int32_t>(uprv_strlen(result)); |
1759 | } |
1760 | return result; |
1761 | } |
1762 | |
1763 | |
1764 | void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) { |
1765 | if (U_FAILURE(status)) { |
1766 | return; |
1767 | } |
1768 | if (U_FAILURE(fOpenStatus)) { |
1769 | status = fOpenStatus; |
1770 | return; |
1771 | } |
1772 | ures_resetIterator(fLocales); |
1773 | } |
1774 | |
1775 | int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const { |
1776 | if (U_FAILURE(status)) { |
1777 | return 0; |
1778 | } |
1779 | if (U_FAILURE(fOpenStatus)) { |
1780 | status = fOpenStatus; |
1781 | return 0; |
1782 | } |
1783 | return ures_getSize(fLocales); |
1784 | } |
1785 | |
1786 | U_NAMESPACE_END |
1787 | |
1788 | |
1789 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
1790 | |
1791 | //eof |
1792 | |