1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 1997-2015, International Business Machines Corporation
6* and others. All Rights Reserved.
7*******************************************************************************
8*/
9
10#include "unicode/utypes.h"
11#include "utypeinfo.h" // for 'typeid' to work
12
13#include "unicode/rbnf.h"
14
15#if U_HAVE_RBNF
16
17#include "unicode/normlzr.h"
18#include "unicode/plurfmt.h"
19#include "unicode/tblcoll.h"
20#include "unicode/uchar.h"
21#include "unicode/ucol.h"
22#include "unicode/uloc.h"
23#include "unicode/unum.h"
24#include "unicode/ures.h"
25#include "unicode/ustring.h"
26#include "unicode/utf16.h"
27#include "unicode/udata.h"
28#include "unicode/udisplaycontext.h"
29#include "unicode/brkiter.h"
30#include "unicode/ucasemap.h"
31
32#include "cmemory.h"
33#include "cstring.h"
34#include "patternprops.h"
35#include "uresimp.h"
36#include "nfrs.h"
37#include "number_decimalquantity.h"
38
39// debugging
40// #define RBNF_DEBUG
41
42#ifdef RBNF_DEBUG
43#include <stdio.h>
44#endif
45
46#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
47
// Prefix that marks a rule-set name as internal: the public format() overloads
// that take a rule-set name reject any name starting with it.
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
static const UChar gSemiColon = 0x003B; /* ';' */
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble is 2^22 expressed as a double; kMaxDouble is its square (2^44).
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
68
69U_NAMESPACE_BEGIN
70
71using number::impl::DecimalQuantity;
72
73UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
74
75/*
76This is a utility class. It does not use ICU's RTTI.
77If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
78Please make sure that intltest passes on Windows in Release mode,
79since the string pooling per compilation unit will mess up how RTTI works.
80The RTTI code was also removed due to lack of code coverage.
81*/
82class LocalizationInfo : public UMemory {
83protected:
84 virtual ~LocalizationInfo();
85 uint32_t refcount;
86
87public:
88 LocalizationInfo() : refcount(0) {}
89
90 LocalizationInfo* ref(void) {
91 ++refcount;
92 return this;
93 }
94
95 LocalizationInfo* unref(void) {
96 if (refcount && --refcount == 0) {
97 delete this;
98 }
99 return NULL;
100 }
101
102 virtual UBool operator==(const LocalizationInfo* rhs) const;
103 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
104
105 virtual int32_t getNumberOfRuleSets(void) const = 0;
106 virtual const UChar* getRuleSetName(int32_t index) const = 0;
107 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
108 virtual const UChar* getLocaleName(int32_t index) const = 0;
109 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
110
111 virtual int32_t indexForLocale(const UChar* locale) const;
112 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
113
114// virtual UClassID getDynamicClassID() const = 0;
115// static UClassID getStaticClassID(void);
116};
117
// Out-of-line definition of the protected virtual destructor; the base class
// itself holds only the reference count, so there is nothing to release.
LocalizationInfo::~LocalizationInfo() {}
119
120//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
121
122// if both strings are NULL, this returns TRUE
123static UBool
124streq(const UChar* lhs, const UChar* rhs) {
125 if (rhs == lhs) {
126 return TRUE;
127 }
128 if (lhs && rhs) {
129 return u_strcmp(lhs, rhs) == 0;
130 }
131 return FALSE;
132}
133
134UBool
135LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
136 if (rhs) {
137 if (this == rhs) {
138 return TRUE;
139 }
140
141 int32_t rsc = getNumberOfRuleSets();
142 if (rsc == rhs->getNumberOfRuleSets()) {
143 for (int i = 0; i < rsc; ++i) {
144 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
145 return FALSE;
146 }
147 }
148 int32_t dlc = getNumberOfDisplayLocales();
149 if (dlc == rhs->getNumberOfDisplayLocales()) {
150 for (int i = 0; i < dlc; ++i) {
151 const UChar* locale = getLocaleName(i);
152 int32_t ix = rhs->indexForLocale(locale);
153 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
154 if (!streq(locale, rhs->getLocaleName(ix))) {
155 return FALSE;
156 }
157 for (int j = 0; j < rsc; ++j) {
158 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
159 return FALSE;
160 }
161 }
162 }
163 return TRUE;
164 }
165 }
166 }
167 return FALSE;
168}
169
170int32_t
171LocalizationInfo::indexForLocale(const UChar* locale) const {
172 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
173 if (streq(locale, getLocaleName(i))) {
174 return i;
175 }
176 }
177 return -1;
178}
179
180int32_t
181LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
182 if (ruleset) {
183 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
184 if (streq(ruleset, getRuleSetName(i))) {
185 return i;
186 }
187 }
188 }
189 return -1;
190}
191
192
193typedef void (*Fn_Deleter)(void*);
194
195class VArray {
196 void** buf;
197 int32_t cap;
198 int32_t size;
199 Fn_Deleter deleter;
200public:
201 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
202
203 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
204
205 ~VArray() {
206 if (deleter) {
207 for (int i = 0; i < size; ++i) {
208 (*deleter)(buf[i]);
209 }
210 }
211 uprv_free(buf);
212 }
213
214 int32_t length() {
215 return size;
216 }
217
218 void add(void* elem, UErrorCode& status) {
219 if (U_SUCCESS(status)) {
220 if (size == cap) {
221 if (cap == 0) {
222 cap = 1;
223 } else if (cap < 256) {
224 cap *= 2;
225 } else {
226 cap += 256;
227 }
228 if (buf == NULL) {
229 buf = (void**)uprv_malloc(cap * sizeof(void*));
230 } else {
231 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
232 }
233 if (buf == NULL) {
234 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
235 status = U_MEMORY_ALLOCATION_ERROR;
236 return;
237 }
238 void* start = &buf[size];
239 size_t count = (cap - size) * sizeof(void*);
240 uprv_memset(start, 0, count); // fill with nulls, just because
241 }
242 buf[size++] = elem;
243 }
244 }
245
246 void** release(void) {
247 void** result = buf;
248 buf = NULL;
249 cap = 0;
250 size = 0;
251 return result;
252 }
253};
254
255class LocDataParser;
256
// LocalizationInfo backed by a single parsed text buffer.
// Instances are produced by create(), which runs LocDataParser over a copy of
// the caller's string; the constructor is private and reachable only through
// the LocDataParser friendship.
class StringLocalizationInfo : public LocalizationInfo {
    UChar* info;          // text buffer that the strings in `data` point into; freed by the destructor
    UChar*** data;        // NULL-terminated table of rows: data[0] holds the rule-set names,
                          // data[i+1][0] a locale name and data[i+1][j+1] its display names
    int32_t numRuleSets;  // value reported by getNumberOfRuleSets()
    int32_t numLocales;   // value reported by getNumberOfDisplayLocales()

friend class LocDataParser;

    // Stores the arguments as-is; the destructor later frees both `i` and `d`.
    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    {
    }

public:
    // Parses `info` into a new object. Returns NULL without error for an empty
    // string, and NULL with `status` set on failure.
    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

    virtual ~StringLocalizationInfo();
    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    virtual const UChar* getRuleSetName(int32_t index) const;
    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    virtual const UChar* getLocaleName(int32_t index) const;
    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

//    virtual UClassID getDynamicClassID() const;
//    static UClassID getStaticClassID(void);

private:
    // NOTE(review): declared here, but no definition or call is visible in this part of the file.
    void init(UErrorCode& status) const;
};
286
287
// Delimiter characters recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' */
    QUOTE = 0x0022, /* '"' */
    SPACE = 0x0020 /* ' ' */
};
296
297/**
298 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
299 */
300class LocDataParser {
301 UChar* data;
302 const UChar* e;
303 UChar* p;
304 UChar ch;
305 UParseError& pe;
306 UErrorCode& ec;
307
308public:
309 LocDataParser(UParseError& parseError, UErrorCode& status)
310 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
311 ~LocDataParser() {}
312
313 /*
314 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
315 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
316 */
317 StringLocalizationInfo* parse(UChar* data, int32_t len);
318
319private:
320
321 inline void inc(void) {
322 ++p;
323 ch = 0xffff;
324 }
325 inline UBool checkInc(UChar c) {
326 if (p < e && (ch == c || *p == c)) {
327 inc();
328 return TRUE;
329 }
330 return FALSE;
331 }
332 inline UBool check(UChar c) {
333 return p < e && (ch == c || *p == c);
334 }
335 inline void skipWhitespace(void) {
336 while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) {
337 inc();
338 }
339 }
340 inline UBool inList(UChar c, const UChar* list) const {
341 if (*list == SPACE && PatternProps::isWhiteSpace(c)) {
342 return TRUE;
343 }
344 while (*list && *list != c) {
345 ++list;
346 }
347 return *list == c;
348 }
349 void parseError(const char* msg);
350
351 StringLocalizationInfo* doParse(void);
352
353 UChar** nextArray(int32_t& requiredLength);
354 UChar* nextString(void);
355};
356
// ERROR(msg) reports a parse error through LocDataParser::parseError and then
// returns NULL from the enclosing function, so it may only be used inside
// LocDataParser members that return a pointer. The message text is forwarded
// only in RBNF_DEBUG builds; otherwise parseError receives NULL and its
// parameter is left unnamed (EXPLANATION_ARG expands to nothing).
#ifdef RBNF_DEBUG
#define ERROR(msg) UPRV_BLOCK_MACRO_BEGIN { \
    parseError(msg); \
    return NULL; \
} UPRV_BLOCK_MACRO_END
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) UPRV_BLOCK_MACRO_BEGIN { \
    parseError(NULL); \
    return NULL; \
} UPRV_BLOCK_MACRO_END
#define EXPLANATION_ARG
#endif
370
371
// Terminator sets used by LocDataParser::nextString, each NUL-terminated.

// A double-quoted string ends at the next double quote.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// A single-quoted string ends at the next single quote.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// An unquoted string ends at any delimiter. The leading SPACE entry makes
// LocDataParser::inList treat every pattern white space character as a terminator.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
383
384static void
385DeleteFn(void* p) {
386 uprv_free(p);
387}
388
389StringLocalizationInfo*
390LocDataParser::parse(UChar* _data, int32_t len) {
391 if (U_FAILURE(ec)) {
392 if (_data) uprv_free(_data);
393 return NULL;
394 }
395
396 pe.line = 0;
397 pe.offset = -1;
398 pe.postContext[0] = 0;
399 pe.preContext[0] = 0;
400
401 if (_data == NULL) {
402 ec = U_ILLEGAL_ARGUMENT_ERROR;
403 return NULL;
404 }
405
406 if (len <= 0) {
407 ec = U_ILLEGAL_ARGUMENT_ERROR;
408 uprv_free(_data);
409 return NULL;
410 }
411
412 data = _data;
413 e = data + len;
414 p = _data;
415 ch = 0xffff;
416
417 return doParse();
418}
419
420
// Parses the whole buffer as an outer array of inner arrays:
//     '<' array (',' array)* '>'
// with optional white space around every token and nothing but white space
// after the closing bracket.
// On success returns a new StringLocalizationInfo that receives `data` and the
// table of inner arrays. On any error the ERROR macro calls parseError() and
// returns NULL; inner arrays already collected are freed by the local VArray's
// destructor through DeleteFn.
StringLocalizationInfo*
LocDataParser::doParse(void) {
    skipWhitespace();
    if (!checkInc(OPEN_ANGLE)) {
        ERROR("Missing open angle");
    } else {
        VArray array(DeleteFn);
        UBool mightHaveNext = TRUE;
        // -1 until the first inner array fixes the length expected of later ones
        int32_t requiredLength = -1;
        while (mightHaveNext) {
            mightHaveNext = FALSE;
            UChar** elem = nextArray(requiredLength);
            skipWhitespace();
            UBool haveComma = check(COMMA);
            if (elem) {
                array.add(elem, ec);
                if (haveComma) {
                    inc();
                    mightHaveNext = TRUE;
                }
            } else if (haveComma) {
                // a comma with no array in front of it
                ERROR("Unexpected character");
            }
        }

        skipWhitespace();
        if (!checkInc(CLOSE_ANGLE)) {
            if (check(OPEN_ANGLE)) {
                ERROR("Missing comma in outer array");
            } else {
                ERROR("Missing close angle bracket in outer array");
            }
        }

        skipWhitespace();
        if (p != e) {
            ERROR("Extra text after close of localization data");
        }

        // NULL-terminate the table so the destructor of the result can walk it
        array.add(NULL, ec);
        if (U_SUCCESS(ec)) {
            int32_t numLocs = array.length() - 2; // subtract first, NULL
            UChar*** result = (UChar***)array.release();

            return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
        }
    }

    // reached only when ec reports a failure (e.g. an allocation failure in add)
    ERROR("Unknown error");
}
471
// Parses one inner array:
//     '<' string (',' string)* '>'
// and returns it as a NULL-terminated table of pointers into the buffer.
// `requiredLength` is in/out: while it is -1 the first array parsed sets it to
// its own length (terminator included) plus one; every later array must then
// have exactly that length (terminator included), otherwise the parse fails
// with U_ILLEGAL_ARGUMENT_ERROR.
// Returns NULL on any error (the ERROR macro calls parseError() first) or when
// `ec` already reports a failure on entry.
UChar**
LocDataParser::nextArray(int32_t& requiredLength) {
    if (U_FAILURE(ec)) {
        return NULL;
    }

    skipWhitespace();
    if (!checkInc(OPEN_ANGLE)) {
        ERROR("Missing open angle");
    }

    // no deleter: the elements point into `data` and are not owned by the array
    VArray array;
    UBool mightHaveNext = TRUE;
    while (mightHaveNext) {
        mightHaveNext = FALSE;
        UChar* elem = nextString();
        skipWhitespace();
        UBool haveComma = check(COMMA);
        if (elem) {
            array.add(elem, ec);
            if (haveComma) {
                inc();
                mightHaveNext = TRUE;
            }
        } else if (haveComma) {
            ERROR("Unexpected comma");
        }
    }
    skipWhitespace();
    if (!checkInc(CLOSE_ANGLE)) {
        if (check(OPEN_ANGLE)) {
            ERROR("Missing close angle bracket in inner array");
        } else {
            ERROR("Missing comma in inner array");
        }
    }

    // NULL-terminate the row
    array.add(NULL, ec);
    if (U_SUCCESS(ec)) {
        if (requiredLength == -1) {
            requiredLength = array.length() + 1;
        } else if (array.length() != requiredLength) {
            ec = U_ILLEGAL_ARGUMENT_ERROR;
            ERROR("Array not of required length");
        }

        return (UChar**)array.release();
    }
    ERROR("Unknown Error");
}
522
// Parses one string and returns a pointer to it inside the buffer, or NULL
// when there is no string at the current position (which is not an error) or
// when an error was reported through the ERROR macro.
// A string is either enclosed in matching single or double quotes, or is an
// unquoted run that ends at white space, ',', '<', '>', or a quote character.
// The string is terminated in place by overwriting the character that follows
// it with 0; that character is remembered in `ch` so the caller can still
// inspect it through check()/checkInc().
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            inc();
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        // x is the terminating character found at the end of the run
        UChar x = *p;
        if (p > start) {
            ch = x;
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            // step over the closing quote
            inc();
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
565
566void LocDataParser::parseError(const char* EXPLANATION_ARG)
567{
568 if (!data) {
569 return;
570 }
571
572 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
573 if (start < data) {
574 start = data;
575 }
576 for (UChar* x = p; --x >= start;) {
577 if (!*x) {
578 start = x+1;
579 break;
580 }
581 }
582 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
583 if (limit > e) {
584 limit = e;
585 }
586 u_strncpy(pe.preContext, start, (int32_t)(p-start));
587 pe.preContext[p-start] = 0;
588 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
589 pe.postContext[limit-p] = 0;
590 pe.offset = (int32_t)(p - data);
591
592#ifdef RBNF_DEBUG
593 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
594
595 UnicodeString msg;
596 msg.append(start, p - start);
597 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
598 msg.append(p, limit-p);
599 msg.append(UNICODE_STRING_SIMPLE("'"));
600
601 char buf[128];
602 int32_t len = msg.extract(0, msg.length(), buf, 128);
603 if (len >= 128) {
604 buf[127] = 0;
605 } else {
606 buf[len] = 0;
607 }
608 fprintf(stderr, "%s\n", buf);
609 fflush(stderr);
610#endif
611
612 uprv_free(data);
613 data = NULL;
614 p = NULL;
615 e = NULL;
616
617 if (U_SUCCESS(ec)) {
618 ec = U_PARSE_ERROR;
619 }
620}
621
622//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
623
624StringLocalizationInfo*
625StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
626 if (U_FAILURE(status)) {
627 return NULL;
628 }
629
630 int32_t len = info.length();
631 if (len == 0) {
632 return NULL; // no error;
633 }
634
635 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
636 if (!p) {
637 status = U_MEMORY_ALLOCATION_ERROR;
638 return NULL;
639 }
640 info.extract(p, len, status);
641 if (!U_FAILURE(status)) {
642 status = U_ZERO_ERROR; // clear warning about non-termination
643 }
644
645 LocDataParser parser(perror, status);
646 return parser.parse(p, len);
647}
648
649StringLocalizationInfo::~StringLocalizationInfo() {
650 for (UChar*** p = (UChar***)data; *p; ++p) {
651 // remaining data is simply pointer into our unicode string data.
652 if (*p) uprv_free(*p);
653 }
654 if (data) uprv_free(data);
655 if (info) uprv_free(info);
656}
657
658
659const UChar*
660StringLocalizationInfo::getRuleSetName(int32_t index) const {
661 if (index >= 0 && index < getNumberOfRuleSets()) {
662 return data[0][index];
663 }
664 return NULL;
665}
666
667const UChar*
668StringLocalizationInfo::getLocaleName(int32_t index) const {
669 if (index >= 0 && index < getNumberOfDisplayLocales()) {
670 return data[index+1][0];
671 }
672 return NULL;
673}
674
675const UChar*
676StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
677 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
678 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
679 return data[localeIndex+1][ruleIndex+1];
680 }
681 return NULL;
682}
683
684// ----------
685
686RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
687 const UnicodeString& locs,
688 const Locale& alocale, UParseError& perror, UErrorCode& status)
689 : fRuleSets(NULL)
690 , ruleSetDescriptions(NULL)
691 , numRuleSets(0)
692 , defaultRuleSet(NULL)
693 , locale(alocale)
694 , collator(NULL)
695 , decimalFormatSymbols(NULL)
696 , defaultInfinityRule(NULL)
697 , defaultNaNRule(NULL)
698 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
699 , lenient(FALSE)
700 , lenientParseRules(NULL)
701 , localizations(NULL)
702 , capitalizationInfoSet(FALSE)
703 , capitalizationForUIListMenu(FALSE)
704 , capitalizationForStandAlone(FALSE)
705 , capitalizationBrkIter(NULL)
706{
707 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
708 init(description, locinfo, perror, status);
709}
710
711RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
712 const UnicodeString& locs,
713 UParseError& perror, UErrorCode& status)
714 : fRuleSets(NULL)
715 , ruleSetDescriptions(NULL)
716 , numRuleSets(0)
717 , defaultRuleSet(NULL)
718 , locale(Locale::getDefault())
719 , collator(NULL)
720 , decimalFormatSymbols(NULL)
721 , defaultInfinityRule(NULL)
722 , defaultNaNRule(NULL)
723 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
724 , lenient(FALSE)
725 , lenientParseRules(NULL)
726 , localizations(NULL)
727 , capitalizationInfoSet(FALSE)
728 , capitalizationForUIListMenu(FALSE)
729 , capitalizationForStandAlone(FALSE)
730 , capitalizationBrkIter(NULL)
731{
732 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
733 init(description, locinfo, perror, status);
734}
735
736RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
737 LocalizationInfo* info,
738 const Locale& alocale, UParseError& perror, UErrorCode& status)
739 : fRuleSets(NULL)
740 , ruleSetDescriptions(NULL)
741 , numRuleSets(0)
742 , defaultRuleSet(NULL)
743 , locale(alocale)
744 , collator(NULL)
745 , decimalFormatSymbols(NULL)
746 , defaultInfinityRule(NULL)
747 , defaultNaNRule(NULL)
748 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
749 , lenient(FALSE)
750 , lenientParseRules(NULL)
751 , localizations(NULL)
752 , capitalizationInfoSet(FALSE)
753 , capitalizationForUIListMenu(FALSE)
754 , capitalizationForStandAlone(FALSE)
755 , capitalizationBrkIter(NULL)
756{
757 init(description, info, perror, status);
758}
759
760RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
761 UParseError& perror,
762 UErrorCode& status)
763 : fRuleSets(NULL)
764 , ruleSetDescriptions(NULL)
765 , numRuleSets(0)
766 , defaultRuleSet(NULL)
767 , locale(Locale::getDefault())
768 , collator(NULL)
769 , decimalFormatSymbols(NULL)
770 , defaultInfinityRule(NULL)
771 , defaultNaNRule(NULL)
772 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
773 , lenient(FALSE)
774 , lenientParseRules(NULL)
775 , localizations(NULL)
776 , capitalizationInfoSet(FALSE)
777 , capitalizationForUIListMenu(FALSE)
778 , capitalizationForStandAlone(FALSE)
779 , capitalizationBrkIter(NULL)
780{
781 init(description, NULL, perror, status);
782}
783
784RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
785 const Locale& aLocale,
786 UParseError& perror,
787 UErrorCode& status)
788 : fRuleSets(NULL)
789 , ruleSetDescriptions(NULL)
790 , numRuleSets(0)
791 , defaultRuleSet(NULL)
792 , locale(aLocale)
793 , collator(NULL)
794 , decimalFormatSymbols(NULL)
795 , defaultInfinityRule(NULL)
796 , defaultNaNRule(NULL)
797 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
798 , lenient(FALSE)
799 , lenientParseRules(NULL)
800 , localizations(NULL)
801 , capitalizationInfoSet(FALSE)
802 , capitalizationForUIListMenu(FALSE)
803 , capitalizationForStandAlone(FALSE)
804 , capitalizationBrkIter(NULL)
805{
806 init(description, NULL, perror, status);
807}
808
809RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
810 : fRuleSets(NULL)
811 , ruleSetDescriptions(NULL)
812 , numRuleSets(0)
813 , defaultRuleSet(NULL)
814 , locale(alocale)
815 , collator(NULL)
816 , decimalFormatSymbols(NULL)
817 , defaultInfinityRule(NULL)
818 , defaultNaNRule(NULL)
819 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
820 , lenient(FALSE)
821 , lenientParseRules(NULL)
822 , localizations(NULL)
823 , capitalizationInfoSet(FALSE)
824 , capitalizationForUIListMenu(FALSE)
825 , capitalizationForStandAlone(FALSE)
826 , capitalizationBrkIter(NULL)
827{
828 if (U_FAILURE(status)) {
829 return;
830 }
831
832 const char* rules_tag = "RBNFRules";
833 const char* fmt_tag = "";
834 switch (tag) {
835 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
836 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
837 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
838 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
839 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
840 }
841
842 // TODO: read localization info from resource
843 LocalizationInfo* locinfo = NULL;
844
845 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
846 if (U_SUCCESS(status)) {
847 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
848 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
849
850 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
851 if (U_FAILURE(status)) {
852 ures_close(nfrb);
853 }
854 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
855 if (U_FAILURE(status)) {
856 ures_close(rbnfRules);
857 ures_close(nfrb);
858 return;
859 }
860
861 UnicodeString desc;
862 while (ures_hasNext(ruleSets)) {
863 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
864 }
865 UParseError perror;
866
867 init(desc, locinfo, perror, status);
868
869 ures_close(ruleSets);
870 ures_close(rbnfRules);
871 }
872 ures_close(nfrb);
873}
874
875RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
876 : NumberFormat(rhs)
877 , fRuleSets(NULL)
878 , ruleSetDescriptions(NULL)
879 , numRuleSets(0)
880 , defaultRuleSet(NULL)
881 , locale(rhs.locale)
882 , collator(NULL)
883 , decimalFormatSymbols(NULL)
884 , defaultInfinityRule(NULL)
885 , defaultNaNRule(NULL)
886 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
887 , lenient(FALSE)
888 , lenientParseRules(NULL)
889 , localizations(NULL)
890 , capitalizationInfoSet(FALSE)
891 , capitalizationForUIListMenu(FALSE)
892 , capitalizationForStandAlone(FALSE)
893 , capitalizationBrkIter(NULL)
894{
895 this->operator=(rhs);
896}
897
898// --------
899
// Assignment: rebuilds this formatter from rhs's original rule description
// rather than copying the parsed rule sets. Self-assignment is a no-op.
// Errors raised while re-initializing are collected in a local status and are
// not reported to the caller.
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    NumberFormat::operator=(rhs);
    UErrorCode status = U_ZERO_ERROR;
    // NOTE(review): dispose() is defined outside this view; presumably it releases the current state.
    dispose();
    locale = rhs.locale;
    lenient = rhs.lenient;

    UParseError perror;
    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    // the localization info is shared with rhs: ref() adds a reference instead of copying
    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
    setRoundingMode(rhs.getRoundingMode());

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    // the break iterator is cloned so each formatter has its own instance
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}
927
// Destructor: all cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
932
// Returns a new heap-allocated copy of this formatter, made with the copy
// constructor.
RuleBasedNumberFormat*
RuleBasedNumberFormat::clone() const
{
    return new RuleBasedNumberFormat(*this);
}
938
939UBool
940RuleBasedNumberFormat::operator==(const Format& other) const
941{
942 if (this == &other) {
943 return TRUE;
944 }
945
946 if (typeid(*this) == typeid(other)) {
947 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
948 // test for capitalization info equality is adequately handled
949 // by the NumberFormat test for fCapitalizationContext equality;
950 // the info here is just derived from that.
951 if (locale == rhs.locale &&
952 lenient == rhs.lenient &&
953 (localizations == NULL
954 ? rhs.localizations == NULL
955 : (rhs.localizations == NULL
956 ? FALSE
957 : *localizations == rhs.localizations))) {
958
959 NFRuleSet** p = fRuleSets;
960 NFRuleSet** q = rhs.fRuleSets;
961 if (p == NULL) {
962 return q == NULL;
963 } else if (q == NULL) {
964 return FALSE;
965 }
966 while (*p && *q && (**p == **q)) {
967 ++p;
968 ++q;
969 }
970 return *q == NULL && *p == NULL;
971 }
972 }
973
974 return FALSE;
975}
976
977UnicodeString
978RuleBasedNumberFormat::getRules() const
979{
980 UnicodeString result;
981 if (fRuleSets != NULL) {
982 for (NFRuleSet** p = fRuleSets; *p; ++p) {
983 (*p)->appendRules(result);
984 }
985 }
986 return result;
987}
988
989UnicodeString
990RuleBasedNumberFormat::getRuleSetName(int32_t index) const
991{
992 if (localizations) {
993 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
994 return string;
995 }
996 else if (fRuleSets) {
997 UnicodeString result;
998 for (NFRuleSet** p = fRuleSets; *p; ++p) {
999 NFRuleSet* rs = *p;
1000 if (rs->isPublic()) {
1001 if (--index == -1) {
1002 rs->getName(result);
1003 return result;
1004 }
1005 }
1006 }
1007 }
1008 UnicodeString empty;
1009 return empty;
1010}
1011
1012int32_t
1013RuleBasedNumberFormat::getNumberOfRuleSetNames() const
1014{
1015 int32_t result = 0;
1016 if (localizations) {
1017 result = localizations->getNumberOfRuleSets();
1018 }
1019 else if (fRuleSets) {
1020 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1021 if ((**p).isPublic()) {
1022 ++result;
1023 }
1024 }
1025 }
1026 return result;
1027}
1028
1029int32_t
1030RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
1031 if (localizations) {
1032 return localizations->getNumberOfDisplayLocales();
1033 }
1034 return 0;
1035}
1036
1037Locale
1038RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1039 if (U_FAILURE(status)) {
1040 return Locale("");
1041 }
1042 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1043 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1044 char buffer[64];
1045 int32_t cap = name.length() + 1;
1046 char* bp = buffer;
1047 if (cap > 64) {
1048 bp = (char *)uprv_malloc(cap);
1049 if (bp == NULL) {
1050 status = U_MEMORY_ALLOCATION_ERROR;
1051 return Locale("");
1052 }
1053 }
1054 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1055 Locale retLocale(bp);
1056 if (bp != buffer) {
1057 uprv_free(bp);
1058 }
1059 return retLocale;
1060 }
1061 status = U_ILLEGAL_ARGUMENT_ERROR;
1062 Locale retLocale;
1063 return retLocale;
1064}
1065
// Returns the display name of rule set `index` for `localeParam`.
// The lookup starts with the locale's base name and, while no display locale
// matches, repeatedly shortens it at the last underscore (so a longer locale
// ID falls back to its shorter prefixes, ending with the empty string).
// If no display locale matches at all, the rule set's own name is returned.
// A bogus string is returned when there is no localization info or the index
// is out of range.
UnicodeString
RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
    if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
        UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
        int32_t len = localeName.length();
        // writable buffer with room for a terminator; it is truncated in place below
        UChar* localeStr = localeName.getBuffer(len + 1);
        while (len >= 0) {
            localeStr[len] = 0;
            int32_t ix = localizations->indexForLocale(localeStr);
            if (ix >= 0) {
                UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
                return name;
            }

            // trim trailing portion, skipping over omitted sections
            do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
            while (len > 0 && localeStr[len-1] == 0x005F) --len;
        }
        // no display locale matched: fall back to the rule set's own name
        UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
        return name;
    }
    UnicodeString bogus;
    bogus.setToBogus();
    return bogus;
}
1091
1092UnicodeString
1093RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1094 if (localizations) {
1095 UnicodeString rsn(ruleSetName);
1096 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1097 return getRuleSetDisplayName(ix, localeParam);
1098 }
1099 UnicodeString bogus;
1100 bogus.setToBogus();
1101 return bogus;
1102}
1103
1104NFRuleSet*
1105RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1106{
1107 if (U_SUCCESS(status) && fRuleSets) {
1108 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1109 NFRuleSet* rs = *p;
1110 if (rs->isNamed(name)) {
1111 return rs;
1112 }
1113 }
1114 status = U_ILLEGAL_ARGUMENT_ERROR;
1115 }
1116 return NULL;
1117}
1118
1119UnicodeString&
1120RuleBasedNumberFormat::format(const DecimalQuantity &number,
1121 UnicodeString& appendTo,
1122 FieldPosition& pos,
1123 UErrorCode &status) const {
1124 if (U_FAILURE(status)) {
1125 return appendTo;
1126 }
1127 DecimalQuantity copy(number);
1128 if (copy.fitsInLong()) {
1129 format(number.toLong(), appendTo, pos, status);
1130 }
1131 else {
1132 copy.roundToMagnitude(0, number::impl::RoundingMode::UNUM_ROUND_HALFEVEN, status);
1133 if (copy.fitsInLong()) {
1134 format(number.toDouble(), appendTo, pos, status);
1135 }
1136 else {
1137 // We're outside of our normal range that this framework can handle.
1138 // The DecimalFormat will provide more accurate results.
1139
1140 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1141 LocalPointer<NumberFormat> decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status);
1142 if (decimalFormat.isNull()) {
1143 return appendTo;
1144 }
1145 Formattable f;
1146 LocalPointer<DecimalQuantity> decimalQuantity(new DecimalQuantity(number), status);
1147 if (decimalQuantity.isNull()) {
1148 return appendTo;
1149 }
1150 f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity.
1151 decimalFormat->format(f, appendTo, pos, status);
1152 }
1153 }
1154 return appendTo;
1155}
1156
1157UnicodeString&
1158RuleBasedNumberFormat::format(int32_t number,
1159 UnicodeString& toAppendTo,
1160 FieldPosition& pos) const
1161{
1162 return format((int64_t)number, toAppendTo, pos);
1163}
1164
1165
1166UnicodeString&
1167RuleBasedNumberFormat::format(int64_t number,
1168 UnicodeString& toAppendTo,
1169 FieldPosition& /* pos */) const
1170{
1171 if (defaultRuleSet) {
1172 UErrorCode status = U_ZERO_ERROR;
1173 format(number, defaultRuleSet, toAppendTo, status);
1174 }
1175 return toAppendTo;
1176}
1177
1178
1179UnicodeString&
1180RuleBasedNumberFormat::format(double number,
1181 UnicodeString& toAppendTo,
1182 FieldPosition& /* pos */) const
1183{
1184 UErrorCode status = U_ZERO_ERROR;
1185 if (defaultRuleSet) {
1186 format(number, *defaultRuleSet, toAppendTo, status);
1187 }
1188 return toAppendTo;
1189}
1190
1191
1192UnicodeString&
1193RuleBasedNumberFormat::format(int32_t number,
1194 const UnicodeString& ruleSetName,
1195 UnicodeString& toAppendTo,
1196 FieldPosition& pos,
1197 UErrorCode& status) const
1198{
1199 return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1200}
1201
1202
1203UnicodeString&
1204RuleBasedNumberFormat::format(int64_t number,
1205 const UnicodeString& ruleSetName,
1206 UnicodeString& toAppendTo,
1207 FieldPosition& /* pos */,
1208 UErrorCode& status) const
1209{
1210 if (U_SUCCESS(status)) {
1211 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1212 // throw new IllegalArgumentException("Can't use internal rule set");
1213 status = U_ILLEGAL_ARGUMENT_ERROR;
1214 } else {
1215 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1216 if (rs) {
1217 format(number, rs, toAppendTo, status);
1218 }
1219 }
1220 }
1221 return toAppendTo;
1222}
1223
1224
1225UnicodeString&
1226RuleBasedNumberFormat::format(double number,
1227 const UnicodeString& ruleSetName,
1228 UnicodeString& toAppendTo,
1229 FieldPosition& /* pos */,
1230 UErrorCode& status) const
1231{
1232 if (U_SUCCESS(status)) {
1233 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1234 // throw new IllegalArgumentException("Can't use internal rule set");
1235 status = U_ILLEGAL_ARGUMENT_ERROR;
1236 } else {
1237 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1238 if (rs) {
1239 format(number, *rs, toAppendTo, status);
1240 }
1241 }
1242 }
1243 return toAppendTo;
1244}
1245
1246void
1247RuleBasedNumberFormat::format(double number,
1248 NFRuleSet& rs,
1249 UnicodeString& toAppendTo,
1250 UErrorCode& status) const
1251{
1252 int32_t startPos = toAppendTo.length();
1253 if (getRoundingMode() != DecimalFormat::ERoundingMode::kRoundUnnecessary && !uprv_isNaN(number) && !uprv_isInfinite(number)) {
1254 DecimalQuantity digitList;
1255 digitList.setToDouble(number);
1256 digitList.roundToMagnitude(
1257 -getMaximumFractionDigits(),
1258 static_cast<UNumberFormatRoundingMode>(getRoundingMode()),
1259 status);
1260 number = digitList.toDouble();
1261 }
1262 rs.format(number, toAppendTo, toAppendTo.length(), 0, status);
1263 adjustForCapitalizationContext(startPos, toAppendTo, status);
1264}
1265
1266/**
1267 * Bottleneck through which all the public format() methods
1268 * that take a long pass. By the time we get here, we know
1269 * which rule set we're using to do the formatting.
1270 * @param number The number to format
1271 * @param ruleSet The rule set to use to format the number
1272 * @return The text that resulted from formatting the number
1273 */
1274UnicodeString&
1275RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const
1276{
1277 // all API format() routines that take a double vector through
1278 // here. We have these two identical functions-- one taking a
1279 // double and one taking a long-- the couple digits of precision
1280 // that long has but double doesn't (both types are 8 bytes long,
1281 // but double has to borrow some of the mantissa bits to hold
1282 // the exponent).
1283 // Create an empty string buffer where the result will
1284 // be built, and pass it to the rule set (along with an insertion
1285 // position of 0 and the number being formatted) to the rule set
1286 // for formatting
1287
1288 if (U_SUCCESS(status)) {
1289 if (number == U_INT64_MIN) {
1290 // We can't handle this value right now. Provide an accurate default value.
1291
1292 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1293 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1294 if (decimalFormat == nullptr) {
1295 return toAppendTo;
1296 }
1297 Formattable f;
1298 FieldPosition pos(FieldPosition::DONT_CARE);
1299 DecimalQuantity *decimalQuantity = new DecimalQuantity();
1300 if (decimalQuantity == nullptr) {
1301 status = U_MEMORY_ALLOCATION_ERROR;
1302 delete decimalFormat;
1303 return toAppendTo;
1304 }
1305 decimalQuantity->setToLong(number);
1306 f.adoptDecimalQuantity(decimalQuantity); // f now owns decimalQuantity.
1307 decimalFormat->format(f, toAppendTo, pos, status);
1308 delete decimalFormat;
1309 }
1310 else {
1311 int32_t startPos = toAppendTo.length();
1312 ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1313 adjustForCapitalizationContext(startPos, toAppendTo, status);
1314 }
1315 }
1316 return toAppendTo;
1317}
1318
1319UnicodeString&
1320RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1321 UnicodeString& currentResult,
1322 UErrorCode& status) const
1323{
1324#if !UCONFIG_NO_BREAK_ITERATION
1325 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1326 if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) {
1327 // capitalize currentResult according to context
1328 UChar32 ch = currentResult.char32At(0);
1329 if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL &&
1330 ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1331 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1332 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1333 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1334 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1335 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1336 }
1337 }
1338#endif
1339 return currentResult;
1340}
1341
1342
1343void
1344RuleBasedNumberFormat::parse(const UnicodeString& text,
1345 Formattable& result,
1346 ParsePosition& parsePosition) const
1347{
1348 if (!fRuleSets) {
1349 parsePosition.setErrorIndex(0);
1350 return;
1351 }
1352
1353 UnicodeString workingText(text, parsePosition.getIndex());
1354 ParsePosition workingPos(0);
1355
1356 ParsePosition high_pp(0);
1357 Formattable high_result;
1358
1359 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1360 NFRuleSet *rp = *p;
1361 if (rp->isPublic() && rp->isParseable()) {
1362 ParsePosition working_pp(0);
1363 Formattable working_result;
1364
1365 rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
1366 if (working_pp.getIndex() > high_pp.getIndex()) {
1367 high_pp = working_pp;
1368 high_result = working_result;
1369
1370 if (high_pp.getIndex() == workingText.length()) {
1371 break;
1372 }
1373 }
1374 }
1375 }
1376
1377 int32_t startIndex = parsePosition.getIndex();
1378 parsePosition.setIndex(startIndex + high_pp.getIndex());
1379 if (high_pp.getIndex() > 0) {
1380 parsePosition.setErrorIndex(-1);
1381 } else {
1382 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1383 parsePosition.setErrorIndex(startIndex + errorIndex);
1384 }
1385 result = high_result;
1386 if (result.getType() == Formattable::kDouble) {
1387 double d = result.getDouble();
1388 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1389 // Note: casting a double to an int when the double is too large or small
1390 // to fit the destination is undefined behavior. The explicit range checks,
1391 // above, are required. Just casting and checking the result value is undefined.
1392 result.setLong(static_cast<int32_t>(d));
1393 }
1394 }
1395}
1396
1397#if !UCONFIG_NO_COLLATION
1398
1399void
1400RuleBasedNumberFormat::setLenient(UBool enabled)
1401{
1402 lenient = enabled;
1403 if (!enabled && collator) {
1404 delete collator;
1405 collator = NULL;
1406 }
1407}
1408
1409#endif
1410
1411void
1412RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1413 if (U_SUCCESS(status)) {
1414 if (ruleSetName.isEmpty()) {
1415 if (localizations) {
1416 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1417 defaultRuleSet = findRuleSet(name, status);
1418 } else {
1419 initDefaultRuleSet();
1420 }
1421 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1422 status = U_ILLEGAL_ARGUMENT_ERROR;
1423 } else {
1424 NFRuleSet* result = findRuleSet(ruleSetName, status);
1425 if (result != NULL) {
1426 defaultRuleSet = result;
1427 }
1428 }
1429 }
1430}
1431
1432UnicodeString
1433RuleBasedNumberFormat::getDefaultRuleSetName() const {
1434 UnicodeString result;
1435 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1436 defaultRuleSet->getName(result);
1437 } else {
1438 result.setToBogus();
1439 }
1440 return result;
1441}
1442
1443void
1444RuleBasedNumberFormat::initDefaultRuleSet()
1445{
1446 defaultRuleSet = NULL;
1447 if (!fRuleSets) {
1448 return;
1449 }
1450
1451 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1452 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1453 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1454
1455 NFRuleSet**p = &fRuleSets[0];
1456 while (*p) {
1457 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1458 defaultRuleSet = *p;
1459 return;
1460 } else {
1461 ++p;
1462 }
1463 }
1464
1465 defaultRuleSet = *--p;
1466 if (!defaultRuleSet->isPublic()) {
1467 while (p != fRuleSets) {
1468 if ((*--p)->isPublic()) {
1469 defaultRuleSet = *p;
1470 break;
1471 }
1472 }
1473 }
1474}
1475
1476
/**
 * Builds the formatter's rule sets from a textual rule description.
 *
 * Steps, in order:
 *  1. Zero pErr and initialize the decimal format symbols and the default
 *     infinity / NaN rules.
 *  2. Take a reference to the localization info, if any.
 *  3. Strip whitespace after semicolons, then pull a "%%lenient-parse:"
 *     section (if present at a rule boundary) out of the description.
 *  4. Count the rule sets (separated by ";%"), allocate the null-terminated
 *     fRuleSets array and the temporary ruleSetDescriptions array, and
 *     create one NFRuleSet per description.
 *  5. Choose a provisional default rule set, parse every rule set's rules,
 *     then let the localization data (if any) pick the final default.
 *
 * Any early return leaves the object partially initialized; in particular
 * fRuleSets may be null or only partly filled.
 *
 * @param rules             The rule description text
 * @param localizationInfos Optional localization data; may be NULL
 * @param pErr              Zeroed on entry; not otherwise filled in here
 * @param status            In/out error code
 */
void
RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
                            UParseError& pErr, UErrorCode& status)
{
    // TODO: implement UParseError
    uprv_memset(&pErr, 0, sizeof(UParseError));
    // Note: this can leave ruleSets == NULL, so remaining code should check
    if (U_FAILURE(status)) {
        return;
    }

    initializeDecimalFormatSymbols(status);
    initializeDefaultInfinityRule(status);
    initializeDefaultNaNRule(status);
    if (U_FAILURE(status)) {
        return;
    }

    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();

    UnicodeString description(rules);
    // NOTE(review): an empty description is reported as
    // U_MEMORY_ALLOCATION_ERROR; presumably this is meant to catch a failed
    // copy of `rules`, but an empty `rules` string takes the same path.
    if (!description.length()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // start by stripping the trailing whitespace from all the rules
    // (this is all the whitespace following each semicolon in the
    // description).  This allows us to look for rule-set boundaries
    // by searching for ";%" without having to worry about whitespace
    // between the ; and the %
    stripWhitespace(description);

    // check to see if there's a set of lenient-parse rules.  If there
    // is, pull them out into our temporary holding place for them,
    // and delete them from the description before the real description-
    // parsing code sees them
    int32_t lp = description.indexOf(gLenientParse, -1, 0);
    if (lp != -1) {
        // we've got to make sure we're not in the middle of a rule
        // (where "%%lenient-parse" would actually get treated as
        // rule text)
        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
            // locate the beginning and end of the actual collation
            // rules (there may be whitespace between the name and
            // the first token in the description)
            int lpEnd = description.indexOf(gSemiPercent, 2, lp);

            // no following ";%": the lenient-parse section runs to the
            // last character of the description
            if (lpEnd == -1) {
                lpEnd = description.length() - 1;
            }
            int lpStart = lp + u_strlen(gLenientParse);
            while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
                ++lpStart;
            }

            // copy out the lenient-parse rules and delete them
            // from the description
            lenientParseRules = new UnicodeString();
            /* test for NULL */
            if (lenientParseRules == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

            description.remove(lp, lpEnd + 1 - lp);
        }
    }

    // pre-flight parsing the description and count the number of
    // rule sets (";%" marks the end of one rule set and the beginning
    // of the next)
    numRuleSets = 0;
    for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
        ++numRuleSets;
        ++p;
    }
    // one more for the rule set that follows the last ";%" (or for the
    // only rule set when there is no ";%" at all)
    ++numRuleSets;

    // our rule list is an array of the appropriate size
    // (one extra slot for the terminating NULL)
    fRuleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
    /* test for NULL */
    if (fRuleSets == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    for (int i = 0; i <= numRuleSets; ++i) {
        fRuleSets[i] = NULL;
    }

    // divide up the descriptions into individual rule-set descriptions
    // and store them in a temporary array.  At each step, we also
    // new up a rule set, but all this does is initialize its name
    // and remove it from its description.  We can't actually parse
    // the rest of the descriptions and finish initializing everything
    // because we have to know the names and locations of all the rule
    // sets before we can actually set everything up
    // NOTE(review): numRuleSets was incremented unconditionally above, so it
    // is at least 1 here and this check cannot fire as written.
    if(!numRuleSets) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    ruleSetDescriptions = new UnicodeString[numRuleSets];
    if (ruleSetDescriptions == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    {
        int curRuleSet = 0;
        int32_t start = 0;
        // each description runs from `start` up to and including the ';'
        // of the next ";%"; the following one begins at the '%'
        for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
            fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
            if (fRuleSets[curRuleSet] == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            ++curRuleSet;
            start = p + 1;
        }
        // the remainder of the description is the last rule set
        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
        fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
        if (fRuleSets[curRuleSet] == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    // now we can take note of the formatter's default rule set, which
    // is the last public rule set in the description (it's the last
    // rather than the first so that a user can create a new formatter
    // from an existing formatter and change its default behavior just
    // by appending more rule sets to the end)

    // {dlf} Initialization of a fraction rule set requires the default rule
    // set to be known.  For purposes of initialization, this is always the
    // last public rule set, no matter what the localization data says.
    initDefaultRuleSet();

    // finally, we can go back through the temporary descriptions
    // list and finish setting up the substructure (and we throw
    // away the temporary descriptions as we go)
    {
        for (int i = 0; i < numRuleSets; i++) {
            fRuleSets[i]->parseRules(ruleSetDescriptions[i], status);
        }
    }

    // Now that the rules are initialized, the 'real' default rule
    // set can be adjusted by the localization data.

    // The C code keeps the localization array as is, rather than building
    // a separate array of the public rule set names, so we have less work
    // to do here-- but we still need to check the names.

    if (localizationInfos) {
        // confirm the names, if any aren't in the rules, that's an error
        // it is ok if the rules contain public rule sets that are not in this list
        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
            NFRuleSet* rs = findRuleSet(name, status);
            if (rs == NULL) {
                break; // error
            }
            // the first localized name selects the default rule set
            if (i == 0) {
                defaultRuleSet = rs;
            }
        }
    } else {
        // NOTE(review): getDefaultRuleSet() is defined outside this section;
        // presumably it returns the defaultRuleSet chosen above -- confirm.
        defaultRuleSet = getDefaultRuleSet();
    }
    originalDescription = rules;
}
1653
1654// override the NumberFormat implementation in order to
1655// lazily initialize relevant items
1656void
1657RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1658{
1659 NumberFormat::setContext(value, status);
1660 if (U_SUCCESS(status)) {
1661 if (!capitalizationInfoSet &&
1662 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1663 initCapitalizationContextInfo(locale);
1664 capitalizationInfoSet = TRUE;
1665 }
1666#if !UCONFIG_NO_BREAK_ITERATION
1667 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1668 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1669 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1670 status = U_ZERO_ERROR;
1671 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1672 if (U_FAILURE(status)) {
1673 delete capitalizationBrkIter;
1674 capitalizationBrkIter = NULL;
1675 }
1676 }
1677#endif
1678 }
1679}
1680
1681void
1682RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1683{
1684#if !UCONFIG_NO_BREAK_ITERATION
1685 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1686 UErrorCode status = U_ZERO_ERROR;
1687 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1688 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1689 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1690 if (U_SUCCESS(status) && rb != NULL) {
1691 int32_t len = 0;
1692 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1693 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1694 capitalizationForUIListMenu = static_cast<UBool>(intVector[0]);
1695 capitalizationForStandAlone = static_cast<UBool>(intVector[1]);
1696 }
1697 }
1698 ures_close(rb);
1699#endif
1700}
1701
1702void
1703RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1704{
1705 // iterate through the characters...
1706 UnicodeString result;
1707
1708 int start = 0;
1709 while (start != -1 && start < description.length()) {
1710 // seek to the first non-whitespace character...
1711 while (start < description.length()
1712 && PatternProps::isWhiteSpace(description.charAt(start))) {
1713 ++start;
1714 }
1715
1716 // locate the next semicolon in the text and copy the text from
1717 // our current position up to that semicolon into the result
1718 int32_t p = description.indexOf(gSemiColon, start);
1719 if (p == -1) {
1720 // or if we don't find a semicolon, just copy the rest of
1721 // the string into the result
1722 result.append(description, start, description.length() - start);
1723 start = -1;
1724 }
1725 else if (p < description.length()) {
1726 result.append(description, start, p + 1 - start);
1727 start = p + 1;
1728 }
1729
1730 // when we get here, we've seeked off the end of the string, and
1731 // we terminate the loop (we continue until *start* is -1 rather
1732 // than until *p* is -1, because otherwise we'd miss the last
1733 // rule in the description)
1734 else {
1735 start = -1;
1736 }
1737 }
1738
1739 description.setTo(result);
1740}
1741
1742
1743void
1744RuleBasedNumberFormat::dispose()
1745{
1746 if (fRuleSets) {
1747 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1748 delete *p;
1749 }
1750 uprv_free(fRuleSets);
1751 fRuleSets = NULL;
1752 }
1753
1754 if (ruleSetDescriptions) {
1755 delete [] ruleSetDescriptions;
1756 ruleSetDescriptions = NULL;
1757 }
1758
1759#if !UCONFIG_NO_COLLATION
1760 delete collator;
1761#endif
1762 collator = NULL;
1763
1764 delete decimalFormatSymbols;
1765 decimalFormatSymbols = NULL;
1766
1767 delete defaultInfinityRule;
1768 defaultInfinityRule = NULL;
1769
1770 delete defaultNaNRule;
1771 defaultNaNRule = NULL;
1772
1773 delete lenientParseRules;
1774 lenientParseRules = NULL;
1775
1776#if !UCONFIG_NO_BREAK_ITERATION
1777 delete capitalizationBrkIter;
1778 capitalizationBrkIter = NULL;
1779#endif
1780
1781 if (localizations) {
1782 localizations = localizations->unref();
1783 }
1784}
1785
1786
1787//-----------------------------------------------------------------------
1788// package-internal API
1789//-----------------------------------------------------------------------
1790
1791/**
1792 * Returns the collator to use for lenient parsing. The collator is lazily created:
1793 * this function creates it the first time it's called.
1794 * @return The collator to use for lenient parsing, or null if lenient parsing
1795 * is turned off.
1796*/
1797const RuleBasedCollator*
1798RuleBasedNumberFormat::getCollator() const
1799{
1800#if !UCONFIG_NO_COLLATION
1801 if (!fRuleSets) {
1802 return NULL;
1803 }
1804
1805 // lazy-evaluate the collator
1806 if (collator == NULL && lenient) {
1807 // create a default collator based on the formatter's locale,
1808 // then pull out that collator's rules, append any additional
1809 // rules specified in the description, and create a _new_
1810 // collator based on the combination of those rules
1811
1812 UErrorCode status = U_ZERO_ERROR;
1813
1814 Collator* temp = Collator::createInstance(locale, status);
1815 RuleBasedCollator* newCollator;
1816 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1817 if (lenientParseRules) {
1818 UnicodeString rules(newCollator->getRules());
1819 rules.append(*lenientParseRules);
1820
1821 newCollator = new RuleBasedCollator(rules, status);
1822 // Exit if newCollator could not be created.
1823 if (newCollator == NULL) {
1824 return NULL;
1825 }
1826 } else {
1827 temp = NULL;
1828 }
1829 if (U_SUCCESS(status)) {
1830 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1831 // cast away const
1832 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1833 } else {
1834 delete newCollator;
1835 }
1836 }
1837 delete temp;
1838 }
1839#endif
1840
1841 // if lenient-parse mode is off, this will be null
1842 // (see setLenientParseMode())
1843 return collator;
1844}
1845
1846
1847DecimalFormatSymbols*
1848RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1849{
1850 // lazy-evaluate the DecimalFormatSymbols object. This object
1851 // is shared by all DecimalFormat instances belonging to this
1852 // formatter
1853 if (decimalFormatSymbols == nullptr) {
1854 LocalPointer<DecimalFormatSymbols> temp(new DecimalFormatSymbols(locale, status), status);
1855 if (U_SUCCESS(status)) {
1856 decimalFormatSymbols = temp.orphan();
1857 }
1858 }
1859 return decimalFormatSymbols;
1860}
1861
/**
 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
 * instances owned by this formatter.
 * The stored pointer is returned as is; nothing is created here.
 * @return The formatter's DecimalFormatSymbols; the formatter keeps ownership
*/
const DecimalFormatSymbols*
RuleBasedNumberFormat::getDecimalFormatSymbols() const
{
    return decimalFormatSymbols;
}
1871
1872NFRule*
1873RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1874{
1875 if (U_FAILURE(status)) {
1876 return nullptr;
1877 }
1878 if (defaultInfinityRule == NULL) {
1879 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1880 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1881 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status);
1882 if (U_SUCCESS(status)) {
1883 defaultInfinityRule = temp.orphan();
1884 }
1885 }
1886 return defaultInfinityRule;
1887}
1888
/**
 * Returns the default rule for infinity.
 * The stored pointer is returned as is; nothing is created here.
 * @return The default infinity rule; the formatter keeps ownership
 */
const NFRule*
RuleBasedNumberFormat::getDefaultInfinityRule() const
{
    return defaultInfinityRule;
}
1894
1895NFRule*
1896RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1897{
1898 if (U_FAILURE(status)) {
1899 return nullptr;
1900 }
1901 if (defaultNaNRule == nullptr) {
1902 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1903 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1904 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status);
1905 if (U_SUCCESS(status)) {
1906 defaultNaNRule = temp.orphan();
1907 }
1908 }
1909 return defaultNaNRule;
1910}
1911
/**
 * Returns the default rule for NaN.
 * The stored pointer is returned as is; nothing is created here.
 * @return The default NaN rule; the formatter keeps ownership
 */
const NFRule*
RuleBasedNumberFormat::getDefaultNaNRule() const
{
    return defaultNaNRule;
}
1917
1918// De-owning the current localized symbols and adopt the new symbols.
1919void
1920RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1921{
1922 if (symbolsToAdopt == NULL) {
1923 return; // do not allow caller to set decimalFormatSymbols to NULL
1924 }
1925
1926 if (decimalFormatSymbols != NULL) {
1927 delete decimalFormatSymbols;
1928 }
1929
1930 decimalFormatSymbols = symbolsToAdopt;
1931
1932 {
1933 // Apply the new decimalFormatSymbols by reparsing the rulesets
1934 UErrorCode status = U_ZERO_ERROR;
1935
1936 delete defaultInfinityRule;
1937 defaultInfinityRule = NULL;
1938 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1939
1940 delete defaultNaNRule;
1941 defaultNaNRule = NULL;
1942 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1943
1944 if (fRuleSets) {
1945 for (int32_t i = 0; i < numRuleSets; i++) {
1946 fRuleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1947 }
1948 }
1949 }
1950}
1951
1952// Setting the symbols is equivalent to adopting a newly created localized symbols.
1953void
1954RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1955{
1956 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1957}
1958
1959PluralFormat *
1960RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1961 const UnicodeString &pattern,
1962 UErrorCode& status) const
1963{
1964 auto *pf = new PluralFormat(locale, pluralType, pattern, status);
1965 if (pf == nullptr) {
1966 status = U_MEMORY_ALLOCATION_ERROR;
1967 }
1968 return pf;
1969}
1970
/**
 * Get the rounding mode.
 * Returns the value most recently stored by setRoundingMode().
 * @return A rounding mode
 */
DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const {
    return fRoundingMode;
}
1978
/**
 * Set the rounding mode. The value is only stored here; the double
 * formatting path in this file reads it back through getRoundingMode()
 * and skips rounding when it is kRoundUnnecessary.
 * NOTE(review): this comment previously said the mode "has no effect unless
 * the rounding increment is greater than zero"; no rounding increment is
 * consulted in the code visible in this file -- confirm whether that
 * statement still applies.
 * @param roundingMode A rounding mode
 */
void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) {
    fRoundingMode = roundingMode;
}
1987
1988U_NAMESPACE_END
1989
1990/* U_HAVE_RBNF */
1991#endif
1992