rbnf.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/rbnf.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	* Copyright (C) 1997-2015, International Business Machines Corporation
6	* and others. All Rights Reserved.
7	*******************************************************************************
8	*/
9
10	#include "unicode/utypes.h"
11	#include "utypeinfo.h" // for 'typeid' to work
12
13	#include "unicode/rbnf.h"
14
15	#if U_HAVE_RBNF
16
17	#include "unicode/normlzr.h"
18	#include "unicode/plurfmt.h"
19	#include "unicode/tblcoll.h"
20	#include "unicode/uchar.h"
21	#include "unicode/ucol.h"
22	#include "unicode/uloc.h"
23	#include "unicode/unum.h"
24	#include "unicode/ures.h"
25	#include "unicode/ustring.h"
26	#include "unicode/utf16.h"
27	#include "unicode/udata.h"
28	#include "unicode/udisplaycontext.h"
29	#include "unicode/brkiter.h"
30	#include "unicode/ucasemap.h"
31
32	#include "cmemory.h"
33	#include "cstring.h"
34	#include "patternprops.h"
35	#include "uresimp.h"
36	#include "nfrs.h"
37	#include "number_decimalquantity.h"
38
39	// debugging
40	// #define RBNF_DEBUG
41
42	#ifdef RBNF_DEBUG
43	#include <stdio.h>
44	#endif
45
46	#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
47
48	static const UChar gPercentPercent[] =
49	{
50	`0x25`, `0x25`, `0`
51	}; / "%%" /
52
53	// All urbnf objects are created through openRules, so we init all of the
54	// Unicode string constants required by rbnf, nfrs, or nfr here.
55	static const UChar gLenientParse[] =
56	{
57	`0x25`, `0x25`, `0x6C`, `0x65`, `0x6E`, `0x69`, `0x65`, `0x6E`, `0x74`, `0x2D`, `0x70`, `0x61`, `0x72`, `0x73`, `0x65`, `0x3A`, `0`
58	}; / "%%lenient-parse:" /
59	static const UChar gSemiColon = `0x003B`;
60	static const UChar gSemiPercent[] =
61	{
62	`0x3B`, `0x25`, `0`
63	}; / ";%" /
64
// kHalfMaxDouble is 2^22 as a double; kMaxDouble is its square (2^44).
// Each expansion is fully parenthesized so it behaves as a single operand
// wherever it is substituted.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
68
69	U_NAMESPACE_BEGIN
70
71	using number::impl::DecimalQuantity;
72
73	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
74
75	/*
76	This is a utility class. It does not use ICU's RTTI.
77	If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
78	Please make sure that intltest passes on Windows in Release mode,
79	since the string pooling per compilation unit will mess up how RTTI works.
80	The RTTI code was also removed due to lack of code coverage.
81	*/
82	class LocalizationInfo : public UMemory {
83	protected:
84	virtual ~LocalizationInfo();
85	uint32_t refcount;
86
87	public:
88	LocalizationInfo() : refcount(`0`) {}
89
90	LocalizationInfo* ref(void) {
91	++refcount;
92	return this;
93	}
94
95	LocalizationInfo* unref(void) {
96	if (refcount && --refcount == `0`) {
97	delete this;
98	}
99	return NULL;
100	}
101
102	virtual UBool operator==(const LocalizationInfo* rhs) const;
103	inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
104
105	virtual int32_t getNumberOfRuleSets(void) const = `0`;
106	virtual const UChar* getRuleSetName(int32_t index) const = `0`;
107	virtual int32_t getNumberOfDisplayLocales(void) const = `0`;
108	virtual const UChar* getLocaleName(int32_t index) const = `0`;
109	virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = `0`;
110
111	virtual int32_t indexForLocale(const UChar* locale) const;
112	virtual int32_t indexForRuleSet(const UChar* ruleset) const;
113
114	// virtual UClassID getDynamicClassID() const = 0;
115	// static UClassID getStaticClassID(void);
116	};
117
118	LocalizationInfo::~LocalizationInfo() {}
119
120	//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
121
122	// if both strings are NULL, this returns TRUE
123	static UBool
124	streq(const UChar* lhs, const UChar* rhs) {
125	if (rhs == lhs) {
126	return TRUE;
127	}
128	if (lhs && rhs) {
129	return u_strcmp(lhs, rhs) == `0`;
130	}
131	return FALSE;
132	}
133
134	UBool
135	LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
136	if (rhs) {
137	if (this == rhs) {
138	return TRUE;
139	}
140
141	int32_t rsc = getNumberOfRuleSets();
142	if (rsc == rhs->getNumberOfRuleSets()) {
143	for (int i = `0`; i < rsc; ++i) {
144	if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
145	return FALSE;
146	}
147	}
148	int32_t dlc = getNumberOfDisplayLocales();
149	if (dlc == rhs->getNumberOfDisplayLocales()) {
150	for (int i = `0`; i < dlc; ++i) {
151	const UChar* locale = getLocaleName(i);
152	int32_t ix = rhs->indexForLocale(locale);
153	// if no locale, ix is -1, getLocaleName returns null, so streq returns false
154	if (!streq(locale, rhs->getLocaleName(ix))) {
155	return FALSE;
156	}
157	for (int j = `0`; j < rsc; ++j) {
158	if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
159	return FALSE;
160	}
161	}
162	}
163	return TRUE;
164	}
165	}
166	}
167	return FALSE;
168	}
169
170	int32_t
171	LocalizationInfo::indexForLocale(const UChar* locale) const {
172	for (int i = `0`; i < getNumberOfDisplayLocales(); ++i) {
173	if (streq(locale, getLocaleName(i))) {
174	return i;
175	}
176	}
177	return -`1`;
178	}
179
180	int32_t
181	LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
182	if (ruleset) {
183	for (int i = `0`; i < getNumberOfRuleSets(); ++i) {
184	if (streq(ruleset, getRuleSetName(i))) {
185	return i;
186	}
187	}
188	}
189	return -`1`;
190	}
191
192
193	typedef void (Fn_Deleter)(void**);
194
195	class VArray {
196	void** buf;
197	int32_t cap;
198	int32_t size;
199	Fn_Deleter deleter;
200	public:
201	VArray() : buf(NULL), cap(`0`), size(`0`), deleter(NULL) {}
202
203	VArray(Fn_Deleter del) : buf(NULL), cap(`0`), size(`0`), deleter(del) {}
204
205	~VArray() {
206	if (deleter) {
207	for (int i = `0`; i < size; ++i) {
208	(*deleter)(buf[i]);
209	}
210	}
211	uprv_free(buf);
212	}
213
214	int32_t length() {
215	return size;
216	}
217
218	void add(void* elem, UErrorCode& status) {
219	if (U_SUCCESS(status)) {
220	if (size == cap) {
221	if (cap == `0`) {
222	cap = `1`;
223	} else if (cap < `256`) {
224	cap *= `2`;
225	} else {
226	cap += `256`;
227	}
228	if (buf == NULL) {
229	buf = (void*)uprv_malloc(cap sizeof(void*));
230	} else {
231	buf = (void*)uprv_realloc(buf, cap sizeof(void*));
232	}
233	if (buf == NULL) {
234	// if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
235	status = U_MEMORY_ALLOCATION_ERROR;
236	return;
237	}
238	void* start = &buf[size];
239	size_t count = (cap - size) * sizeof(void*);
240	uprv_memset(start, `0`, count); // fill with nulls, just because
241	}
242	buf[size++] = elem;
243	}
244	}
245
246	void** release(void) {
247	void** result = buf;
248	buf = NULL;
249	cap = `0`;
250	size = `0`;
251	return result;
252	}
253	};
254
255	class LocDataParser;
256
257	class StringLocalizationInfo : public LocalizationInfo {
258	UChar* info;
259	UChar*** data;
260	int32_t numRuleSets;
261	int32_t numLocales;
262
263	friend class LocDataParser;
264
265	StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
266	: info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
267	{
268	}
269
270	public:
271	static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
272
273	virtual ~StringLocalizationInfo();
274	virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
275	virtual const UChar* getRuleSetName(int32_t index) const;
276	virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
277	virtual const UChar* getLocaleName(int32_t index) const;
278	virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
279
280	// virtual UClassID getDynamicClassID() const;
281	// static UClassID getStaticClassID(void);
282
283	private:
284	void init(UErrorCode& status) const;
285	};
286
287
// Delimiter characters recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' */
    QUOTE = 0x0022, /* '"' */
    SPACE = 0x0020 /* ' ' */
};
296
297	/**
298	* Utility for parsing a localization string and returning a StringLocalizationInfo*.
299	*/
300	class LocDataParser {
301	UChar* data;
302	const UChar* e;
303	UChar* p;
304	UChar ch;
305	UParseError& pe;
306	UErrorCode& ec;
307
308	public:
309	LocDataParser(UParseError& parseError, UErrorCode& status)
310	: data(NULL), e(NULL), p(NULL), ch(`0xffff`), pe(parseError), ec(status) {}
311	~LocDataParser() {}
312
313	/*
314	* On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
315	* and return NULL. The StringLocalizationInfo will adopt locData if it is created.
316	*/
317	StringLocalizationInfo* parse(UChar* data, int32_t len);
318
319	private:
320
321	inline void inc(void) {
322	++p;
323	ch = `0xffff`;
324	}
325	inline UBool checkInc(UChar c) {
326	if (p < e && (ch == c \|\| *p == c)) {
327	inc();
328	return TRUE;
329	}
330	return FALSE;
331	}
332	inline UBool check(UChar c) {
333	return p < e && (ch == c \|\| *p == c);
334	}
335	inline void skipWhitespace(void) {
336	while (p < e && PatternProps::isWhiteSpace(ch != `0xffff` ? ch : *p)) {
337	inc();
338	}
339	}
340	inline UBool inList(UChar c, const UChar* list) const {
341	if (*list == SPACE && PatternProps::isWhiteSpace(c)) {
342	return TRUE;
343	}
344	while (list && list != c) {
345	++list;
346	}
347	return *list == c;
348	}
349	void parseError(const char* msg);
350
351	StringLocalizationInfo* doParse(void);
352
353	UChar** nextArray(int32_t& requiredLength);
354	UChar* nextString(void);
355	};
356
// ERROR(msg) reports a parse failure through parseError() and returns NULL
// from the enclosing LocDataParser member function. The message text is only
// forwarded when RBNF_DEBUG is defined; otherwise parseError() receives NULL
// and its parameter is left unnamed.
#ifndef RBNF_DEBUG
#define ERROR(msg) UPRV_BLOCK_MACRO_BEGIN { \
    parseError(NULL); \
    return NULL; \
} UPRV_BLOCK_MACRO_END
#define EXPLANATION_ARG
#else
#define ERROR(msg) UPRV_BLOCK_MACRO_BEGIN { \
    parseError(msg); \
    return NULL; \
} UPRV_BLOCK_MACRO_END
#define EXPLANATION_ARG explanationArg
#endif
370
371
372	static const UChar DQUOTE_STOPLIST[] = {
373	QUOTE, `0`
374	};
375
376	static const UChar SQUOTE_STOPLIST[] = {
377	TICK, `0`
378	};
379
380	static const UChar NOQUOTE_STOPLIST[] = {
381	SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, `0`
382	};
383
384	static void
385	DeleteFn(void* p) {
386	uprv_free(p);
387	}
388
389	StringLocalizationInfo*
390	LocDataParser::parse(UChar* _data, int32_t len) {
391	if (U_FAILURE(ec)) {
392	if (_data) uprv_free(_data);
393	return NULL;
394	}
395
396	pe.line = `0`;
397	pe.offset = -`1`;
398	pe.postContext[`0`] = `0`;
399	pe.preContext[`0`] = `0`;
400
401	if (_data == NULL) {
402	ec = U_ILLEGAL_ARGUMENT_ERROR;
403	return NULL;
404	}
405
406	if (len <= `0`) {
407	ec = U_ILLEGAL_ARGUMENT_ERROR;
408	uprv_free(_data);
409	return NULL;
410	}
411
412	data = _data;
413	e = data + len;
414	p = _data;
415	ch = `0xffff`;
416
417	return doParse();
418	}
419
420
421	StringLocalizationInfo*
422	LocDataParser::doParse(void) {
423	skipWhitespace();
424	if (!checkInc(OPEN_ANGLE)) {
425	ERROR("Missing open angle");
426	} else {
427	VArray array(DeleteFn);
428	UBool mightHaveNext = TRUE;
429	int32_t requiredLength = -`1`;
430	while (mightHaveNext) {
431	mightHaveNext = FALSE;
432	UChar** elem = nextArray(requiredLength);
433	skipWhitespace();
434	UBool haveComma = check(COMMA);
435	if (elem) {
436	array.add(elem, ec);
437	if (haveComma) {
438	inc();
439	mightHaveNext = TRUE;
440	}
441	} else if (haveComma) {
442	ERROR("Unexpected character");
443	}
444	}
445
446	skipWhitespace();
447	if (!checkInc(CLOSE_ANGLE)) {
448	if (check(OPEN_ANGLE)) {
449	ERROR("Missing comma in outer array");
450	} else {
451	ERROR("Missing close angle bracket in outer array");
452	}
453	}
454
455	skipWhitespace();
456	if (p != e) {
457	ERROR("Extra text after close of localization data");
458	}
459
460	array.add(NULL, ec);
461	if (U_SUCCESS(ec)) {
462	int32_t numLocs = array.length() - `2`; // subtract first, NULL
463	UChar* result = (UChar*)array.release();
464
465	return new StringLocalizationInfo (data, result, requiredLength-`2`, numLocs); // subtract first, NULL
466	}
467	}
468
469	ERROR("Unknown error");
470	}
471
472	UChar**
473	LocDataParser::nextArray(int32_t& requiredLength) {
474	if (U_FAILURE(ec)) {
475	return NULL;
476	}
477
478	skipWhitespace();
479	if (!checkInc(OPEN_ANGLE)) {
480	ERROR("Missing open angle");
481	}
482
483	VArray array;
484	UBool mightHaveNext = TRUE;
485	while (mightHaveNext) {
486	mightHaveNext = FALSE;
487	UChar* elem = nextString();
488	skipWhitespace();
489	UBool haveComma = check(COMMA);
490	if (elem) {
491	array.add(elem, ec);
492	if (haveComma) {
493	inc();
494	mightHaveNext = TRUE;
495	}
496	} else if (haveComma) {
497	ERROR("Unexpected comma");
498	}
499	}
500	skipWhitespace();
501	if (!checkInc(CLOSE_ANGLE)) {
502	if (check(OPEN_ANGLE)) {
503	ERROR("Missing close angle bracket in inner array");
504	} else {
505	ERROR("Missing comma in inner array");
506	}
507	}
508
509	array.add(NULL, ec);
510	if (U_SUCCESS(ec)) {
511	if (requiredLength == -`1`) {
512	requiredLength = array.length() + `1`;
513	} else if (array.length() != requiredLength) {
514	ec = U_ILLEGAL_ARGUMENT_ERROR;
515	ERROR("Array not of required length");
516	}
517
518	return (UChar**)array.release();
519	}
520	ERROR("Unknown Error");
521	}
522
523	UChar*
524	LocDataParser::nextString() {
525	UChar* result = NULL;
526
527	skipWhitespace();
528	if (p < e) {
529	const UChar* terminators;
530	UChar c = *p;
531	UBool haveQuote = c == QUOTE \|\| c == TICK;
532	if (haveQuote) {
533	inc();
534	terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
535	} else {
536	terminators = NOQUOTE_STOPLIST;
537	}
538	UChar* start = p;
539	while (p < e && !inList(*p, terminators)) ++p;
540	if (p == e) {
541	ERROR("Unexpected end of data");
542	}
543
544	UChar x = *p;
545	if (p > start) {
546	ch = x;
547	p = `0x0`; // terminate by writing to data*
548	result = start; // just point into data
549	}
550	if (haveQuote) {
551	if (x != c) {
552	ERROR("Missing matching quote");
553	} else if (p == start) {
554	ERROR("Empty string");
555	}
556	inc();
557	} else if (x == OPEN_ANGLE \|\| x == TICK \|\| x == QUOTE) {
558	ERROR("Unexpected character in string");
559	}
560	}
561
562	// ok for there to be no next string
563	return result;
564	}
565
566	void LocDataParser::parseError(const char* EXPLANATION_ARG)
567	{
568	if (!data) {
569	return;
570	}
571
572	const UChar* start = p - U_PARSE_CONTEXT_LEN - `1`;
573	if (start < data) {
574	start = data;
575	}
576	for (UChar* x = p; --x >= start;) {
577	if (!*x) {
578	start = x+`1`;
579	break;
580	}
581	}
582	const UChar* limit = p + U_PARSE_CONTEXT_LEN - `1`;
583	if (limit > e) {
584	limit = e;
585	}
586	u_strncpy(pe.preContext, start, (int32_t)(p-start));
587	pe.preContext[p-start] = `0`;
588	u_strncpy(pe.postContext, p, (int32_t)(limit-p));
589	pe.postContext[limit-p] = `0`;
590	pe.offset = (int32_t)(p - data);
591
592	#ifdef RBNF_DEBUG
593	fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
594
595	UnicodeString msg;
596	msg.append(start, p - start);
597	msg.append((UChar)`0x002f`); / SOLIDUS/SLASH /
598	msg.append(p, limit-p);
599	msg.append(UNICODE_STRING_SIMPLE("'"));
600
601	char buf[`128`];
602	int32_t len = msg.extract(`0`, msg.length(), buf, `128`);
603	if (len >= `128`) {
604	buf[`127`] = `0`;
605	} else {
606	buf[len] = `0`;
607	}
608	fprintf(stderr, "%s\n", buf);
609	fflush(stderr);
610	#endif
611
612	uprv_free(data);
613	data = NULL;
614	p = NULL;
615	e = NULL;
616
617	if (U_SUCCESS(ec)) {
618	ec = U_PARSE_ERROR;
619	}
620	}
621
622	//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
623
624	StringLocalizationInfo*
625	StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
626	if (U_FAILURE(status)) {
627	return NULL;
628	}
629
630	int32_t len = info.length();
631	if (len == `0`) {
632	return NULL; // no error;
633	}
634
635	UChar* p = (UChar)uprv_malloc(len sizeof(UChar));
636	if (!p) {
637	status = U_MEMORY_ALLOCATION_ERROR;
638	return NULL;
639	}
640	info.extract(p, len, status);
641	if (!U_FAILURE(status)) {
642	status = U_ZERO_ERROR; // clear warning about non-termination
643	}
644
645	LocDataParser parser(perror, status);
646	return parser.parse(p, len);
647	}
648
649	StringLocalizationInfo::~StringLocalizationInfo() {
650	for (UChar* p = (UChar)data; p; ++p) {
651	// remaining data is simply pointer into our unicode string data.
652	if (p) uprv_free(p);
653	}
654	if (data) uprv_free(data);
655	if (info) uprv_free(info);
656	}
657
658
659	const UChar*
660	StringLocalizationInfo::getRuleSetName(int32_t index) const {
661	if (index >= `0` && index < getNumberOfRuleSets()) {
662	return data[`0`][index];
663	}
664	return NULL;
665	}
666
667	const UChar*
668	StringLocalizationInfo::getLocaleName(int32_t index) const {
669	if (index >= `0` && index < getNumberOfDisplayLocales()) {
670	return data[index+`1`][`0`];
671	}
672	return NULL;
673	}
674
675	const UChar*
676	StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
677	if (localeIndex >= `0` && localeIndex < getNumberOfDisplayLocales() &&
678	ruleIndex >= `0` && ruleIndex < getNumberOfRuleSets()) {
679	return data[localeIndex+`1`][ruleIndex+`1`];
680	}
681	return NULL;
682	}
683
684	// ----------
685
686	RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
687	const UnicodeString& locs,
688	const Locale& alocale, UParseError& perror, UErrorCode& status)
689	: fRuleSets(NULL)
690	, ruleSetDescriptions(NULL)
691	, numRuleSets(`0`)
692	, defaultRuleSet(NULL)
693	, locale (alocale)
694	, collator(NULL)
695	, decimalFormatSymbols(NULL)
696	, defaultInfinityRule(NULL)
697	, defaultNaNRule(NULL)
698	, fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
699	, lenient(FALSE)
700	, lenientParseRules(NULL)
701	, localizations(NULL)
702	, capitalizationInfoSet(FALSE)
703	, capitalizationForUIListMenu(FALSE)
704	, capitalizationForStandAlone(FALSE)
705	, capitalizationBrkIter(NULL)
706	{
707	LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
708	init(description, locinfo, perror, status);
709	}
710
711	RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
712	const UnicodeString& locs,
713	UParseError& perror, UErrorCode& status)
714	: fRuleSets(NULL)
715	, ruleSetDescriptions(NULL)
716	, numRuleSets(`0`)
717	, defaultRuleSet(NULL)
718	, locale (Locale::getDefault())
719	, collator(NULL)
720	, decimalFormatSymbols(NULL)
721	, defaultInfinityRule(NULL)
722	, defaultNaNRule(NULL)
723	, fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
724	, lenient(FALSE)
725	, lenientParseRules(NULL)
726	, localizations(NULL)
727	, capitalizationInfoSet(FALSE)
728	, capitalizationForUIListMenu(FALSE)
729	, capitalizationForStandAlone(FALSE)
730	, capitalizationBrkIter(NULL)
731	{
732	LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
733	init(description, locinfo, perror, status);
734	}
735
736	RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
737	LocalizationInfo* info,
738	const Locale& alocale, UParseError& perror, UErrorCode& status)
739	: fRuleSets(NULL)
740	, ruleSetDescriptions(NULL)
741	, numRuleSets(`0`)
742	, defaultRuleSet(NULL)
743	, locale (alocale)
744	, collator(NULL)
745	, decimalFormatSymbols(NULL)
746	, defaultInfinityRule(NULL)
747	, defaultNaNRule(NULL)
748	, fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
749	, lenient(FALSE)
750	, lenientParseRules(NULL)
751	, localizations(NULL)
752	, capitalizationInfoSet(FALSE)
753	, capitalizationForUIListMenu(FALSE)
754	, capitalizationForStandAlone(FALSE)
755	, capitalizationBrkIter(NULL)
756	{
757	init(description, info, perror, status);
758	}
759
760	RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
761	UParseError& perror,
762	UErrorCode& status)
763	: fRuleSets(NULL)
764	, ruleSetDescriptions(NULL)
765	, numRuleSets(`0`)
766	, defaultRuleSet(NULL)
767	, locale (Locale::getDefault())
768	, collator(NULL)
769	, decimalFormatSymbols(NULL)
770	, defaultInfinityRule(NULL)
771	, defaultNaNRule(NULL)
772	, fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
773	, lenient(FALSE)
774	, lenientParseRules(NULL)
775	, localizations(NULL)
776	, capitalizationInfoSet(FALSE)
777	, capitalizationForUIListMenu(FALSE)
778	, capitalizationForStandAlone(FALSE)
779	, capitalizationBrkIter(NULL)
780	{
781	init(description, NULL, perror, status);
782	}
783
784	RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
785	const Locale& aLocale,
786	UParseError& perror,
787	UErrorCode& status)
788	: fRuleSets(NULL)
789	, ruleSetDescriptions(NULL)
790	, numRuleSets(`0`)
791	, defaultRuleSet(NULL)
792	, locale (aLocale)
793	, collator(NULL)
794	, decimalFormatSymbols(NULL)
795	, defaultInfinityRule(NULL)
796	, defaultNaNRule(NULL)
797	, fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
798	, lenient(FALSE)
799	, lenientParseRules(NULL)
800	, localizations(NULL)
801	, capitalizationInfoSet(FALSE)
802	, capitalizationForUIListMenu(FALSE)
803	, capitalizationForStandAlone(FALSE)
804	, capitalizationBrkIter(NULL)
805	{
806	init(description, NULL, perror, status);
807	}
808
809	RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
810	: fRuleSets(NULL)
811	, ruleSetDescriptions(NULL)
812	, numRuleSets(`0`)
813	, defaultRuleSet(NULL)
814	, locale (alocale)
815	, collator(NULL)
816	, decimalFormatSymbols(NULL)
817	, defaultInfinityRule(NULL)
818	, defaultNaNRule(NULL)
819	, fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
820	, lenient(FALSE)
821	, lenientParseRules(NULL)
822	, localizations(NULL)
823	, capitalizationInfoSet(FALSE)
824	, capitalizationForUIListMenu(FALSE)
825	, capitalizationForStandAlone(FALSE)
826	, capitalizationBrkIter(NULL)
827	{
828	if (U_FAILURE(status)) {
829	return;
830	}
831
832	const char* rules_tag = "RBNFRules";
833	const char* fmt_tag = "";
834	switch (tag) {
835	case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
836	case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
837	case URBNF_DURATION: fmt_tag = "DurationRules"; break;
838	case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
839	default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
840	}
841
842	// TODO: read localization info from resource
843	LocalizationInfo* locinfo = NULL;
844
845	UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
846	if (U_SUCCESS(status)) {
847	setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
848	ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
849
850	UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
851	if (U_FAILURE(status)) {
852	ures_close(nfrb);
853	}
854	UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
855	if (U_FAILURE(status)) {
856	ures_close(rbnfRules);
857	ures_close(nfrb);
858	return;
859	}
860
861	UnicodeString desc;
862	while (ures_hasNext(ruleSets)) {
863	desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
864	}
865	UParseError perror;
866
867	init(desc, locinfo, perror, status);
868
869	ures_close(ruleSets);
870	ures_close(rbnfRules);
871	}
872	ures_close(nfrb);
873	}
874
875	RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
876	: NumberFormat (rhs)
877	, fRuleSets(NULL)
878	, ruleSetDescriptions(NULL)
879	, numRuleSets(`0`)
880	, defaultRuleSet(NULL)
881	, locale (rhs.locale)
882	, collator(NULL)
883	, decimalFormatSymbols(NULL)
884	, defaultInfinityRule(NULL)
885	, defaultNaNRule(NULL)
886	, fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
887	, lenient(FALSE)
888	, lenientParseRules(NULL)
889	, localizations(NULL)
890	, capitalizationInfoSet(FALSE)
891	, capitalizationForUIListMenu(FALSE)
892	, capitalizationForStandAlone(FALSE)
893	, capitalizationBrkIter(NULL)
894	{
895	this->operator=(rhs);
896	}
897
898	// --------
899
900	RuleBasedNumberFormat&
901	RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
902	{
903	if (this == &rhs) {
904	return *this;
905	}
906	NumberFormat::operator=(rhs);
907	UErrorCode status = U_ZERO_ERROR;
908	dispose();
909	locale = rhs.locale;
910	lenient = rhs.lenient;
911
912	UParseError perror;
913	setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
914	init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
915	setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
916	setRoundingMode(rhs.getRoundingMode());
917
918	capitalizationInfoSet = rhs.capitalizationInfoSet;
919	capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
920	capitalizationForStandAlone = rhs.capitalizationForStandAlone;
921	#if !UCONFIG_NO_BREAK_ITERATION
922	capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
923	#endif
924
925	return *this;
926	}
927
928	RuleBasedNumberFormat::~RuleBasedNumberFormat()
929	{
930	dispose();
931	}
932
933	RuleBasedNumberFormat*
934	RuleBasedNumberFormat::clone() const
935	{
936	return new RuleBasedNumberFormat (*this);
937	}
938
939	UBool
940	RuleBasedNumberFormat::operator==(const Format& other) const
941	{
942	if (this == &other) {
943	return TRUE;
944	}
945
946	if (typeid(*this) == typeid(other)) {
947	const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
948	// test for capitalization info equality is adequately handled
949	// by the NumberFormat test for fCapitalizationContext equality;
950	// the info here is just derived from that.
951	if (locale == rhs.locale &&
952	lenient == rhs.lenient &&
953	(localizations == NULL
954	? rhs.localizations == NULL
955	: (rhs.localizations == NULL
956	? FALSE
957	: *localizations == rhs.localizations))) {
958
959	NFRuleSet** p = fRuleSets;
960	NFRuleSet** q = rhs.fRuleSets;
961	if (p == NULL) {
962	return q == NULL;
963	} else if (q == NULL) {
964	return FALSE;
965	}
966	while (p && q && (p == q)) {
967	++p;
968	++q;
969	}
970	return q == NULL && p == NULL;
971	}
972	}
973
974	return FALSE;
975	}
976
977	UnicodeString
978	RuleBasedNumberFormat::getRules() const
979	{
980	UnicodeString result;
981	if (fRuleSets != NULL) {
982	for (NFRuleSet** p = fRuleSets; *p; ++p) {
983	(*p)->appendRules(result);
984	}
985	}
986	return result;
987	}
988
989	UnicodeString
990	RuleBasedNumberFormat::getRuleSetName(int32_t index) const
991	{
992	if (localizations) {
993	UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-`1`);
994	return string;
995	}
996	else if (fRuleSets) {
997	UnicodeString result;
998	for (NFRuleSet** p = fRuleSets; *p; ++p) {
999	NFRuleSet* rs = *p;
1000	if (rs->isPublic()) {
1001	if (--index == -`1`) {
1002	rs->getName(result);
1003	return result;
1004	}
1005	}
1006	}
1007	}
1008	UnicodeString empty;
1009	return empty;
1010	}
1011
1012	int32_t
1013	RuleBasedNumberFormat::getNumberOfRuleSetNames() const
1014	{
1015	int32_t result = `0`;
1016	if (localizations) {
1017	result = localizations->getNumberOfRuleSets();
1018	}
1019	else if (fRuleSets) {
1020	for (NFRuleSet** p = fRuleSets; *p; ++p) {
1021	if ((**p).isPublic()) {
1022	++result;
1023	}
1024	}
1025	}
1026	return result;
1027	}
1028
1029	int32_t
1030	RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
1031	if (localizations) {
1032	return localizations->getNumberOfDisplayLocales();
1033	}
1034	return `0`;
1035	}
1036
1037	Locale
1038	RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1039	if (U_FAILURE(status)) {
1040	return Locale ("");
1041	}
1042	if (localizations && index >= `0` && index < localizations->getNumberOfDisplayLocales()) {
1043	UnicodeString name(TRUE, localizations->getLocaleName(index), -`1`);
1044	char buffer[`64`];
1045	int32_t cap = name.length() + `1`;
1046	char* bp = buffer;
1047	if (cap > `64`) {
1048	bp = (char *)uprv_malloc(cap);
1049	if (bp == NULL) {
1050	status = U_MEMORY_ALLOCATION_ERROR;
1051	return Locale ("");
1052	}
1053	}
1054	name.extract(`0`, name.length(), bp, cap, UnicodeString::kInvariant);
1055	Locale retLocale(bp);
1056	if (bp != buffer) {
1057	uprv_free(bp);
1058	}
1059	return retLocale;
1060	}
1061	status = U_ILLEGAL_ARGUMENT_ERROR;
1062	Locale retLocale;
1063	return retLocale;
1064	}
1065
1066	UnicodeString
1067	RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1068	if (localizations && index >= `0` && index < localizations->getNumberOfRuleSets()) {
1069	UnicodeString localeName(localeParam.getBaseName(), -`1`, UnicodeString::kInvariant);
1070	int32_t len = localeName.length();
1071	UChar* localeStr = localeName.getBuffer(len + `1`);
1072	while (len >= `0`) {
1073	localeStr[len] = `0`;
1074	int32_t ix = localizations->indexForLocale(localeStr);
1075	if (ix >= `0`) {
1076	UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -`1`);
1077	return name;
1078	}
1079
1080	// trim trailing portion, skipping over ommitted sections
1081	do { --len;} while (len > `0` && localeStr[len] != `0x005f`); // underscore
1082	while (len > `0` && localeStr[len-`1`] == `0x005F`) --len;
1083	}
1084	UnicodeString name(TRUE, localizations->getRuleSetName(index), -`1`);
1085	return name;
1086	}
1087	UnicodeString bogus;
1088	bogus.setToBogus();
1089	return bogus;
1090	}
1091
1092	UnicodeString
1093	RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1094	if (localizations) {
1095	UnicodeString rsn(ruleSetName);
1096	int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1097	return getRuleSetDisplayName(ix, localeParam);
1098	}
1099	UnicodeString bogus;
1100	bogus.setToBogus();
1101	return bogus;
1102	}
1103
1104	NFRuleSet*
1105	RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1106	{
1107	if (U_SUCCESS(status) && fRuleSets) {
1108	for (NFRuleSet** p = fRuleSets; *p; ++p) {
1109	NFRuleSet* rs = *p;
1110	if (rs->isNamed(name)) {
1111	return rs;
1112	}
1113	}
1114	status = U_ILLEGAL_ARGUMENT_ERROR;
1115	}
1116	return NULL;
1117	}
1118
1119	UnicodeString&
1120	RuleBasedNumberFormat::format(const DecimalQuantity &number,
1121	UnicodeString& appendTo,
1122	FieldPosition& pos,
1123	UErrorCode &status) const {
1124	if (U_FAILURE(status)) {
1125	return appendTo;
1126	}
1127	DecimalQuantity copy(number);
1128	if (copy.fitsInLong()) {
1129	format(number.toLong(), appendTo, pos, status);
1130	}
1131	else {
1132	copy.roundToMagnitude(`0`, number::impl::RoundingMode::UNUM_ROUND_HALFEVEN, status);
1133	if (copy.fitsInLong()) {
1134	format(number.toDouble(), appendTo, pos, status);
1135	}
1136	else {
1137	// We're outside of our normal range that this framework can handle.
1138	// The DecimalFormat will provide more accurate results.
1139
1140	// TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1141	LocalPointer<NumberFormat> decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status);
1142	if (decimalFormat.isNull()) {
1143	return appendTo;
1144	}
1145	Formattable f;
1146	LocalPointer<DecimalQuantity> decimalQuantity(new DecimalQuantity (number), status);
1147	if (decimalQuantity.isNull()) {
1148	return appendTo;
1149	}
1150	f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity.
1151	decimalFormat ->format(f, appendTo, pos, status);
1152	}
1153	}
1154	return appendTo;
1155	}
1156
1157	UnicodeString&
1158	RuleBasedNumberFormat::format(int32_t number,
1159	UnicodeString& toAppendTo,
1160	FieldPosition& pos) const
1161	{
1162	return format((int64_t)number, toAppendTo, pos);
1163	}
1164
1165
1166	UnicodeString&
1167	RuleBasedNumberFormat::format(int64_t number,
1168	UnicodeString& toAppendTo,
1169	FieldPosition& / pos /) const
1170	{
1171	if (defaultRuleSet) {
1172	UErrorCode status = U_ZERO_ERROR;
1173	format(number, defaultRuleSet, toAppendTo, status);
1174	}
1175	return toAppendTo;
1176	}
1177
1178
1179	UnicodeString&
1180	RuleBasedNumberFormat::format(double number,
1181	UnicodeString& toAppendTo,
1182	FieldPosition& / pos /) const
1183	{
1184	UErrorCode status = U_ZERO_ERROR;
1185	if (defaultRuleSet) {
1186	format(number, *defaultRuleSet, toAppendTo, status);
1187	}
1188	return toAppendTo;
1189	}
1190
1191
1192	UnicodeString&
1193	RuleBasedNumberFormat::format(int32_t number,
1194	const UnicodeString& ruleSetName,
1195	UnicodeString& toAppendTo,
1196	FieldPosition& pos,
1197	UErrorCode& status) const
1198	{
1199	return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1200	}
1201
1202
1203	UnicodeString&
1204	RuleBasedNumberFormat::format(int64_t number,
1205	const UnicodeString& ruleSetName,
1206	UnicodeString& toAppendTo,
1207	FieldPosition& / pos /,
1208	UErrorCode& status) const
1209	{
1210	if (U_SUCCESS(status)) {
1211	if (ruleSetName.indexOf(gPercentPercent, `2`, `0`) == `0`) {
1212	// throw new IllegalArgumentException("Can't use internal rule set");
1213	status = U_ILLEGAL_ARGUMENT_ERROR;
1214	} else {
1215	NFRuleSet *rs = findRuleSet(ruleSetName, status);
1216	if (rs) {
1217	format(number, rs, toAppendTo, status);
1218	}
1219	}
1220	}
1221	return toAppendTo;
1222	}
1223
1224
1225	UnicodeString&
1226	RuleBasedNumberFormat::format(double number,
1227	const UnicodeString& ruleSetName,
1228	UnicodeString& toAppendTo,
1229	FieldPosition& / pos /,
1230	UErrorCode& status) const
1231	{
1232	if (U_SUCCESS(status)) {
1233	if (ruleSetName.indexOf(gPercentPercent, `2`, `0`) == `0`) {
1234	// throw new IllegalArgumentException("Can't use internal rule set");
1235	status = U_ILLEGAL_ARGUMENT_ERROR;
1236	} else {
1237	NFRuleSet *rs = findRuleSet(ruleSetName, status);
1238	if (rs) {
1239	format(number, *rs, toAppendTo, status);
1240	}
1241	}
1242	}
1243	return toAppendTo;
1244	}
1245
1246	void
1247	RuleBasedNumberFormat::format(double number,
1248	NFRuleSet& rs,
1249	UnicodeString& toAppendTo,
1250	UErrorCode& status) const
1251	{
1252	int32_t startPos = toAppendTo.length();
1253	if (getRoundingMode() != DecimalFormat::ERoundingMode::kRoundUnnecessary && !uprv_isNaN(number) && !uprv_isInfinite(number)) {
1254	DecimalQuantity digitList;
1255	digitList.setToDouble(number);
1256	digitList.roundToMagnitude(
1257	-getMaximumFractionDigits(),
1258	static_cast<UNumberFormatRoundingMode>(getRoundingMode()),
1259	status);
1260	number = digitList.toDouble();
1261	}
1262	rs.format(number, toAppendTo, toAppendTo.length(), `0`, status);
1263	adjustForCapitalizationContext(startPos, toAppendTo, status);
1264	}
1265
1266	/**
1267	* Bottleneck through which all the public format() methods
1268	* that take a long pass. By the time we get here, we know
1269	* which rule set we're using to do the formatting.
1270	* @param number The number to format
1271	* @param ruleSet The rule set to use to format the number
1272	* @return The text that resulted from formatting the number
1273	*/
1274	UnicodeString&
1275	RuleBasedNumberFormat::format(int64_t number, NFRuleSet ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const*
1276	{
1277	// all API format() routines that take a double vector through
1278	// here. We have these two identical functions-- one taking a
1279	// double and one taking a long-- the couple digits of precision
1280	// that long has but double doesn't (both types are 8 bytes long,
1281	// but double has to borrow some of the mantissa bits to hold
1282	// the exponent).
1283	// Create an empty string buffer where the result will
1284	// be built, and pass it to the rule set (along with an insertion
1285	// position of 0 and the number being formatted) to the rule set
1286	// for formatting
1287
1288	if (U_SUCCESS(status)) {
1289	if (number == U_INT64_MIN) {
1290	// We can't handle this value right now. Provide an accurate default value.
1291
1292	// TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1293	NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1294	if (decimalFormat == nullptr) {
1295	return toAppendTo;
1296	}
1297	Formattable f;
1298	FieldPosition pos(FieldPosition::DONT_CARE);
1299	DecimalQuantity decimalQuantity = new* DecimalQuantity ();
1300	if (decimalQuantity == nullptr) {
1301	status = U_MEMORY_ALLOCATION_ERROR;
1302	delete decimalFormat;
1303	return toAppendTo;
1304	}
1305	decimalQuantity->setToLong(number);
1306	f.adoptDecimalQuantity(decimalQuantity); // f now owns decimalQuantity.
1307	decimalFormat->format(f, toAppendTo, pos, status);
1308	delete decimalFormat;
1309	}
1310	else {
1311	int32_t startPos = toAppendTo.length();
1312	ruleSet->format(number, toAppendTo, toAppendTo.length(), `0`, status);
1313	adjustForCapitalizationContext(startPos, toAppendTo, status);
1314	}
1315	}
1316	return toAppendTo;
1317	}
1318
1319	UnicodeString&
1320	RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1321	UnicodeString& currentResult,
1322	UErrorCode& status) const
1323	{
1324	#if !UCONFIG_NO_BREAK_ITERATION
1325	UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1326	if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == `0` && currentResult.length() > `0`) {
1327	// capitalize currentResult according to context
1328	UChar32 ch = currentResult.char32At(`0`);
1329	if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL &&
1330	( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE \|\|
1331	(capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) \|\|
1332	(capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1333	// titlecase first word of currentResult, here use sentence iterator unlike current implementations
1334	// in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1335	currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE \| U_TITLECASE_NO_BREAK_ADJUSTMENT);
1336	}
1337	}
1338	#endif
1339	return currentResult;
1340	}
1341
1342
1343	void
1344	RuleBasedNumberFormat::parse(const UnicodeString& text,
1345	Formattable& result,
1346	ParsePosition& parsePosition) const
1347	{
1348	if (!fRuleSets) {
1349	parsePosition.setErrorIndex(`0`);
1350	return;
1351	}
1352
1353	UnicodeString workingText(text, parsePosition.getIndex());
1354	ParsePosition workingPos(`0`);
1355
1356	ParsePosition high_pp(`0`);
1357	Formattable high_result;
1358
1359	for (NFRuleSet** p = fRuleSets; *p; ++p) {
1360	NFRuleSet rp = p;
1361	if (rp->isPublic() && rp->isParseable()) {
1362	ParsePosition working_pp(`0`);
1363	Formattable working_result;
1364
1365	rp->parse(workingText, working_pp, kMaxDouble, `0`, working_result);
1366	if (working_pp.getIndex() > high_pp.getIndex()) {
1367	high_pp = working_pp;
1368	high_result = working_result;
1369
1370	if (high_pp.getIndex() == workingText.length()) {
1371	break;
1372	}
1373	}
1374	}
1375	}
1376
1377	int32_t startIndex = parsePosition.getIndex();
1378	parsePosition.setIndex(startIndex + high_pp.getIndex());
1379	if (high_pp.getIndex() > `0`) {
1380	parsePosition.setErrorIndex(-`1`);
1381	} else {
1382	int32_t errorIndex = (high_pp.getErrorIndex()>`0`)? high_pp.getErrorIndex(): `0`;
1383	parsePosition.setErrorIndex(startIndex + errorIndex);
1384	}
1385	result = high_result;
1386	if (result.getType() == Formattable::kDouble) {
1387	double d = result.getDouble();
1388	if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1389	// Note: casting a double to an int when the double is too large or small
1390	// to fit the destination is undefined behavior. The explicit range checks,
1391	// above, are required. Just casting and checking the result value is undefined.
1392	result.setLong(static_cast<int32_t>(d));
1393	}
1394	}
1395	}
1396
1397	#if !UCONFIG_NO_COLLATION
1398
1399	void
1400	RuleBasedNumberFormat::setLenient(UBool enabled)
1401	{
1402	lenient = enabled;
1403	if (!enabled && collator) {
1404	delete collator;
1405	collator = NULL;
1406	}
1407	}
1408
1409	#endif
1410
1411	void
1412	RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1413	if (U_SUCCESS(status)) {
1414	if (ruleSetName.isEmpty()) {
1415	if (localizations) {
1416	UnicodeString name(TRUE, localizations->getRuleSetName(`0`), -`1`);
1417	defaultRuleSet = findRuleSet(name, status);
1418	} else {
1419	initDefaultRuleSet();
1420	}
1421	} else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1422	status = U_ILLEGAL_ARGUMENT_ERROR;
1423	} else {
1424	NFRuleSet* result = findRuleSet(ruleSetName, status);
1425	if (result != NULL) {
1426	defaultRuleSet = result;
1427	}
1428	}
1429	}
1430	}
1431
1432	UnicodeString
1433	RuleBasedNumberFormat::getDefaultRuleSetName() const {
1434	UnicodeString result;
1435	if (defaultRuleSet && defaultRuleSet->isPublic()) {
1436	defaultRuleSet->getName(result);
1437	} else {
1438	result.setToBogus();
1439	}
1440	return result;
1441	}
1442
1443	void
1444	RuleBasedNumberFormat::initDefaultRuleSet()
1445	{
1446	defaultRuleSet = NULL;
1447	if (!fRuleSets) {
1448	return;
1449	}
1450
1451	const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1452	const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1453	const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1454
1455	NFRuleSet**p = &fRuleSets[`0`];
1456	while (*p) {
1457	if ((p)->isNamed(spellout) \|\| (p)->isNamed(ordinal) \|\| (*p)->isNamed(duration)) {
1458	defaultRuleSet = *p;
1459	return;
1460	} else {
1461	++p;
1462	}
1463	}
1464
1465	defaultRuleSet = *--p;
1466	if (!defaultRuleSet->isPublic()) {
1467	while (p != fRuleSets) {
1468	if ((*--p)->isPublic()) {
1469	defaultRuleSet = *p;
1470	break;
1471	}
1472	}
1473	}
1474	}
1475
1476
1477	void
1478	RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1479	UParseError& pErr, UErrorCode& status)
1480	{
1481	// TODO: implement UParseError
1482	uprv_memset(&pErr, `0`, sizeof(UParseError));
1483	// Note: this can leave ruleSets == NULL, so remaining code should check
1484	if (U_FAILURE(status)) {
1485	return;
1486	}
1487
1488	initializeDecimalFormatSymbols(status);
1489	initializeDefaultInfinityRule(status);
1490	initializeDefaultNaNRule(status);
1491	if (U_FAILURE(status)) {
1492	return;
1493	}
1494
1495	this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1496
1497	UnicodeString description(rules);
1498	if (!description.length()) {
1499	status = U_MEMORY_ALLOCATION_ERROR;
1500	return;
1501	}
1502
1503	// start by stripping the trailing whitespace from all the rules
1504	// (this is all the whitespace follwing each semicolon in the
1505	// description). This allows us to look for rule-set boundaries
1506	// by searching for ";%" without having to worry about whitespace
1507	// between the ; and the %
1508	stripWhitespace(description);
1509
1510	// check to see if there's a set of lenient-parse rules. If there
1511	// is, pull them out into our temporary holding place for them,
1512	// and delete them from the description before the real desciption-
1513	// parsing code sees them
1514	int32_t lp = description.indexOf(gLenientParse, -`1`, `0`);
1515	if (lp != -`1`) {
1516	// we've got to make sure we're not in the middle of a rule
1517	// (where "%%lenient-parse" would actually get treated as
1518	// rule text)
1519	if (lp == `0` \|\| description.charAt(lp - `1`) == gSemiColon) {
1520	// locate the beginning and end of the actual collation
1521	// rules (there may be whitespace between the name and
1522	// the first token in the description)
1523	int lpEnd = description.indexOf(gSemiPercent, `2`, lp);
1524
1525	if (lpEnd == -`1`) {
1526	lpEnd = description.length() - `1`;
1527	}
1528	int lpStart = lp + u_strlen(gLenientParse);
1529	while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1530	++lpStart;
1531	}
1532
1533	// copy out the lenient-parse rules and delete them
1534	// from the description
1535	lenientParseRules = new UnicodeString ();
1536	/ test for NULL /
1537	if (lenientParseRules == nullptr) {
1538	status = U_MEMORY_ALLOCATION_ERROR;
1539	return;
1540	}
1541	lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1542
1543	description.remove(lp, lpEnd + `1` - lp);
1544	}
1545	}
1546
1547	// pre-flight parsing the description and count the number of
1548	// rule sets (";%" marks the end of one rule set and the beginning
1549	// of the next)
1550	numRuleSets = `0`;
1551	for (int32_t p = description.indexOf(gSemiPercent, `2`, `0`); p != -`1`; p = description.indexOf(gSemiPercent, `2`, p)) {
1552	++numRuleSets;
1553	++p;
1554	}
1555	++numRuleSets;
1556
1557	// our rule list is an array of the appropriate size
1558	fRuleSets = (NFRuleSet *)uprv_malloc((numRuleSets + `1`) sizeof(NFRuleSet *));
1559	/ test for NULL /
1560	if (fRuleSets == `0`) {
1561	status = U_MEMORY_ALLOCATION_ERROR;
1562	return;
1563	}
1564
1565	for (int i = `0`; i <= numRuleSets; ++i) {
1566	fRuleSets[i] = NULL;
1567	}
1568
1569	// divide up the descriptions into individual rule-set descriptions
1570	// and store them in a temporary array. At each step, we also
1571	// new up a rule set, but all this does is initialize its name
1572	// and remove it from its description. We can't actually parse
1573	// the rest of the descriptions and finish initializing everything
1574	// because we have to know the names and locations of all the rule
1575	// sets before we can actually set everything up
1576	if(!numRuleSets) {
1577	status = U_ILLEGAL_ARGUMENT_ERROR;
1578	return;
1579	}
1580
1581	ruleSetDescriptions = new UnicodeString[numRuleSets];
1582	if (ruleSetDescriptions == nullptr) {
1583	status = U_MEMORY_ALLOCATION_ERROR;
1584	return;
1585	}
1586
1587	{
1588	int curRuleSet = `0`;
1589	int32_t start = `0`;
1590	for (int32_t p = description.indexOf(gSemiPercent, `2`, `0`); p != -`1`; p = description.indexOf(gSemiPercent, `2`, start)) {
1591	ruleSetDescriptions[curRuleSet].setTo(description, start, p + `1` - start);
1592	fRuleSets[curRuleSet] = new NFRuleSet (this, ruleSetDescriptions, curRuleSet, status);
1593	if (fRuleSets[curRuleSet] == nullptr) {
1594	status = U_MEMORY_ALLOCATION_ERROR;
1595	return;
1596	}
1597	++curRuleSet;
1598	start = p + `1`;
1599	}
1600	ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1601	fRuleSets[curRuleSet] = new NFRuleSet (this, ruleSetDescriptions, curRuleSet, status);
1602	if (fRuleSets[curRuleSet] == nullptr) {
1603	status = U_MEMORY_ALLOCATION_ERROR;
1604	return;
1605	}
1606	}
1607
1608	// now we can take note of the formatter's default rule set, which
1609	// is the last public rule set in the description (it's the last
1610	// rather than the first so that a user can create a new formatter
1611	// from an existing formatter and change its default behavior just
1612	// by appending more rule sets to the end)
1613
1614	// {dlf} Initialization of a fraction rule set requires the default rule
1615	// set to be known. For purposes of initialization, this is always the
1616	// last public rule set, no matter what the localization data says.
1617	initDefaultRuleSet();
1618
1619	// finally, we can go back through the temporary descriptions
1620	// list and finish setting up the substructure (and we throw
1621	// away the temporary descriptions as we go)
1622	{
1623	for (int i = `0`; i < numRuleSets; i++) {
1624	fRuleSets[i]->parseRules(ruleSetDescriptions[i], status);
1625	}
1626	}
1627
1628	// Now that the rules are initialized, the 'real' default rule
1629	// set can be adjusted by the localization data.
1630
1631	// The C code keeps the localization array as is, rather than building
1632	// a separate array of the public rule set names, so we have less work
1633	// to do here-- but we still need to check the names.
1634
1635	if (localizationInfos) {
1636	// confirm the names, if any aren't in the rules, that's an error
1637	// it is ok if the rules contain public rule sets that are not in this list
1638	for (int32_t i = `0`; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1639	UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -`1`);
1640	NFRuleSet* rs = findRuleSet(name, status);
1641	if (rs == NULL) {
1642	break; // error
1643	}
1644	if (i == `0`) {
1645	defaultRuleSet = rs;
1646	}
1647	}
1648	} else {
1649	defaultRuleSet = getDefaultRuleSet();
1650	}
1651	originalDescription = rules;
1652	}
1653
1654	// override the NumberFormat implementation in order to
1655	// lazily initialize relevant items
1656	void
1657	RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1658	{
1659	NumberFormat::setContext(value, status);
1660	if (U_SUCCESS(status)) {
1661	if (!capitalizationInfoSet &&
1662	(value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU \|\| value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1663	initCapitalizationContextInfo(locale);
1664	capitalizationInfoSet = TRUE;
1665	}
1666	#if !UCONFIG_NO_BREAK_ITERATION
1667	if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE \|\|
1668	(value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) \|\|
1669	(value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1670	status = U_ZERO_ERROR;
1671	capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1672	if (U_FAILURE(status)) {
1673	delete capitalizationBrkIter;
1674	capitalizationBrkIter = NULL;
1675	}
1676	}
1677	#endif
1678	}
1679	}
1680
1681	void
1682	RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1683	{
1684	#if !UCONFIG_NO_BREAK_ITERATION
1685	const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1686	UErrorCode status = U_ZERO_ERROR;
1687	UResourceBundle *rb = ures_open(NULL, localeID, &status);
1688	rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1689	rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1690	if (U_SUCCESS(status) && rb != NULL) {
1691	int32_t len = `0`;
1692	const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1693	if (U_SUCCESS(status) && intVector != NULL && len >= `2`) {
1694	capitalizationForUIListMenu = static_cast<UBool>(intVector[`0`]);
1695	capitalizationForStandAlone = static_cast<UBool>(intVector[`1`]);
1696	}
1697	}
1698	ures_close(rb);
1699	#endif
1700	}
1701
1702	void
1703	RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1704	{
1705	// iterate through the characters...
1706	UnicodeString result;
1707
1708	int start = `0`;
1709	while (start != -`1` && start < description.length()) {
1710	// seek to the first non-whitespace character...
1711	while (start < description.length()
1712	&& PatternProps::isWhiteSpace(description.charAt(start))) {
1713	++start;
1714	}
1715
1716	// locate the next semicolon in the text and copy the text from
1717	// our current position up to that semicolon into the result
1718	int32_t p = description.indexOf(gSemiColon, start);
1719	if (p == -`1`) {
1720	// or if we don't find a semicolon, just copy the rest of
1721	// the string into the result
1722	result.append(description, start, description.length() - start);
1723	start = -`1`;
1724	}
1725	else if (p < description.length()) {
1726	result.append(description, start, p + `1` - start);
1727	start = p + `1`;
1728	}
1729
1730	// when we get here, we've seeked off the end of the string, and
1731	// we terminate the loop (we continue until start* is -1 rather*
1732	// than until p* is -1, because otherwise we'd miss the last*
1733	// rule in the description)
1734	else {
1735	start = -`1`;
1736	}
1737	}
1738
1739	description.setTo(result);
1740	}
1741
1742
1743	void
1744	RuleBasedNumberFormat::dispose()
1745	{
1746	if (fRuleSets) {
1747	for (NFRuleSet** p = fRuleSets; *p; ++p) {
1748	delete *p;
1749	}
1750	uprv_free(fRuleSets);
1751	fRuleSets = NULL;
1752	}
1753
1754	if (ruleSetDescriptions) {
1755	delete [] ruleSetDescriptions;
1756	ruleSetDescriptions = NULL;
1757	}
1758
1759	#if !UCONFIG_NO_COLLATION
1760	delete collator;
1761	#endif
1762	collator = NULL;
1763
1764	delete decimalFormatSymbols;
1765	decimalFormatSymbols = NULL;
1766
1767	delete defaultInfinityRule;
1768	defaultInfinityRule = NULL;
1769
1770	delete defaultNaNRule;
1771	defaultNaNRule = NULL;
1772
1773	delete lenientParseRules;
1774	lenientParseRules = NULL;
1775
1776	#if !UCONFIG_NO_BREAK_ITERATION
1777	delete capitalizationBrkIter;
1778	capitalizationBrkIter = NULL;
1779	#endif
1780
1781	if (localizations) {
1782	localizations = localizations->unref();
1783	}
1784	}
1785
1786
1787	//-----------------------------------------------------------------------
1788	// package-internal API
1789	//-----------------------------------------------------------------------
1790
1791	/**
1792	* Returns the collator to use for lenient parsing. The collator is lazily created:
1793	* this function creates it the first time it's called.
1794	* @return The collator to use for lenient parsing, or null if lenient parsing
1795	* is turned off.
1796	*/
1797	const RuleBasedCollator*
1798	RuleBasedNumberFormat::getCollator() const
1799	{
1800	#if !UCONFIG_NO_COLLATION
1801	if (!fRuleSets) {
1802	return NULL;
1803	}
1804
1805	// lazy-evaluate the collator
1806	if (collator == NULL && lenient) {
1807	// create a default collator based on the formatter's locale,
1808	// then pull out that collator's rules, append any additional
1809	// rules specified in the description, and create a _new_
1810	// collator based on the combination of those rules
1811
1812	UErrorCode status = U_ZERO_ERROR;
1813
1814	Collator* temp = Collator::createInstance(locale, status);
1815	RuleBasedCollator* newCollator;
1816	if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1817	if (lenientParseRules) {
1818	UnicodeString rules(newCollator->getRules());
1819	rules.append(*lenientParseRules);
1820
1821	newCollator = new RuleBasedCollator (rules, status);
1822	// Exit if newCollator could not be created.
1823	if (newCollator == NULL) {
1824	return NULL;
1825	}
1826	} else {
1827	temp = NULL;
1828	}
1829	if (U_SUCCESS(status)) {
1830	newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1831	// cast away const
1832	((RuleBasedNumberFormat)this*)->collator = newCollator;
1833	} else {
1834	delete newCollator;
1835	}
1836	}
1837	delete temp;
1838	}
1839	#endif
1840
1841	// if lenient-parse mode is off, this will be null
1842	// (see setLenientParseMode())
1843	return collator;
1844	}
1845
1846
1847	DecimalFormatSymbols*
1848	RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1849	{
1850	// lazy-evaluate the DecimalFormatSymbols object. This object
1851	// is shared by all DecimalFormat instances belonging to this
1852	// formatter
1853	if (decimalFormatSymbols == nullptr) {
1854	LocalPointer<DecimalFormatSymbols> temp(new DecimalFormatSymbols (locale, status), status);
1855	if (U_SUCCESS(status)) {
1856	decimalFormatSymbols = temp.orphan();
1857	}
1858	}
1859	return decimalFormatSymbols;
1860	}
1861
1862	/**
1863	* Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1864	* instances owned by this formatter.
1865	*/
1866	const DecimalFormatSymbols*
1867	RuleBasedNumberFormat::getDecimalFormatSymbols() const
1868	{
1869	return decimalFormatSymbols;
1870	}
1871
1872	NFRule*
1873	RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1874	{
1875	if (U_FAILURE(status)) {
1876	return nullptr;
1877	}
1878	if (defaultInfinityRule == NULL) {
1879	UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1880	rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1881	LocalPointer<NFRule> temp(new NFRule (this, rule, status), status);
1882	if (U_SUCCESS(status)) {
1883	defaultInfinityRule = temp.orphan();
1884	}
1885	}
1886	return defaultInfinityRule;
1887	}
1888
1889	const NFRule*
1890	RuleBasedNumberFormat::getDefaultInfinityRule() const
1891	{
1892	return defaultInfinityRule;
1893	}
1894
1895	NFRule*
1896	RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1897	{
1898	if (U_FAILURE(status)) {
1899	return nullptr;
1900	}
1901	if (defaultNaNRule == nullptr) {
1902	UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1903	rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1904	LocalPointer<NFRule> temp(new NFRule (this, rule, status), status);
1905	if (U_SUCCESS(status)) {
1906	defaultNaNRule = temp.orphan();
1907	}
1908	}
1909	return defaultNaNRule;
1910	}
1911
1912	const NFRule*
1913	RuleBasedNumberFormat::getDefaultNaNRule() const
1914	{
1915	return defaultNaNRule;
1916	}
1917
1918	// De-owning the current localized symbols and adopt the new symbols.
1919	void
1920	RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1921	{
1922	if (symbolsToAdopt == NULL) {
1923	return; // do not allow caller to set decimalFormatSymbols to NULL
1924	}
1925
1926	if (decimalFormatSymbols != NULL) {
1927	delete decimalFormatSymbols;
1928	}
1929
1930	decimalFormatSymbols = symbolsToAdopt;
1931
1932	{
1933	// Apply the new decimalFormatSymbols by reparsing the rulesets
1934	UErrorCode status = U_ZERO_ERROR;
1935
1936	delete defaultInfinityRule;
1937	defaultInfinityRule = NULL;
1938	initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1939
1940	delete defaultNaNRule;
1941	defaultNaNRule = NULL;
1942	initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1943
1944	if (fRuleSets) {
1945	for (int32_t i = `0`; i < numRuleSets; i++) {
1946	fRuleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1947	}
1948	}
1949	}
1950	}
1951
1952	// Setting the symbols is equivalent to adopting a newly created localized symbols.
1953	void
1954	RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1955	{
1956	adoptDecimalFormatSymbols(new DecimalFormatSymbols (symbols));
1957	}
1958
1959	PluralFormat *
1960	RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1961	const UnicodeString &pattern,
1962	UErrorCode& status) const
1963	{
1964	auto pf = new* PluralFormat (locale, pluralType, pattern, status);
1965	if (pf == nullptr) {
1966	status = U_MEMORY_ALLOCATION_ERROR;
1967	}
1968	return pf;
1969	}
1970
1971	/**
1972	* Get the rounding mode.
1973	* @return A rounding mode
1974	*/
1975	DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const {
1976	return fRoundingMode;
1977	}
1978
1979	/**
1980	* Set the rounding mode. This has no effect unless the rounding
1981	* increment is greater than zero.
1982	* @param roundingMode A rounding mode
1983	*/
1984	void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) {
1985	fRoundingMode = roundingMode;
1986	}
1987
1988	U_NAMESPACE_END
1989
/* U_HAVE_RBNF */
1991	#endif
1992

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/rbnf.cpp