nfrs.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/nfrs.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	* Copyright (C) 1997-2015, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	******************************************************************************
8	* file name: nfrs.cpp
9	* encoding: UTF-8
10	* tab size: 8 (not used)
11	* indentation:4
12	*
13	* Modification history
14	* Date Name Comments
15	* 10/11/2001 Doug Ported from ICU4J
16	*/
17
18	#include "nfrs.h"
19
20	#if U_HAVE_RBNF
21
22	#include "unicode/uchar.h"
23	#include "nfrule.h"
24	#include "nfrlist.h"
25	#include "patternprops.h"
26	#include "putilimp.h"
27
28	#ifdef RBNF_DEBUG
29	#include "cmemory.h"
30	#endif
31
// Slot numbers of the special (non-numerical) rules stored in
// NFRuleSet::nonNumericalRules.
enum {
    /** -x */
    NEGATIVE_RULE_INDEX = 0,
    /** x.x */
    IMPROPER_FRACTION_RULE_INDEX = 1,
    /** 0.x */
    PROPER_FRACTION_RULE_INDEX = 2,
    /** x.0 */
    MASTER_RULE_INDEX = 3,
    /** Inf */
    INFINITY_RULE_INDEX = 4,
    /** NaN */
    NAN_RULE_INDEX = 5,
    /** Number of slots; used as the loop bound over nonNumericalRules. */
    NON_NUMERICAL_RULE_LENGTH = 6
};
47
48	U_NAMESPACE_BEGIN
49
#if 0
// euclid's algorithm works with doubles
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs.  We probably still
// want either 64-bit math, or BigInteger.
// (Disabled variant, kept for reference only; it is never compiled.)

static int64_t
util_lcm(int64_t x, int64_t y)
{
    x.abs();
    y.abs();

    if (x == 0 || y == 0) {
        return 0;
    } else {
        do {
            if (x < y) {
                int64_t t = x; x = y; y = t;
            }
            x -= y * (x/y);
        } while (x != 0);

        return y;
    }
}

#else
/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is computed with the binary GCD algorithm
 * and the result is formed as x / gcd * y.
 *
 * NOTE: operands are not normalized to their absolute value here.
 *
 * @param x first operand
 * @param y second operand
 * @return the least common multiple, or 0 if either operand is 0
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // A zero operand would keep the "both even" loop below spinning
    // forever (0 and 0) or produce a zero gcd and a division by zero
    // (0 and non-zero).  Mirror the disabled Euclid variant and report 0.
    if (x == 0 || y == 0) {
        return 0;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    int64_t x1 = x;
    int64_t y1 = y;

    // strip the power of two shared by both operands
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        // t is made odd, then replaces whichever operand has its sign
        while ((t & 1) == 0) {
            t = t >> 1;
        }
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    // restore the shared power of two
    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return x / gcd * y;
}
#endif
120
121	static const UChar gPercent = `0x0025`;
122	static const UChar gColon = `0x003a`;
123	static const UChar gSemicolon = `0x003b`;
124	static const UChar gLineFeed = `0x000a`;
125
126	static const UChar gPercentPercent[] =
127	{
128	`0x25`, `0x25`, `0`
129	}; / "%%" /
130
131	static const UChar gNoparse[] =
132	{
133	`0x40`, `0x6E`, `0x6F`, `0x70`, `0x61`, `0x72`, `0x73`, `0x65`, `0`
134	}; / "@noparse" /
135
136	NFRuleSet::NFRuleSet(RuleBasedNumberFormat _owner, UnicodeString descriptions, int32_t index, UErrorCode& status)
137	: name ()
138	, rules (`0`)
139	, owner(_owner)
140	, fractionRules ()
141	, fIsFractionRuleSet(FALSE)
142	, fIsPublic(FALSE)
143	, fIsParseable(TRUE)
144	{
145	for (int32_t i = `0`; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146	nonNumericalRules[i] = NULL;
147	}
148
149	if (U_FAILURE(status)) {
150	return;
151	}
152
153	UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154
155	if (description.length() == `0`) {
156	// throw new IllegalArgumentException("Empty rule set description");
157	status = U_PARSE_ERROR;
158	return;
159	}
160
161	// if the description begins with a rule set name (the rule set
162	// name can be omitted in formatter descriptions that consist
163	// of only one rule set), copy it out into our "name" member
164	// and delete it from the description
165	if (description.charAt(`0`) == gPercent) {
166	int32_t pos = description.indexOf(gColon);
167	if (pos == -`1`) {
168	// throw new IllegalArgumentException("Rule set name doesn't end in colon");
169	status = U_PARSE_ERROR;
170	} else {
171	name.setTo(description, `0`, pos);
172	while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
173	}
174	description.remove(`0`, pos);
175	}
176	} else {
177	name.setTo(UNICODE_STRING_SIMPLE("%default"));
178	}
179
180	if (description.length() == `0`) {
181	// throw new IllegalArgumentException("Empty rule set description");
182	status = U_PARSE_ERROR;
183	}
184
185	fIsPublic = name.indexOf(gPercentPercent, `2`, `0`) != `0`;
186
187	if ( name.endsWith(gNoparse,`8`) ) {
188	fIsParseable = FALSE;
189	name.truncate(name.length()-`8`); // remove the @noparse from the name
190	}
191
192	// all of the other members of NFRuleSet are initialized
193	// by parseRules()
194	}
195
196	void
197	NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
198	{
199	// start by creating a Vector whose elements are Strings containing
200	// the descriptions of the rules (one rule per element). The rules
201	// are separated by semicolons (there's no escape facility: ALL
202	// semicolons are rule delimiters)
203
204	if (U_FAILURE(status)) {
205	return;
206	}
207
208	// ensure we are starting with an empty rule list
209	rules.deleteAll();
210
211	// dlf - the original code kept a separate description array for no reason,
212	// so I got rid of it. The loop was too complex so I simplified it.
213
214	UnicodeString currentDescription;
215	int32_t oldP = `0`;
216	while (oldP < description.length()) {
217	int32_t p = description.indexOf(gSemicolon, oldP);
218	if (p == -`1`) {
219	p = description.length();
220	}
221	currentDescription.setTo(description, oldP, p - oldP);
222	NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223	oldP = p + `1`;
224	}
225
226	// for rules that didn't specify a base value, their base values
227	// were initialized to 0. Make another pass through the list and
228	// set all those rules' base values. We also remove any special
229	// rules from the list and put them into their own member variables
230	int64_t defaultBaseValue = `0`;
231
232	// (this isn't a for loop because we might be deleting items from
233	// the vector-- we want to make sure we only increment i when
234	// we _didn't_ delete aything from the vector)
235	int32_t rulesSize = rules.size();
236	for (int32_t i = `0`; i < rulesSize; i++) {
237	NFRule* rule = rules [i];
238	int64_t baseValue = rule->getBaseValue();
239
240	if (baseValue == `0`) {
241	// if the rule's base value is 0, fill in a default
242	// base value (this will be 1 plus the preceding
243	// rule's base value for regular rule sets, and the
244	// same as the preceding rule's base value in fraction
245	// rule sets)
246	rule->setBaseValue(defaultBaseValue, status);
247	}
248	else {
249	// if it's a regular rule that already knows its base value,
250	// check to make sure the rules are in order, and update
251	// the default base value for the next rule
252	if (baseValue < defaultBaseValue) {
253	// throw new IllegalArgumentException("Rules are not in order");
254	status = U_PARSE_ERROR;
255	return;
256	}
257	defaultBaseValue = baseValue;
258	}
259	if (!fIsFractionRuleSet) {
260	++defaultBaseValue;
261	}
262	}
263	}
264
265	/**
266	* Set one of the non-numerical rules.
267	* @param rule The rule to set.
268	*/
269	void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270	int64_t baseValue = rule->getBaseValue();
271	if (baseValue == NFRule::kNegativeNumberRule) {
272	delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273	nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274	}
275	else if (baseValue == NFRule::kImproperFractionRule) {
276	setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
277	}
278	else if (baseValue == NFRule::kProperFractionRule) {
279	setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
280	}
281	else if (baseValue == NFRule::kMasterRule) {
282	setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
283	}
284	else if (baseValue == NFRule::kInfinityRule) {
285	delete nonNumericalRules[INFINITY_RULE_INDEX];
286	nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287	}
288	else if (baseValue == NFRule::kNaNRule) {
289	delete nonNumericalRules[NAN_RULE_INDEX];
290	nonNumericalRules[NAN_RULE_INDEX] = rule;
291	}
292	}
293
294	/**
295	* Determine the best fraction rule to use. Rules matching the decimal point from
296	* DecimalFormatSymbols become the main set of rules to use.
297	* @param originalIndex The index into nonNumericalRules
298	* @param newRule The new rule to consider
299	* @param rememberRule Should the new rule be added to fractionRules.
300	*/
301	void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
302	if (rememberRule) {
303	fractionRules.add(newRule);
304	}
305	NFRule *bestResult = nonNumericalRules[originalIndex];
306	if (bestResult == NULL) {
307	nonNumericalRules[originalIndex] = newRule;
308	}
309	else {
310	// We have more than one. Which one is better?
311	const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
312	if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(`0`)
313	== newRule->getDecimalPoint())
314	{
315	nonNumericalRules[originalIndex] = newRule;
316	}
317	// else leave it alone
318	}
319	}
320
321	NFRuleSet::~NFRuleSet()
322	{
323	for (int i = `0`; i < NON_NUMERICAL_RULE_LENGTH; i++) {
324	if (i != IMPROPER_FRACTION_RULE_INDEX
325	&& i != PROPER_FRACTION_RULE_INDEX
326	&& i != MASTER_RULE_INDEX)
327	{
328	delete nonNumericalRules[i];
329	}
330	// else it will be deleted via NFRuleList fractionRules
331	}
332	}
333
334	static UBool
335	util_equalRules(const NFRule* rule1, const NFRule* rule2)
336	{
337	if (rule1) {
338	if (rule2) {
339	return rule1 == rule2;
340	}
341	} else if (!rule2) {
342	return TRUE;
343	}
344	return FALSE;
345	}
346
347	UBool
348	NFRuleSet::operator==(const NFRuleSet& rhs) const
349	{
350	if (rules.size() == rhs.rules.size() &&
351	fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
352	name == rhs.name) {
353
354	// ...then compare the non-numerical rule lists...
355	for (int i = `0`; i < NON_NUMERICAL_RULE_LENGTH; i++) {
356	if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
357	return FALSE;
358	}
359	}
360
361	// ...then compare the rule lists...
362	for (uint32_t i = `0`; i < rules.size(); ++i) {
363	if (rules [i] != rhs.rules [i]) {
364	return FALSE;
365	}
366	}
367	return TRUE;
368	}
369	return FALSE;
370	}
371
372	void
373	NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
374	for (uint32_t i = `0`; i < rules.size(); ++i) {
375	rules [i]->setDecimalFormatSymbols(newSymbols, status);
376	}
377	// Switch the fraction rules to mirror the DecimalFormatSymbols.
378	for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
379	if (nonNumericalRules[nonNumericalIdx]) {
380	for (uint32_t fIdx = `0`; fIdx < fractionRules.size(); fIdx++) {
381	NFRule *fractionRule = fractionRules [fIdx];
382	if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
383	setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
384	}
385	}
386	}
387	}
388
389	for (uint32_t nnrIdx = `0`; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
390	NFRule *rule = nonNumericalRules[nnrIdx];
391	if (rule) {
392	rule->setDecimalFormatSymbols(newSymbols, status);
393	}
394	}
395	}
396
397	#define RECURSION_LIMIT 64
398
399	void
400	NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
401	{
402	if (recursionCount >= RECURSION_LIMIT) {
403	// stop recursion
404	status = U_INVALID_STATE_ERROR;
405	return;
406	}
407	const NFRule *rule = findNormalRule(number);
408	if (rule) { // else error, but can't report it
409	rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
410	}
411	}
412
413	void
414	NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
415	{
416	if (recursionCount >= RECURSION_LIMIT) {
417	// stop recursion
418	status = U_INVALID_STATE_ERROR;
419	return;
420	}
421	const NFRule *rule = findDoubleRule(number);
422	if (rule) { // else error, but can't report it
423	rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
424	}
425	}
426
427	const NFRule*
428	NFRuleSet::findDoubleRule(double number) const
429	{
430	// if this is a fraction rule set, use findFractionRuleSetRule()
431	if (isFractionRuleSet()) {
432	return findFractionRuleSetRule(number);
433	}
434
435	if (uprv_isNaN(number)) {
436	const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
437	if (!rule) {
438	rule = owner->getDefaultNaNRule();
439	}
440	return rule;
441	}
442
443	// if the number is negative, return the negative number rule
444	// (if there isn't a negative-number rule, we pretend it's a
445	// positive number)
446	if (number < `0`) {
447	if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
448	return nonNumericalRules[NEGATIVE_RULE_INDEX];
449	} else {
450	number = -number;
451	}
452	}
453
454	if (uprv_isInfinite(number)) {
455	const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
456	if (!rule) {
457	rule = owner->getDefaultInfinityRule();
458	}
459	return rule;
460	}
461
462	// if the number isn't an integer, we use one of the fraction rules...
463	if (number != uprv_floor(number)) {
464	// if the number is between 0 and 1, return the proper
465	// fraction rule
466	if (number < `1` && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
467	return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
468	}
469	// otherwise, return the improper fraction rule
470	else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
471	return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
472	}
473	}
474
475	// if there's a master rule, use it to format the number
476	if (nonNumericalRules[MASTER_RULE_INDEX]) {
477	return nonNumericalRules[MASTER_RULE_INDEX];
478	}
479
480	// and if we haven't yet returned a rule, use findNormalRule()
481	// to find the applicable rule
482	int64_t r = util64_fromDouble(number + `0.5`);
483	return findNormalRule(r);
484	}
485
486	const NFRule *
487	NFRuleSet::findNormalRule(int64_t number) const
488	{
489	// if this is a fraction rule set, use findFractionRuleSetRule()
490	// to find the rule (we should only go into this clause if the
491	// value is 0)
492	if (fIsFractionRuleSet) {
493	return findFractionRuleSetRule((double)number);
494	}
495
496	// if the number is negative, return the negative-number rule
497	// (if there isn't one, pretend the number is positive)
498	if (number < `0`) {
499	if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
500	return nonNumericalRules[NEGATIVE_RULE_INDEX];
501	} else {
502	number = -number;
503	}
504	}
505
506	// we have to repeat the preceding two checks, even though we
507	// do them in findRule(), because the version of format() that
508	// takes a long bypasses findRule() and goes straight to this
509	// function. This function does skip the fraction rules since
510	// we know the value is an integer (it also skips the master
511	// rule, since it's considered a fraction rule. Skipping the
512	// master rule in this function is also how we avoid infinite
513	// recursion)
514
515	// {dlf} unfortunately this fails if there are no rules except
516	// special rules. If there are no rules, use the master rule.
517
518	// binary-search the rule list for the applicable rule
519	// (a rule is used for all values from its base value to
520	// the next rule's base value)
521	int32_t hi = rules.size();
522	if (hi > `0`) {
523	int32_t lo = `0`;
524
525	while (lo < hi) {
526	int32_t mid = (lo + hi) / `2`;
527	if (rules [mid]->getBaseValue() == number) {
528	return rules [mid];
529	}
530	else if (rules [mid]->getBaseValue() > number) {
531	hi = mid;
532	}
533	else {
534	lo = mid + `1`;
535	}
536	}
537	if (hi == `0`) { // bad rule set, minimum base > 0
538	return NULL; // want to throw exception here
539	}
540
541	NFRule *result = rules [hi - `1`];
542
543	// use shouldRollBack() to see whether we need to invoke the
544	// rollback rule (see shouldRollBack()'s documentation for
545	// an explanation of the rollback rule). If we do, roll back
546	// one rule and return that one instead of the one we'd normally
547	// return
548	if (result->shouldRollBack(number)) {
549	if (hi == `1`) { // bad rule set, no prior rule to rollback to from this base
550	return NULL;
551	}
552	result = rules [hi - `2`];
553	}
554	return result;
555	}
556	// else use the master rule
557	return nonNumericalRules[MASTER_RULE_INDEX];
558	}
559
560	/**
561	* If this rule is a fraction rule set, this function is used by
562	* findRule() to select the most appropriate rule for formatting
563	* the number. Basically, the base value of each rule in the rule
564	* set is treated as the denominator of a fraction. Whichever
565	* denominator can produce the fraction closest in value to the
566	* number passed in is the result. If there's a tie, the earlier
567	* one in the list wins. (If there are two rules in a row with the
568	* same base value, the first one is used when the numerator of the
569	* fraction would be 1, and the second rule is used the rest of the
570	* time.
571	* @param number The number being formatted (which will always be
572	* a number between 0 and 1)
573	* @return The rule to use to format this number
574	*/
575	const NFRule*
576	NFRuleSet::findFractionRuleSetRule(double number) const
577	{
578	// the obvious way to do this (multiply the value being formatted
579	// by each rule's base value until you get an integral result)
580	// doesn't work because of rounding error. This method is more
581	// accurate
582
583	// find the least common multiple of the rules' base values
584	// and multiply this by the number being formatted. This is
585	// all the precision we need, and we can do all of the rest
586	// of the math using integer arithmetic
587	int64_t leastCommonMultiple = rules [`0`]->getBaseValue();
588	int64_t numerator;
589	{
590	for (uint32_t i = `1`; i < rules.size(); ++i) {
591	leastCommonMultiple = util_lcm(leastCommonMultiple, rules [i]->getBaseValue());
592	}
593	numerator = util64_fromDouble(number * (double)leastCommonMultiple + `0.5`);
594	}
595	// for each rule, do the following...
596	int64_t tempDifference;
597	int64_t difference = util64_fromDouble(uprv_maxMantissa());
598	int32_t winner = `0`;
599	for (uint32_t i = `0`; i < rules.size(); ++i) {
600	// "numerator" is the numerator of the fraction if the
601	// denominator is the LCD. The numerator if the rule's
602	// base value is the denominator is "numerator" times the
603	// base value divided bythe LCD. Here we check to see if
604	// that's an integer, and if not, how close it is to being
605	// an integer.
606	tempDifference = numerator * rules [i]->getBaseValue() % leastCommonMultiple;
607
608
609	// normalize the result of the above calculation: we want
610	// the numerator's distance from the CLOSEST multiple
611	// of the LCD
612	if (leastCommonMultiple - tempDifference < tempDifference) {
613	tempDifference = leastCommonMultiple - tempDifference;
614	}
615
616	// if this is as close as we've come, keep track of how close
617	// that is, and the line number of the rule that did it. If
618	// we've scored a direct hit, we don't have to look at any more
619	// rules
620	if (tempDifference < difference) {
621	difference = tempDifference;
622	winner = i;
623	if (difference == `0`) {
624	break;
625	}
626	}
627	}
628
629	// if we have two successive rules that both have the winning base
630	// value, then the first one (the one we found above) is used if
631	// the numerator of the fraction is 1 and the second one is used if
632	// the numerator of the fraction is anything else (this lets us
633	// do things like "one third"/"two thirds" without haveing to define
634	// a whole bunch of extra rule sets)
635	if ((unsigned)(winner + `1`) < rules.size() &&
636	rules [winner + `1`]->getBaseValue() == rules [winner]->getBaseValue()) {
637	double n = ((double)rules [winner]->getBaseValue()) * number;
638	if (n < `0.5` \|\| n >= `2`) {
639	++winner;
640	}
641	}
642
643	// finally, return the winning rule
644	return rules [winner];
645	}
646
647	/**
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and returns
 * the value produced by the rule that matched the most characters
 * in the source string.
652	* @param text The string to parse
653	* @param parsePosition The initial position is ignored and assumed
654	* to be 0. On exit, this object has been updated to point to the
655	* first character position this rule set didn't consume.
656	* @param upperBound Limits the rules that can be allowed to match.
657	* Only rules whose base values are strictly less than upperBound
658	* are considered.
659	* @return The numerical result of parsing this string. This will
660	* be the matching rule's base value, composed appropriately with
661	* the results of matching any of its substitutions. The object
662	* will be an instance of Long if it's an integral value; otherwise,
663	* it will be an instance of Double. This function always returns
664	* a valid object: If nothing matched the input string at all,
665	* this function returns new Long(0), and the parse position is
666	* left unchanged.
667	*/
#ifdef RBNF_DEBUG
#include <stdio.h>

// Debug helper: extracts a UnicodeString into a temporary char buffer and
// prints it to f.  Nothing is printed if the buffer cannot be allocated.
static void dumpUS(FILE* f, const UnicodeString& us) {
    int len = us.length();
    char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
    if (buf != NULL) {
        us.extract(0, len, buf);
        buf[len] = 0;
        fprintf(f, "%s", buf);
        uprv_free(buf); //delete[] buf;
    }
}
#endif
682
683	UBool
684	NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const
685	{
686	// try matching each rule in the rule set against the text being
687	// parsed. Whichever one matches the most characters is the one
688	// that determines the value we return.
689
690	result.setLong(`0`);
691
692	// dump out if there's no text to parse
693	if (text.length() == `0`) {
694	return `0`;
695	}
696
697	ParsePosition highWaterMark;
698	ParsePosition workingPos = pos;
699
700	#ifdef RBNF_DEBUG
701	fprintf(stderr, "<nfrs> %x '", this);
702	dumpUS(stderr, name);
703	fprintf(stderr, "' text '");
704	dumpUS(stderr, text);
705	fprintf(stderr, "'\n");
706	fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != `0`);
707	#endif
708	// Try each of the negative rules, fraction rules, infinity rules and NaN rules
709	for (int i = `0`; i < NON_NUMERICAL_RULE_LENGTH; i++) {
710	if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & `1`) == `0`) {
711	// Mark this rule as being executed so that we don't try to execute it again.
712	nonNumericalExecutedRuleMask \|= `1` << i;
713
714	Formattable tempResult;
715	UBool success = nonNumericalRules[i]->doParse(text, workingPos, `0`, upperBound, nonNumericalExecutedRuleMask, tempResult);
716	if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
717	result = tempResult;
718	highWaterMark = workingPos;
719	}
720	workingPos = pos;
721	}
722	}
723	#ifdef RBNF_DEBUG
724	fprintf(stderr, "<nfrs> continue other with text '");
725	dumpUS(stderr, text);
726	fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
727	#endif
728
729	// finally, go through the regular rules one at a time. We start
730	// at the end of the list because we want to try matching the most
731	// sigificant rule first (this helps ensure that we parse
732	// "five thousand three hundred six" as
733	// "(five thousand) (three hundred) (six)" rather than
734	// "((five thousand three) hundred) (six)"). Skip rules whose
735	// base values are higher than the upper bound (again, this helps
736	// limit ambiguity by making sure the rules that match a rule's
737	// are less significant than the rule containing the substitutions)/
738	{
739	int64_t ub = util64_fromDouble(upperBound);
740	#ifdef RBNF_DEBUG
741	{
742	char ubstr[`64`];
743	util64_toa(ub, ubstr, `64`);
744	char ubstrhex[`64`];
745	util64_toa(ub, ubstrhex, `64`, `16`);
746	fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
747	}
748	#endif
749	for (int32_t i = rules.size(); --i >= `0` && highWaterMark.getIndex() < text.length();) {
750	if ((!fIsFractionRuleSet) && (rules [i]->getBaseValue() >= ub)) {
751	continue;
752	}
753	Formattable tempResult;
754	UBool success = rules [i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
755	if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
756	result = tempResult;
757	highWaterMark = workingPos;
758	}
759	workingPos = pos;
760	}
761	}
762	#ifdef RBNF_DEBUG
763	fprintf(stderr, "<nfrs> exit\n");
764	#endif
765	// finally, update the parse postion we were passed to point to the
766	// first character we didn't use, and return the result that
767	// corresponds to that string of characters
768	pos = highWaterMark;
769
770	return `1`;
771	}
772
773	void
774	NFRuleSet::appendRules(UnicodeString& result) const
775	{
776	uint32_t i;
777
778	// the rule set name goes first...
779	result.append(name);
780	result.append(gColon);
781	result.append(gLineFeed);
782
783	// followed by the regular rules...
784	for (i = `0`; i < rules.size(); i++) {
785	rules [i]->_appendRuleText(result);
786	result.append(gLineFeed);
787	}
788
789	// followed by the special rules (if they exist)
790	for (i = `0`; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
791	NFRule *rule = nonNumericalRules[i];
792	if (nonNumericalRules[i]) {
793	if (rule->getBaseValue() == NFRule::kImproperFractionRule
794	\|\| rule->getBaseValue() == NFRule::kProperFractionRule
795	\|\| rule->getBaseValue() == NFRule::kMasterRule)
796	{
797	for (uint32_t fIdx = `0`; fIdx < fractionRules.size(); fIdx++) {
798	NFRule *fractionRule = fractionRules [fIdx];
799	if (fractionRule->getBaseValue() == rule->getBaseValue()) {
800	fractionRule->_appendRuleText(result);
801	result.append(gLineFeed);
802	}
803	}
804	}
805	else {
806	rule->_appendRuleText(result);
807	result.append(gLineFeed);
808	}
809	}
810	}
811	}
812
813	// utility functions
814
815	int64_t util64_fromDouble(double d) {
816	int64_t result = `0`;
817	if (!uprv_isNaN(d)) {
818	double mant = uprv_maxMantissa();
819	if (d < -mant) {
820	d = -mant;
821	} else if (d > mant) {
822	d = mant;
823	}
824	UBool neg = d < `0`;
825	if (neg) {
826	d = -d;
827	}
828	result = (int64_t)uprv_floor(d);
829	if (neg) {
830	result = -result;
831	}
832	}
833	return result;
834	}
835
/**
 * Raises base to the given exponent using square-and-multiply.
 *
 * The arithmetic is plain uint64_t multiplication; no overflow check is
 * made.
 *
 * @param base     the base; a base of 0 yields 0 for every exponent,
 *                 including 0
 * @param exponent the exponent
 * @return base raised to exponent
 */
uint64_t util64_pow(uint32_t base, uint16_t exponent) {
    if (base == 0) {
        return 0;
    }
    uint64_t result = 1;
    uint64_t factor = base;  // base^(2^k) for the bit currently examined
    while (true) {
        if ((exponent & 1) == 1) {
            result *= factor;
        }
        exponent >>= 1;
        if (exponent == 0) {
            break;
        }
        factor *= factor;
    }
    return result;
}
854
// Digit characters for radices up to 36, indexed by digit value:
// 0x30..0x39 are "0".."9" and 0x61..0x7a are "a".."z".
static const uint8_t asciiDigits[] = {
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    0x77u, 0x78u, 0x79u, 0x7au,
};
862
863	static const UChar kUMinus = (UChar)`0x002d`;
864
865	#ifdef RBNF_DEBUG
// Sign character recognized and written by the char-based conversions.
static const char kMinus = '-';

// Lookup table for the code points 0x00..0x7f.  An entry of 0 means
// "not a digit"; otherwise the high bit is set and the low seven bits
// hold the digit value (0..9 for 0x30..0x39, 10..35 for the upper- and
// lower-case letters).
static const uint8_t digitInfo[] = {
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
};
886
887	int64_t util64_atoi(const char* str, uint32_t radix)
888	{
889	if (radix > `36`) {
890	radix = `36`;
891	} else if (radix < `2`) {
892	radix = `2`;
893	}
894	int64_t lradix = radix;
895
896	int neg = `0`;
897	if (*str == kMinus) {
898	++str;
899	neg = `1`;
900	}
901	int64_t result = `0`;
902	uint8_t b;
903	while ((b = digitInfo[*str++]) && ((b &= `0x7f`) < radix)) {
904	result *= lradix;
905	result += (int32_t)b;
906	}
907	if (neg) {
908	result = -result;
909	}
910	return result;
911	}
912
913	int64_t util64_utoi(const UChar* str, uint32_t radix)
914	{
915	if (radix > `36`) {
916	radix = `36`;
917	} else if (radix < `2`) {
918	radix = `2`;
919	}
920	int64_t lradix = radix;
921
922	int neg = `0`;
923	if (*str == kUMinus) {
924	++str;
925	neg = `1`;
926	}
927	int64_t result = `0`;
928	UChar c;
929	uint8_t b;
930	while (((c = *str++) < `0x0080`) && (b = digitInfo[c]) && ((b &= `0x7f`) < radix)) {
931	result *= lradix;
932	result += (int32_t)b;
933	}
934	if (neg) {
935	result = -result;
936	}
937	return result;
938	}
939
940	uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
941	{
942	if (radix > `36`) {
943	radix = `36`;
944	} else if (radix < `2`) {
945	radix = `2`;
946	}
947	int64_t base = radix;
948
949	char* p = buf;
950	if (len && (w < `0`) && (radix == `10`) && !raw) {
951	w = -w;
952	*p++ = kMinus;
953	--len;
954	} else if (len && (w == `0`)) {
955	p++ = (char*)raw ? `0` : asciiDigits[`0`];
956	--len;
957	}
958
959	while (len && w != `0`) {
960	int64_t n = w / base;
961	int64_t m = n * base;
962	int32_t d = (int32_t)(w-m);
963	p++ = raw ? (char*)d : asciiDigits[d];
964	w = n;
965	--len;
966	}
967	if (len) {
968	p = `0`; // null terminate if room for caller convenience*
969	}
970
971	len = p - buf;
972	if (*buf == kMinus) {
973	++buf;
974	}
975	while (--p > buf) {
976	char c = *p;
977	p = buf;
978	*buf = c;
979	++buf;
980	}
981
982	return len;
983	}
984	#endif
985
986	uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
987	{
988	if (radix > `36`) {
989	radix = `36`;
990	} else if (radix < `2`) {
991	radix = `2`;
992	}
993	int64_t base = radix;
994
995	UChar* p = buf;
996	if (len && (w < `0`) && (radix == `10`) && !raw) {
997	w = -w;
998	*p++ = kUMinus;
999	--len;
1000	} else if (len && (w == `0`)) {
1001	*p++ = (UChar)raw ? `0` : asciiDigits[`0`];
1002	--len;
1003	}
1004
1005	while (len && (w != `0`)) {
1006	int64_t n = w / base;
1007	int64_t m = n * base;
1008	int32_t d = (int32_t)(w-m);
1009	*p++ = (UChar)(raw ? d : asciiDigits[d]);
1010	w = n;
1011	--len;
1012	}
1013	if (len) {
1014	p = `0`; // null terminate if room for caller convenience*
1015	}
1016
1017	len = (uint32_t)(p - buf);
1018	if (*buf == kUMinus) {
1019	++buf;
1020	}
1021	while (--p > buf) {
1022	UChar c = *p;
1023	p = buf;
1024	*buf = c;
1025	++buf;
1026	}
1027
1028	return len;
1029	}
1030
1031
1032	U_NAMESPACE_END
1033
/* U_HAVE_RBNF */
1035	#endif
1036

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/nfrs.cpp