1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ****************************************************************************** |
5 | * Copyright (C) 1997-2015, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ****************************************************************************** |
8 | * file name: nfrs.cpp |
9 | * encoding: UTF-8 |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * Modification history |
14 | * Date Name Comments |
15 | * 10/11/2001 Doug Ported from ICU4J |
16 | */ |
17 | |
18 | #include "nfrs.h" |
19 | |
20 | #if U_HAVE_RBNF |
21 | |
22 | #include "unicode/uchar.h" |
23 | #include "nfrule.h" |
24 | #include "nfrlist.h" |
25 | #include "patternprops.h" |
26 | #include "putilimp.h" |
27 | |
28 | #ifdef RBNF_DEBUG |
29 | #include "cmemory.h" |
30 | #endif |
31 | |
/**
 * Slot numbers of the special (non-numerical) rules held in
 * NFRuleSet::nonNumericalRules, plus the total slot count.
 * The enumerators rely on implicit numbering starting at 0.
 */
enum {
    NEGATIVE_RULE_INDEX = 0,        // -x
    IMPROPER_FRACTION_RULE_INDEX,   // x.x  (== 1)
    PROPER_FRACTION_RULE_INDEX,     // 0.x  (== 2)
    MASTER_RULE_INDEX,              // x.0  (== 3)
    INFINITY_RULE_INDEX,            // Inf  (== 4)
    NAN_RULE_INDEX,                 // NaN  (== 5)
    NON_NUMERICAL_RULE_LENGTH       // number of slots (== 6)
};
47 | |
48 | U_NAMESPACE_BEGIN |
49 | |
50 | #if 0 |
// NOTE: this variant is compiled out by the enclosing "#if 0" and is kept
// for reference only.
// euclid's algorithm works with doubles
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs.  We probably still
// want either 64-bit math, or BigInteger.

// NOTE(review): as written this would not compile if re-enabled, because
// int64_t has no abs() member.  The loop below is Euclid's algorithm and the
// value returned is the last non-zero remainder, i.e. it looks like the
// greatest common divisor rather than the least common multiple -- confirm
// before reviving this code.
static int64_t
util_lcm(int64_t x, int64_t y)
{
    x.abs();
    y.abs();

    if (x == 0 || y == 0) {
        return 0;
    } else {
        do {
            // keep the larger value in x
            if (x < y) {
                int64_t t = x; x = y; y = t;
            }
            // x = x mod y
            x -= y * (x/y);
        } while (x != 0);

        return y;
    }
}
75 | |
76 | #else |
/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is found with the binary gcd algorithm and
 * the result is computed as x / gcd * y.
 *
 * @param x first value; expected to be non-negative
 * @param y second value; expected to be non-negative
 * @return the least common multiple, or 0 if either argument is 0
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // The lcm of anything with zero is zero.  This must be handled up front:
    // with x == y == 0 the "strip common factors of two" loop below never
    // terminates, and with only x == 0 the computed gcd is 0, which makes
    // the final division a division by zero.
    if (x == 0 || y == 0) {
        return 0;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    int64_t x1 = x;
    int64_t y1 = y;

    // strip the factors of two common to both values; they are restored
    // by the shift when the gcd is assembled
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    // t carries the running difference; its sign tells which of x1/y1
    // it will replace
    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        // remaining factors of two are not part of the gcd
        while ((t & 1) == 0) {
            t = t >> 1;
        }
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y); divide first to limit overflow
    return x / gcd * y;
}
119 | #endif |
120 | |
// Single UTF-16 code units used while reading and writing rule set
// descriptions in this file.
static const UChar gPercent = 0x0025;   // '%' - first character of a rule set name
static const UChar gColon = 0x003a;     // ':' - ends a rule set name
static const UChar gSemicolon = 0x003b; // ';' - separates rules within a description
static const UChar gLineFeed = 0x000a;  // line feed - appended after each line by appendRules()

// Zero-terminated prefix; the constructor treats a rule set whose name
// starts with it as non-public.
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// Zero-terminated suffix (8 code units); the constructor strips it from the
// rule set name and marks the rule set as not parseable.
static const UChar gNoparse[] =
{
    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
}; /* "@noparse" */
135 | |
136 | NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status) |
137 | : name() |
138 | , rules(0) |
139 | , owner(_owner) |
140 | , fractionRules() |
141 | , fIsFractionRuleSet(FALSE) |
142 | , fIsPublic(FALSE) |
143 | , fIsParseable(TRUE) |
144 | { |
145 | for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
146 | nonNumericalRules[i] = NULL; |
147 | } |
148 | |
149 | if (U_FAILURE(status)) { |
150 | return; |
151 | } |
152 | |
153 | UnicodeString& description = descriptions[index]; // !!! make sure index is valid |
154 | |
155 | if (description.length() == 0) { |
156 | // throw new IllegalArgumentException("Empty rule set description"); |
157 | status = U_PARSE_ERROR; |
158 | return; |
159 | } |
160 | |
161 | // if the description begins with a rule set name (the rule set |
162 | // name can be omitted in formatter descriptions that consist |
163 | // of only one rule set), copy it out into our "name" member |
164 | // and delete it from the description |
165 | if (description.charAt(0) == gPercent) { |
166 | int32_t pos = description.indexOf(gColon); |
167 | if (pos == -1) { |
168 | // throw new IllegalArgumentException("Rule set name doesn't end in colon"); |
169 | status = U_PARSE_ERROR; |
170 | } else { |
171 | name.setTo(description, 0, pos); |
172 | while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { |
173 | } |
174 | description.remove(0, pos); |
175 | } |
176 | } else { |
177 | name.setTo(UNICODE_STRING_SIMPLE("%default" )); |
178 | } |
179 | |
180 | if (description.length() == 0) { |
181 | // throw new IllegalArgumentException("Empty rule set description"); |
182 | status = U_PARSE_ERROR; |
183 | } |
184 | |
185 | fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; |
186 | |
187 | if ( name.endsWith(gNoparse,8) ) { |
188 | fIsParseable = FALSE; |
189 | name.truncate(name.length()-8); // remove the @noparse from the name |
190 | } |
191 | |
192 | // all of the other members of NFRuleSet are initialized |
193 | // by parseRules() |
194 | } |
195 | |
196 | void |
197 | NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) |
198 | { |
199 | // start by creating a Vector whose elements are Strings containing |
200 | // the descriptions of the rules (one rule per element). The rules |
201 | // are separated by semicolons (there's no escape facility: ALL |
202 | // semicolons are rule delimiters) |
203 | |
204 | if (U_FAILURE(status)) { |
205 | return; |
206 | } |
207 | |
208 | // ensure we are starting with an empty rule list |
209 | rules.deleteAll(); |
210 | |
211 | // dlf - the original code kept a separate description array for no reason, |
212 | // so I got rid of it. The loop was too complex so I simplified it. |
213 | |
214 | UnicodeString currentDescription; |
215 | int32_t oldP = 0; |
216 | while (oldP < description.length()) { |
217 | int32_t p = description.indexOf(gSemicolon, oldP); |
218 | if (p == -1) { |
219 | p = description.length(); |
220 | } |
221 | currentDescription.setTo(description, oldP, p - oldP); |
222 | NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); |
223 | oldP = p + 1; |
224 | } |
225 | |
226 | // for rules that didn't specify a base value, their base values |
227 | // were initialized to 0. Make another pass through the list and |
228 | // set all those rules' base values. We also remove any special |
229 | // rules from the list and put them into their own member variables |
230 | int64_t defaultBaseValue = 0; |
231 | |
232 | // (this isn't a for loop because we might be deleting items from |
233 | // the vector-- we want to make sure we only increment i when |
234 | // we _didn't_ delete aything from the vector) |
235 | int32_t rulesSize = rules.size(); |
236 | for (int32_t i = 0; i < rulesSize; i++) { |
237 | NFRule* rule = rules[i]; |
238 | int64_t baseValue = rule->getBaseValue(); |
239 | |
240 | if (baseValue == 0) { |
241 | // if the rule's base value is 0, fill in a default |
242 | // base value (this will be 1 plus the preceding |
243 | // rule's base value for regular rule sets, and the |
244 | // same as the preceding rule's base value in fraction |
245 | // rule sets) |
246 | rule->setBaseValue(defaultBaseValue, status); |
247 | } |
248 | else { |
249 | // if it's a regular rule that already knows its base value, |
250 | // check to make sure the rules are in order, and update |
251 | // the default base value for the next rule |
252 | if (baseValue < defaultBaseValue) { |
253 | // throw new IllegalArgumentException("Rules are not in order"); |
254 | status = U_PARSE_ERROR; |
255 | return; |
256 | } |
257 | defaultBaseValue = baseValue; |
258 | } |
259 | if (!fIsFractionRuleSet) { |
260 | ++defaultBaseValue; |
261 | } |
262 | } |
263 | } |
264 | |
265 | /** |
266 | * Set one of the non-numerical rules. |
267 | * @param rule The rule to set. |
268 | */ |
269 | void NFRuleSet::setNonNumericalRule(NFRule *rule) { |
270 | int64_t baseValue = rule->getBaseValue(); |
271 | if (baseValue == NFRule::kNegativeNumberRule) { |
272 | delete nonNumericalRules[NEGATIVE_RULE_INDEX]; |
273 | nonNumericalRules[NEGATIVE_RULE_INDEX] = rule; |
274 | } |
275 | else if (baseValue == NFRule::kImproperFractionRule) { |
276 | setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE); |
277 | } |
278 | else if (baseValue == NFRule::kProperFractionRule) { |
279 | setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE); |
280 | } |
281 | else if (baseValue == NFRule::kMasterRule) { |
282 | setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE); |
283 | } |
284 | else if (baseValue == NFRule::kInfinityRule) { |
285 | delete nonNumericalRules[INFINITY_RULE_INDEX]; |
286 | nonNumericalRules[INFINITY_RULE_INDEX] = rule; |
287 | } |
288 | else if (baseValue == NFRule::kNaNRule) { |
289 | delete nonNumericalRules[NAN_RULE_INDEX]; |
290 | nonNumericalRules[NAN_RULE_INDEX] = rule; |
291 | } |
292 | } |
293 | |
294 | /** |
295 | * Determine the best fraction rule to use. Rules matching the decimal point from |
296 | * DecimalFormatSymbols become the main set of rules to use. |
297 | * @param originalIndex The index into nonNumericalRules |
298 | * @param newRule The new rule to consider |
299 | * @param rememberRule Should the new rule be added to fractionRules. |
300 | */ |
301 | void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) { |
302 | if (rememberRule) { |
303 | fractionRules.add(newRule); |
304 | } |
305 | NFRule *bestResult = nonNumericalRules[originalIndex]; |
306 | if (bestResult == NULL) { |
307 | nonNumericalRules[originalIndex] = newRule; |
308 | } |
309 | else { |
310 | // We have more than one. Which one is better? |
311 | const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols(); |
312 | if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0) |
313 | == newRule->getDecimalPoint()) |
314 | { |
315 | nonNumericalRules[originalIndex] = newRule; |
316 | } |
317 | // else leave it alone |
318 | } |
319 | } |
320 | |
321 | NFRuleSet::~NFRuleSet() |
322 | { |
323 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
324 | if (i != IMPROPER_FRACTION_RULE_INDEX |
325 | && i != PROPER_FRACTION_RULE_INDEX |
326 | && i != MASTER_RULE_INDEX) |
327 | { |
328 | delete nonNumericalRules[i]; |
329 | } |
330 | // else it will be deleted via NFRuleList fractionRules |
331 | } |
332 | } |
333 | |
334 | static UBool |
335 | util_equalRules(const NFRule* rule1, const NFRule* rule2) |
336 | { |
337 | if (rule1) { |
338 | if (rule2) { |
339 | return *rule1 == *rule2; |
340 | } |
341 | } else if (!rule2) { |
342 | return TRUE; |
343 | } |
344 | return FALSE; |
345 | } |
346 | |
347 | UBool |
348 | NFRuleSet::operator==(const NFRuleSet& rhs) const |
349 | { |
350 | if (rules.size() == rhs.rules.size() && |
351 | fIsFractionRuleSet == rhs.fIsFractionRuleSet && |
352 | name == rhs.name) { |
353 | |
354 | // ...then compare the non-numerical rule lists... |
355 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
356 | if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { |
357 | return FALSE; |
358 | } |
359 | } |
360 | |
361 | // ...then compare the rule lists... |
362 | for (uint32_t i = 0; i < rules.size(); ++i) { |
363 | if (*rules[i] != *rhs.rules[i]) { |
364 | return FALSE; |
365 | } |
366 | } |
367 | return TRUE; |
368 | } |
369 | return FALSE; |
370 | } |
371 | |
372 | void |
373 | NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) { |
374 | for (uint32_t i = 0; i < rules.size(); ++i) { |
375 | rules[i]->setDecimalFormatSymbols(newSymbols, status); |
376 | } |
377 | // Switch the fraction rules to mirror the DecimalFormatSymbols. |
378 | for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) { |
379 | if (nonNumericalRules[nonNumericalIdx]) { |
380 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { |
381 | NFRule *fractionRule = fractionRules[fIdx]; |
382 | if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) { |
383 | setBestFractionRule(nonNumericalIdx, fractionRule, FALSE); |
384 | } |
385 | } |
386 | } |
387 | } |
388 | |
389 | for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) { |
390 | NFRule *rule = nonNumericalRules[nnrIdx]; |
391 | if (rule) { |
392 | rule->setDecimalFormatSymbols(newSymbols, status); |
393 | } |
394 | } |
395 | } |
396 | |
397 | #define RECURSION_LIMIT 64 |
398 | |
399 | void |
400 | NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
401 | { |
402 | if (recursionCount >= RECURSION_LIMIT) { |
403 | // stop recursion |
404 | status = U_INVALID_STATE_ERROR; |
405 | return; |
406 | } |
407 | const NFRule *rule = findNormalRule(number); |
408 | if (rule) { // else error, but can't report it |
409 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
410 | } |
411 | } |
412 | |
413 | void |
414 | NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
415 | { |
416 | if (recursionCount >= RECURSION_LIMIT) { |
417 | // stop recursion |
418 | status = U_INVALID_STATE_ERROR; |
419 | return; |
420 | } |
421 | const NFRule *rule = findDoubleRule(number); |
422 | if (rule) { // else error, but can't report it |
423 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
424 | } |
425 | } |
426 | |
427 | const NFRule* |
428 | NFRuleSet::findDoubleRule(double number) const |
429 | { |
430 | // if this is a fraction rule set, use findFractionRuleSetRule() |
431 | if (isFractionRuleSet()) { |
432 | return findFractionRuleSetRule(number); |
433 | } |
434 | |
435 | if (uprv_isNaN(number)) { |
436 | const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX]; |
437 | if (!rule) { |
438 | rule = owner->getDefaultNaNRule(); |
439 | } |
440 | return rule; |
441 | } |
442 | |
443 | // if the number is negative, return the negative number rule |
444 | // (if there isn't a negative-number rule, we pretend it's a |
445 | // positive number) |
446 | if (number < 0) { |
447 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
448 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; |
449 | } else { |
450 | number = -number; |
451 | } |
452 | } |
453 | |
454 | if (uprv_isInfinite(number)) { |
455 | const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX]; |
456 | if (!rule) { |
457 | rule = owner->getDefaultInfinityRule(); |
458 | } |
459 | return rule; |
460 | } |
461 | |
462 | // if the number isn't an integer, we use one of the fraction rules... |
463 | if (number != uprv_floor(number)) { |
464 | // if the number is between 0 and 1, return the proper |
465 | // fraction rule |
466 | if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) { |
467 | return nonNumericalRules[PROPER_FRACTION_RULE_INDEX]; |
468 | } |
469 | // otherwise, return the improper fraction rule |
470 | else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) { |
471 | return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]; |
472 | } |
473 | } |
474 | |
475 | // if there's a master rule, use it to format the number |
476 | if (nonNumericalRules[MASTER_RULE_INDEX]) { |
477 | return nonNumericalRules[MASTER_RULE_INDEX]; |
478 | } |
479 | |
480 | // and if we haven't yet returned a rule, use findNormalRule() |
481 | // to find the applicable rule |
482 | int64_t r = util64_fromDouble(number + 0.5); |
483 | return findNormalRule(r); |
484 | } |
485 | |
486 | const NFRule * |
487 | NFRuleSet::findNormalRule(int64_t number) const |
488 | { |
489 | // if this is a fraction rule set, use findFractionRuleSetRule() |
490 | // to find the rule (we should only go into this clause if the |
491 | // value is 0) |
492 | if (fIsFractionRuleSet) { |
493 | return findFractionRuleSetRule((double)number); |
494 | } |
495 | |
496 | // if the number is negative, return the negative-number rule |
497 | // (if there isn't one, pretend the number is positive) |
498 | if (number < 0) { |
499 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
500 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; |
501 | } else { |
502 | number = -number; |
503 | } |
504 | } |
505 | |
506 | // we have to repeat the preceding two checks, even though we |
507 | // do them in findRule(), because the version of format() that |
508 | // takes a long bypasses findRule() and goes straight to this |
509 | // function. This function does skip the fraction rules since |
510 | // we know the value is an integer (it also skips the master |
511 | // rule, since it's considered a fraction rule. Skipping the |
512 | // master rule in this function is also how we avoid infinite |
513 | // recursion) |
514 | |
515 | // {dlf} unfortunately this fails if there are no rules except |
516 | // special rules. If there are no rules, use the master rule. |
517 | |
518 | // binary-search the rule list for the applicable rule |
519 | // (a rule is used for all values from its base value to |
520 | // the next rule's base value) |
521 | int32_t hi = rules.size(); |
522 | if (hi > 0) { |
523 | int32_t lo = 0; |
524 | |
525 | while (lo < hi) { |
526 | int32_t mid = (lo + hi) / 2; |
527 | if (rules[mid]->getBaseValue() == number) { |
528 | return rules[mid]; |
529 | } |
530 | else if (rules[mid]->getBaseValue() > number) { |
531 | hi = mid; |
532 | } |
533 | else { |
534 | lo = mid + 1; |
535 | } |
536 | } |
537 | if (hi == 0) { // bad rule set, minimum base > 0 |
538 | return NULL; // want to throw exception here |
539 | } |
540 | |
541 | NFRule *result = rules[hi - 1]; |
542 | |
543 | // use shouldRollBack() to see whether we need to invoke the |
544 | // rollback rule (see shouldRollBack()'s documentation for |
545 | // an explanation of the rollback rule). If we do, roll back |
546 | // one rule and return that one instead of the one we'd normally |
547 | // return |
548 | if (result->shouldRollBack(number)) { |
549 | if (hi == 1) { // bad rule set, no prior rule to rollback to from this base |
550 | return NULL; |
551 | } |
552 | result = rules[hi - 2]; |
553 | } |
554 | return result; |
555 | } |
556 | // else use the master rule |
557 | return nonNumericalRules[MASTER_RULE_INDEX]; |
558 | } |
559 | |
560 | /** |
561 | * If this rule is a fraction rule set, this function is used by |
562 | * findRule() to select the most appropriate rule for formatting |
563 | * the number. Basically, the base value of each rule in the rule |
564 | * set is treated as the denominator of a fraction. Whichever |
565 | * denominator can produce the fraction closest in value to the |
566 | * number passed in is the result. If there's a tie, the earlier |
567 | * one in the list wins. (If there are two rules in a row with the |
568 | * same base value, the first one is used when the numerator of the |
569 | * fraction would be 1, and the second rule is used the rest of the |
570 | * time. |
571 | * @param number The number being formatted (which will always be |
572 | * a number between 0 and 1) |
573 | * @return The rule to use to format this number |
574 | */ |
575 | const NFRule* |
576 | NFRuleSet::findFractionRuleSetRule(double number) const |
577 | { |
578 | // the obvious way to do this (multiply the value being formatted |
579 | // by each rule's base value until you get an integral result) |
580 | // doesn't work because of rounding error. This method is more |
581 | // accurate |
582 | |
583 | // find the least common multiple of the rules' base values |
584 | // and multiply this by the number being formatted. This is |
585 | // all the precision we need, and we can do all of the rest |
586 | // of the math using integer arithmetic |
587 | int64_t leastCommonMultiple = rules[0]->getBaseValue(); |
588 | int64_t numerator; |
589 | { |
590 | for (uint32_t i = 1; i < rules.size(); ++i) { |
591 | leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); |
592 | } |
593 | numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); |
594 | } |
595 | // for each rule, do the following... |
596 | int64_t tempDifference; |
597 | int64_t difference = util64_fromDouble(uprv_maxMantissa()); |
598 | int32_t winner = 0; |
599 | for (uint32_t i = 0; i < rules.size(); ++i) { |
600 | // "numerator" is the numerator of the fraction if the |
601 | // denominator is the LCD. The numerator if the rule's |
602 | // base value is the denominator is "numerator" times the |
603 | // base value divided bythe LCD. Here we check to see if |
604 | // that's an integer, and if not, how close it is to being |
605 | // an integer. |
606 | tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; |
607 | |
608 | |
609 | // normalize the result of the above calculation: we want |
610 | // the numerator's distance from the CLOSEST multiple |
611 | // of the LCD |
612 | if (leastCommonMultiple - tempDifference < tempDifference) { |
613 | tempDifference = leastCommonMultiple - tempDifference; |
614 | } |
615 | |
616 | // if this is as close as we've come, keep track of how close |
617 | // that is, and the line number of the rule that did it. If |
618 | // we've scored a direct hit, we don't have to look at any more |
619 | // rules |
620 | if (tempDifference < difference) { |
621 | difference = tempDifference; |
622 | winner = i; |
623 | if (difference == 0) { |
624 | break; |
625 | } |
626 | } |
627 | } |
628 | |
629 | // if we have two successive rules that both have the winning base |
630 | // value, then the first one (the one we found above) is used if |
631 | // the numerator of the fraction is 1 and the second one is used if |
632 | // the numerator of the fraction is anything else (this lets us |
633 | // do things like "one third"/"two thirds" without haveing to define |
634 | // a whole bunch of extra rule sets) |
635 | if ((unsigned)(winner + 1) < rules.size() && |
636 | rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { |
637 | double n = ((double)rules[winner]->getBaseValue()) * number; |
638 | if (n < 0.5 || n >= 2) { |
639 | ++winner; |
640 | } |
641 | } |
642 | |
643 | // finally, return the winning rule |
644 | return rules[winner]; |
645 | } |
646 | |
/**
 * (Documentation for NFRuleSet::parse(), which is defined after the
 * debug-only helper below.)
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and reports
 * the value produced by the rule that matched the most characters
 * in the source string.
 * @param text The string to parse
 * @param pos The parse position.  The initial position is ignored and
 * assumed to be 0.  On exit, this object has been updated to point to the
 * first character position this rule set didn't consume.
 * @param upperBound Limits the rules that can be allowed to match.
 * Only rules whose base values are strictly less than upperBound
 * are considered.
 * @param result Receives the numerical result of parsing this string.
 * This will be the matching rule's base value, composed appropriately with
 * the results of matching any of its substitutions.  The value
 * will be a long if it's an integral value; otherwise,
 * it will be a double.  The result is always set to a valid value:
 * if nothing matched the input string at all,
 * it is a long with the value 0, and the parse position is
 * left unchanged.
 */
668 | #ifdef RBNF_DEBUG |
669 | #include <stdio.h> |
670 | |
671 | static void dumpUS(FILE* f, const UnicodeString& us) { |
672 | int len = us.length(); |
673 | char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; |
674 | if (buf != NULL) { |
675 | us.extract(0, len, buf); |
676 | buf[len] = 0; |
677 | fprintf(f, "%s" , buf); |
678 | uprv_free(buf); //delete[] buf; |
679 | } |
680 | } |
681 | #endif |
682 | |
683 | UBool |
684 | NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const |
685 | { |
686 | // try matching each rule in the rule set against the text being |
687 | // parsed. Whichever one matches the most characters is the one |
688 | // that determines the value we return. |
689 | |
690 | result.setLong(0); |
691 | |
692 | // dump out if there's no text to parse |
693 | if (text.length() == 0) { |
694 | return 0; |
695 | } |
696 | |
697 | ParsePosition highWaterMark; |
698 | ParsePosition workingPos = pos; |
699 | |
700 | #ifdef RBNF_DEBUG |
701 | fprintf(stderr, "<nfrs> %x '" , this); |
702 | dumpUS(stderr, name); |
703 | fprintf(stderr, "' text '" ); |
704 | dumpUS(stderr, text); |
705 | fprintf(stderr, "'\n" ); |
706 | fprintf(stderr, " parse negative: %d\n" , this, negativeNumberRule != 0); |
707 | #endif |
708 | // Try each of the negative rules, fraction rules, infinity rules and NaN rules |
709 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
710 | if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) { |
711 | // Mark this rule as being executed so that we don't try to execute it again. |
712 | nonNumericalExecutedRuleMask |= 1 << i; |
713 | |
714 | Formattable tempResult; |
715 | UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult); |
716 | if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { |
717 | result = tempResult; |
718 | highWaterMark = workingPos; |
719 | } |
720 | workingPos = pos; |
721 | } |
722 | } |
723 | #ifdef RBNF_DEBUG |
724 | fprintf(stderr, "<nfrs> continue other with text '" ); |
725 | dumpUS(stderr, text); |
726 | fprintf(stderr, "' hwm: %d\n" , highWaterMark.getIndex()); |
727 | #endif |
728 | |
729 | // finally, go through the regular rules one at a time. We start |
730 | // at the end of the list because we want to try matching the most |
731 | // sigificant rule first (this helps ensure that we parse |
732 | // "five thousand three hundred six" as |
733 | // "(five thousand) (three hundred) (six)" rather than |
734 | // "((five thousand three) hundred) (six)"). Skip rules whose |
735 | // base values are higher than the upper bound (again, this helps |
736 | // limit ambiguity by making sure the rules that match a rule's |
737 | // are less significant than the rule containing the substitutions)/ |
738 | { |
739 | int64_t ub = util64_fromDouble(upperBound); |
740 | #ifdef RBNF_DEBUG |
741 | { |
742 | char ubstr[64]; |
743 | util64_toa(ub, ubstr, 64); |
744 | char ubstrhex[64]; |
745 | util64_toa(ub, ubstrhex, 64, 16); |
746 | fprintf(stderr, "ub: %g, i64: %s (%s)\n" , upperBound, ubstr, ubstrhex); |
747 | } |
748 | #endif |
749 | for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { |
750 | if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { |
751 | continue; |
752 | } |
753 | Formattable tempResult; |
754 | UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult); |
755 | if (success && workingPos.getIndex() > highWaterMark.getIndex()) { |
756 | result = tempResult; |
757 | highWaterMark = workingPos; |
758 | } |
759 | workingPos = pos; |
760 | } |
761 | } |
762 | #ifdef RBNF_DEBUG |
763 | fprintf(stderr, "<nfrs> exit\n" ); |
764 | #endif |
765 | // finally, update the parse postion we were passed to point to the |
766 | // first character we didn't use, and return the result that |
767 | // corresponds to that string of characters |
768 | pos = highWaterMark; |
769 | |
770 | return 1; |
771 | } |
772 | |
773 | void |
774 | NFRuleSet::appendRules(UnicodeString& result) const |
775 | { |
776 | uint32_t i; |
777 | |
778 | // the rule set name goes first... |
779 | result.append(name); |
780 | result.append(gColon); |
781 | result.append(gLineFeed); |
782 | |
783 | // followed by the regular rules... |
784 | for (i = 0; i < rules.size(); i++) { |
785 | rules[i]->_appendRuleText(result); |
786 | result.append(gLineFeed); |
787 | } |
788 | |
789 | // followed by the special rules (if they exist) |
790 | for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
791 | NFRule *rule = nonNumericalRules[i]; |
792 | if (nonNumericalRules[i]) { |
793 | if (rule->getBaseValue() == NFRule::kImproperFractionRule |
794 | || rule->getBaseValue() == NFRule::kProperFractionRule |
795 | || rule->getBaseValue() == NFRule::kMasterRule) |
796 | { |
797 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { |
798 | NFRule *fractionRule = fractionRules[fIdx]; |
799 | if (fractionRule->getBaseValue() == rule->getBaseValue()) { |
800 | fractionRule->_appendRuleText(result); |
801 | result.append(gLineFeed); |
802 | } |
803 | } |
804 | } |
805 | else { |
806 | rule->_appendRuleText(result); |
807 | result.append(gLineFeed); |
808 | } |
809 | } |
810 | } |
811 | } |
812 | |
813 | // utility functions |
814 | |
815 | int64_t util64_fromDouble(double d) { |
816 | int64_t result = 0; |
817 | if (!uprv_isNaN(d)) { |
818 | double mant = uprv_maxMantissa(); |
819 | if (d < -mant) { |
820 | d = -mant; |
821 | } else if (d > mant) { |
822 | d = mant; |
823 | } |
824 | UBool neg = d < 0; |
825 | if (neg) { |
826 | d = -d; |
827 | } |
828 | result = (int64_t)uprv_floor(d); |
829 | if (neg) { |
830 | result = -result; |
831 | } |
832 | } |
833 | return result; |
834 | } |
835 | |
/**
 * Raises base to the given exponent using square-and-multiply.
 * A base of 0 always yields 0 (including for exponent 0); otherwise an
 * exponent of 0 yields 1.  Results that do not fit wrap around modulo 2^64.
 */
uint64_t util64_pow(uint32_t base, uint16_t exponent) {
    if (base == 0) {
        return 0;
    }

    uint64_t product = 1;
    uint64_t factor = base;     // base^(2^k) for the bit currently examined
    do {
        if (exponent & 1) {
            product *= factor;
        }
        factor *= factor;
        exponent >>= 1;
    } while (exponent != 0);

    return product;
}
854 | |
// Output digit table for util64_toa()/util64_tou(): entry d holds the ASCII
// code of digit value d, i.e. 0x30-0x39 ('0'-'9') for 0-9 and 0x61-0x7a
// ('a'-'z') for 10-35.  The 36 entries match the largest radix accepted.
static const uint8_t asciiDigits[] = {
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    0x77u, 0x78u, 0x79u, 0x7au,
};

// Minus sign (0x2d) as a UTF-16 code unit.
static const UChar kUMinus = (UChar)0x002d;
864 | |
865 | #ifdef RBNF_DEBUG |
// Minus sign as a char.
static const char kMinus = '-';

// Input digit table indexed by a 7-bit character code (128 entries).
// An entry of 0 means "not a digit"; otherwise the 0x80 bit marks a valid
// digit and the low seven bits hold its value: '0'-'9' map to 0-9 and both
// 'A'-'Z' and 'a'-'z' map to 10-35.
static const uint8_t digitInfo[] = {
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
};

/**
 * Parses an optionally '-'-prefixed integer from a zero-terminated char
 * string.  Parsing stops at the first character that is not a digit of
 * the given radix (including the terminator and any non-ASCII byte).
 *
 * @param str    the text to parse
 * @param radix  numeric base; values outside 2..36 are clamped to that range
 * @return the parsed value, 0 if no digit was found; overflow is not detected
 */
int64_t util64_atoi(const char* str, uint32_t radix)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    const int64_t lradix = radix;

    int neg = 0;
    if (*str == kMinus) {
        ++str;
        neg = 1;
    }

    int64_t result = 0;
    for (;;) {
        // Go through unsigned char: a plain char may be negative, and any
        // value of 0x80 or above lies outside the 128-entry table.
        const uint8_t c = (uint8_t)*str++;
        if (c >= sizeof(digitInfo) / sizeof(digitInfo[0])) {
            break;
        }
        uint8_t b = digitInfo[c];
        if (b == 0) {
            break;              // not a digit at all (also ends at the terminator)
        }
        b &= 0x7f;              // strip the "valid digit" marker
        if (b >= radix) {
            break;              // a digit, but not of this radix
        }
        result *= lradix;
        result += (int32_t)b;
    }

    if (neg) {
        result = -result;
    }
    return result;
}
912 | |
913 | int64_t util64_utoi(const UChar* str, uint32_t radix) |
914 | { |
915 | if (radix > 36) { |
916 | radix = 36; |
917 | } else if (radix < 2) { |
918 | radix = 2; |
919 | } |
920 | int64_t lradix = radix; |
921 | |
922 | int neg = 0; |
923 | if (*str == kUMinus) { |
924 | ++str; |
925 | neg = 1; |
926 | } |
927 | int64_t result = 0; |
928 | UChar c; |
929 | uint8_t b; |
930 | while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { |
931 | result *= lradix; |
932 | result += (int32_t)b; |
933 | } |
934 | if (neg) { |
935 | result = -result; |
936 | } |
937 | return result; |
938 | } |
939 | |
940 | uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) |
941 | { |
942 | if (radix > 36) { |
943 | radix = 36; |
944 | } else if (radix < 2) { |
945 | radix = 2; |
946 | } |
947 | int64_t base = radix; |
948 | |
949 | char* p = buf; |
950 | if (len && (w < 0) && (radix == 10) && !raw) { |
951 | w = -w; |
952 | *p++ = kMinus; |
953 | --len; |
954 | } else if (len && (w == 0)) { |
955 | *p++ = (char)raw ? 0 : asciiDigits[0]; |
956 | --len; |
957 | } |
958 | |
959 | while (len && w != 0) { |
960 | int64_t n = w / base; |
961 | int64_t m = n * base; |
962 | int32_t d = (int32_t)(w-m); |
963 | *p++ = raw ? (char)d : asciiDigits[d]; |
964 | w = n; |
965 | --len; |
966 | } |
967 | if (len) { |
968 | *p = 0; // null terminate if room for caller convenience |
969 | } |
970 | |
971 | len = p - buf; |
972 | if (*buf == kMinus) { |
973 | ++buf; |
974 | } |
975 | while (--p > buf) { |
976 | char c = *p; |
977 | *p = *buf; |
978 | *buf = c; |
979 | ++buf; |
980 | } |
981 | |
982 | return len; |
983 | } |
984 | #endif |
985 | |
986 | uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) |
987 | { |
988 | if (radix > 36) { |
989 | radix = 36; |
990 | } else if (radix < 2) { |
991 | radix = 2; |
992 | } |
993 | int64_t base = radix; |
994 | |
995 | UChar* p = buf; |
996 | if (len && (w < 0) && (radix == 10) && !raw) { |
997 | w = -w; |
998 | *p++ = kUMinus; |
999 | --len; |
1000 | } else if (len && (w == 0)) { |
1001 | *p++ = (UChar)raw ? 0 : asciiDigits[0]; |
1002 | --len; |
1003 | } |
1004 | |
1005 | while (len && (w != 0)) { |
1006 | int64_t n = w / base; |
1007 | int64_t m = n * base; |
1008 | int32_t d = (int32_t)(w-m); |
1009 | *p++ = (UChar)(raw ? d : asciiDigits[d]); |
1010 | w = n; |
1011 | --len; |
1012 | } |
1013 | if (len) { |
1014 | *p = 0; // null terminate if room for caller convenience |
1015 | } |
1016 | |
1017 | len = (uint32_t)(p - buf); |
1018 | if (*buf == kUMinus) { |
1019 | ++buf; |
1020 | } |
1021 | while (--p > buf) { |
1022 | UChar c = *p; |
1023 | *p = *buf; |
1024 | *buf = c; |
1025 | ++buf; |
1026 | } |
1027 | |
1028 | return len; |
1029 | } |
1030 | |
1031 | |
1032 | U_NAMESPACE_END |
1033 | |
1034 | /* U_HAVE_RBNF */ |
1035 | #endif |
1036 | |