| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ****************************************************************************** |
| 5 | * Copyright (C) 1997-2015, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ****************************************************************************** |
| 8 | * file name: nfrs.cpp |
| 9 | * encoding: UTF-8 |
| 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * Modification history |
| 14 | * Date Name Comments |
| 15 | * 10/11/2001 Doug Ported from ICU4J |
| 16 | */ |
| 17 | |
| 18 | #include "nfrs.h" |
| 19 | |
| 20 | #if U_HAVE_RBNF |
| 21 | |
| 22 | #include "unicode/uchar.h" |
| 23 | #include "nfrule.h" |
| 24 | #include "nfrlist.h" |
| 25 | #include "patternprops.h" |
| 26 | #include "putilimp.h" |
| 27 | |
| 28 | #ifdef RBNF_DEBUG |
| 29 | #include "cmemory.h" |
| 30 | #endif |
| 31 | |
// Indices into NFRuleSet::nonNumericalRules, one slot per kind of special
// (non-numerical) rule. The quoted text is the rule descriptor each slot
// corresponds to.
enum {
    NEGATIVE_RULE_INDEX = 0,          // "-x"
    IMPROPER_FRACTION_RULE_INDEX = 1, // "x.x"
    PROPER_FRACTION_RULE_INDEX = 2,   // "0.x"
    MASTER_RULE_INDEX = 3,            // "x.0"
    INFINITY_RULE_INDEX = 4,          // "Inf"
    NAN_RULE_INDEX = 5,               // "NaN"
    NON_NUMERICAL_RULE_LENGTH = 6     // number of slots above
};
| 47 | |
| 48 | U_NAMESPACE_BEGIN |
| 49 | |
/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is found with the binary gcd algorithm
 * (Knuth, "The Art of Computer Programming," vol. 2, 1st ed., pp. 298-299),
 * applied to the magnitudes of the arguments; the result is then
 * |x| / gcd * |y|.
 *
 * @param x first value
 * @param y second value
 * @return the least common multiple of |x| and |y|, or 0 if either
 *         argument is 0. No overflow detection is performed.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // lcm(0, n) is 0. Bailing out here also keeps the shift loop below
    // from spinning forever on (0, 0) and prevents a division by a zero gcd.
    if (x == 0 || y == 0) {
        return 0;
    }

    // Work on unsigned magnitudes so that negative arguments cannot
    // derail the shift-and-subtract loop.
    const uint64_t magX = x < 0 ? 0 - (uint64_t)x : (uint64_t)x;
    const uint64_t magY = y < 0 ? 0 - (uint64_t)y : (uint64_t)y;

    uint64_t a = magX;
    uint64_t b = magY;

    // strip the power of two shared by both values
    int p2 = 0;
    while (((a | b) & 1) == 0) {
        ++p2;
        a >>= 1;
        b >>= 1;
    }

    // remaining factors of two in a are not part of the gcd
    while ((a & 1) == 0) {
        a >>= 1;
    }

    // invariant: a is odd; b is non-zero at the top of each iteration
    do {
        while ((b & 1) == 0) {
            b >>= 1;
        }
        if (a > b) {
            const uint64_t t = a;
            a = b;
            b = t;
        }
        b -= a; // odd - odd: even, or 0 when the gcd has been reached
    } while (b != 0);

    const uint64_t gcd = a << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return (int64_t)(magX / gcd * magY);
}
| 120 | |
| 121 | static const UChar gPercent = 0x0025; |
| 122 | static const UChar gColon = 0x003a; |
| 123 | static const UChar gSemicolon = 0x003b; |
| 124 | static const UChar gLineFeed = 0x000a; |
| 125 | |
| 126 | static const UChar gPercentPercent[] = |
| 127 | { |
| 128 | 0x25, 0x25, 0 |
| 129 | }; /* "%%" */ |
| 130 | |
| 131 | static const UChar gNoparse[] = |
| 132 | { |
| 133 | 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 |
| 134 | }; /* "@noparse" */ |
| 135 | |
| 136 | NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status) |
| 137 | : name() |
| 138 | , rules(0) |
| 139 | , owner(_owner) |
| 140 | , fractionRules() |
| 141 | , fIsFractionRuleSet(FALSE) |
| 142 | , fIsPublic(FALSE) |
| 143 | , fIsParseable(TRUE) |
| 144 | { |
| 145 | for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
| 146 | nonNumericalRules[i] = NULL; |
| 147 | } |
| 148 | |
| 149 | if (U_FAILURE(status)) { |
| 150 | return; |
| 151 | } |
| 152 | |
| 153 | UnicodeString& description = descriptions[index]; // !!! make sure index is valid |
| 154 | |
| 155 | if (description.length() == 0) { |
| 156 | // throw new IllegalArgumentException("Empty rule set description"); |
| 157 | status = U_PARSE_ERROR; |
| 158 | return; |
| 159 | } |
| 160 | |
| 161 | // if the description begins with a rule set name (the rule set |
| 162 | // name can be omitted in formatter descriptions that consist |
| 163 | // of only one rule set), copy it out into our "name" member |
| 164 | // and delete it from the description |
| 165 | if (description.charAt(0) == gPercent) { |
| 166 | int32_t pos = description.indexOf(gColon); |
| 167 | if (pos == -1) { |
| 168 | // throw new IllegalArgumentException("Rule set name doesn't end in colon"); |
| 169 | status = U_PARSE_ERROR; |
| 170 | } else { |
| 171 | name.setTo(description, 0, pos); |
| 172 | while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { |
| 173 | } |
| 174 | description.remove(0, pos); |
| 175 | } |
| 176 | } else { |
| 177 | name.setTo(UNICODE_STRING_SIMPLE("%default" )); |
| 178 | } |
| 179 | |
| 180 | if (description.length() == 0) { |
| 181 | // throw new IllegalArgumentException("Empty rule set description"); |
| 182 | status = U_PARSE_ERROR; |
| 183 | } |
| 184 | |
| 185 | fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; |
| 186 | |
| 187 | if ( name.endsWith(gNoparse,8) ) { |
| 188 | fIsParseable = FALSE; |
| 189 | name.truncate(name.length()-8); // remove the @noparse from the name |
| 190 | } |
| 191 | |
| 192 | // all of the other members of NFRuleSet are initialized |
| 193 | // by parseRules() |
| 194 | } |
| 195 | |
| 196 | void |
| 197 | NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) |
| 198 | { |
| 199 | // start by creating a Vector whose elements are Strings containing |
| 200 | // the descriptions of the rules (one rule per element). The rules |
| 201 | // are separated by semicolons (there's no escape facility: ALL |
| 202 | // semicolons are rule delimiters) |
| 203 | |
| 204 | if (U_FAILURE(status)) { |
| 205 | return; |
| 206 | } |
| 207 | |
| 208 | // ensure we are starting with an empty rule list |
| 209 | rules.deleteAll(); |
| 210 | |
| 211 | // dlf - the original code kept a separate description array for no reason, |
| 212 | // so I got rid of it. The loop was too complex so I simplified it. |
| 213 | |
| 214 | UnicodeString currentDescription; |
| 215 | int32_t oldP = 0; |
| 216 | while (oldP < description.length()) { |
| 217 | int32_t p = description.indexOf(gSemicolon, oldP); |
| 218 | if (p == -1) { |
| 219 | p = description.length(); |
| 220 | } |
| 221 | currentDescription.setTo(description, oldP, p - oldP); |
| 222 | NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); |
| 223 | oldP = p + 1; |
| 224 | } |
| 225 | |
| 226 | // for rules that didn't specify a base value, their base values |
| 227 | // were initialized to 0. Make another pass through the list and |
| 228 | // set all those rules' base values. We also remove any special |
| 229 | // rules from the list and put them into their own member variables |
| 230 | int64_t defaultBaseValue = 0; |
| 231 | |
| 232 | // (this isn't a for loop because we might be deleting items from |
| 233 | // the vector-- we want to make sure we only increment i when |
| 234 | // we _didn't_ delete aything from the vector) |
| 235 | int32_t rulesSize = rules.size(); |
| 236 | for (int32_t i = 0; i < rulesSize; i++) { |
| 237 | NFRule* rule = rules[i]; |
| 238 | int64_t baseValue = rule->getBaseValue(); |
| 239 | |
| 240 | if (baseValue == 0) { |
| 241 | // if the rule's base value is 0, fill in a default |
| 242 | // base value (this will be 1 plus the preceding |
| 243 | // rule's base value for regular rule sets, and the |
| 244 | // same as the preceding rule's base value in fraction |
| 245 | // rule sets) |
| 246 | rule->setBaseValue(defaultBaseValue, status); |
| 247 | } |
| 248 | else { |
| 249 | // if it's a regular rule that already knows its base value, |
| 250 | // check to make sure the rules are in order, and update |
| 251 | // the default base value for the next rule |
| 252 | if (baseValue < defaultBaseValue) { |
| 253 | // throw new IllegalArgumentException("Rules are not in order"); |
| 254 | status = U_PARSE_ERROR; |
| 255 | return; |
| 256 | } |
| 257 | defaultBaseValue = baseValue; |
| 258 | } |
| 259 | if (!fIsFractionRuleSet) { |
| 260 | ++defaultBaseValue; |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | /** |
| 266 | * Set one of the non-numerical rules. |
| 267 | * @param rule The rule to set. |
| 268 | */ |
| 269 | void NFRuleSet::setNonNumericalRule(NFRule *rule) { |
| 270 | int64_t baseValue = rule->getBaseValue(); |
| 271 | if (baseValue == NFRule::kNegativeNumberRule) { |
| 272 | delete nonNumericalRules[NEGATIVE_RULE_INDEX]; |
| 273 | nonNumericalRules[NEGATIVE_RULE_INDEX] = rule; |
| 274 | } |
| 275 | else if (baseValue == NFRule::kImproperFractionRule) { |
| 276 | setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE); |
| 277 | } |
| 278 | else if (baseValue == NFRule::kProperFractionRule) { |
| 279 | setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE); |
| 280 | } |
| 281 | else if (baseValue == NFRule::kMasterRule) { |
| 282 | setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE); |
| 283 | } |
| 284 | else if (baseValue == NFRule::kInfinityRule) { |
| 285 | delete nonNumericalRules[INFINITY_RULE_INDEX]; |
| 286 | nonNumericalRules[INFINITY_RULE_INDEX] = rule; |
| 287 | } |
| 288 | else if (baseValue == NFRule::kNaNRule) { |
| 289 | delete nonNumericalRules[NAN_RULE_INDEX]; |
| 290 | nonNumericalRules[NAN_RULE_INDEX] = rule; |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | /** |
| 295 | * Determine the best fraction rule to use. Rules matching the decimal point from |
| 296 | * DecimalFormatSymbols become the main set of rules to use. |
| 297 | * @param originalIndex The index into nonNumericalRules |
| 298 | * @param newRule The new rule to consider |
| 299 | * @param rememberRule Should the new rule be added to fractionRules. |
| 300 | */ |
| 301 | void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) { |
| 302 | if (rememberRule) { |
| 303 | fractionRules.add(newRule); |
| 304 | } |
| 305 | NFRule *bestResult = nonNumericalRules[originalIndex]; |
| 306 | if (bestResult == NULL) { |
| 307 | nonNumericalRules[originalIndex] = newRule; |
| 308 | } |
| 309 | else { |
| 310 | // We have more than one. Which one is better? |
| 311 | const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols(); |
| 312 | if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0) |
| 313 | == newRule->getDecimalPoint()) |
| 314 | { |
| 315 | nonNumericalRules[originalIndex] = newRule; |
| 316 | } |
| 317 | // else leave it alone |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | NFRuleSet::~NFRuleSet() |
| 322 | { |
| 323 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
| 324 | if (i != IMPROPER_FRACTION_RULE_INDEX |
| 325 | && i != PROPER_FRACTION_RULE_INDEX |
| 326 | && i != MASTER_RULE_INDEX) |
| 327 | { |
| 328 | delete nonNumericalRules[i]; |
| 329 | } |
| 330 | // else it will be deleted via NFRuleList fractionRules |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | static UBool |
| 335 | util_equalRules(const NFRule* rule1, const NFRule* rule2) |
| 336 | { |
| 337 | if (rule1) { |
| 338 | if (rule2) { |
| 339 | return *rule1 == *rule2; |
| 340 | } |
| 341 | } else if (!rule2) { |
| 342 | return TRUE; |
| 343 | } |
| 344 | return FALSE; |
| 345 | } |
| 346 | |
| 347 | UBool |
| 348 | NFRuleSet::operator==(const NFRuleSet& rhs) const |
| 349 | { |
| 350 | if (rules.size() == rhs.rules.size() && |
| 351 | fIsFractionRuleSet == rhs.fIsFractionRuleSet && |
| 352 | name == rhs.name) { |
| 353 | |
| 354 | // ...then compare the non-numerical rule lists... |
| 355 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
| 356 | if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { |
| 357 | return FALSE; |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | // ...then compare the rule lists... |
| 362 | for (uint32_t i = 0; i < rules.size(); ++i) { |
| 363 | if (*rules[i] != *rhs.rules[i]) { |
| 364 | return FALSE; |
| 365 | } |
| 366 | } |
| 367 | return TRUE; |
| 368 | } |
| 369 | return FALSE; |
| 370 | } |
| 371 | |
| 372 | void |
| 373 | NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) { |
| 374 | for (uint32_t i = 0; i < rules.size(); ++i) { |
| 375 | rules[i]->setDecimalFormatSymbols(newSymbols, status); |
| 376 | } |
| 377 | // Switch the fraction rules to mirror the DecimalFormatSymbols. |
| 378 | for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) { |
| 379 | if (nonNumericalRules[nonNumericalIdx]) { |
| 380 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { |
| 381 | NFRule *fractionRule = fractionRules[fIdx]; |
| 382 | if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) { |
| 383 | setBestFractionRule(nonNumericalIdx, fractionRule, FALSE); |
| 384 | } |
| 385 | } |
| 386 | } |
| 387 | } |
| 388 | |
| 389 | for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) { |
| 390 | NFRule *rule = nonNumericalRules[nnrIdx]; |
| 391 | if (rule) { |
| 392 | rule->setDecimalFormatSymbols(newSymbols, status); |
| 393 | } |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | #define RECURSION_LIMIT 64 |
| 398 | |
| 399 | void |
| 400 | NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
| 401 | { |
| 402 | if (recursionCount >= RECURSION_LIMIT) { |
| 403 | // stop recursion |
| 404 | status = U_INVALID_STATE_ERROR; |
| 405 | return; |
| 406 | } |
| 407 | const NFRule *rule = findNormalRule(number); |
| 408 | if (rule) { // else error, but can't report it |
| 409 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
| 410 | } |
| 411 | } |
| 412 | |
| 413 | void |
| 414 | NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
| 415 | { |
| 416 | if (recursionCount >= RECURSION_LIMIT) { |
| 417 | // stop recursion |
| 418 | status = U_INVALID_STATE_ERROR; |
| 419 | return; |
| 420 | } |
| 421 | const NFRule *rule = findDoubleRule(number); |
| 422 | if (rule) { // else error, but can't report it |
| 423 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
| 424 | } |
| 425 | } |
| 426 | |
| 427 | const NFRule* |
| 428 | NFRuleSet::findDoubleRule(double number) const |
| 429 | { |
| 430 | // if this is a fraction rule set, use findFractionRuleSetRule() |
| 431 | if (isFractionRuleSet()) { |
| 432 | return findFractionRuleSetRule(number); |
| 433 | } |
| 434 | |
| 435 | if (uprv_isNaN(number)) { |
| 436 | const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX]; |
| 437 | if (!rule) { |
| 438 | rule = owner->getDefaultNaNRule(); |
| 439 | } |
| 440 | return rule; |
| 441 | } |
| 442 | |
| 443 | // if the number is negative, return the negative number rule |
| 444 | // (if there isn't a negative-number rule, we pretend it's a |
| 445 | // positive number) |
| 446 | if (number < 0) { |
| 447 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
| 448 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; |
| 449 | } else { |
| 450 | number = -number; |
| 451 | } |
| 452 | } |
| 453 | |
| 454 | if (uprv_isInfinite(number)) { |
| 455 | const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX]; |
| 456 | if (!rule) { |
| 457 | rule = owner->getDefaultInfinityRule(); |
| 458 | } |
| 459 | return rule; |
| 460 | } |
| 461 | |
| 462 | // if the number isn't an integer, we use one of the fraction rules... |
| 463 | if (number != uprv_floor(number)) { |
| 464 | // if the number is between 0 and 1, return the proper |
| 465 | // fraction rule |
| 466 | if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) { |
| 467 | return nonNumericalRules[PROPER_FRACTION_RULE_INDEX]; |
| 468 | } |
| 469 | // otherwise, return the improper fraction rule |
| 470 | else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) { |
| 471 | return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]; |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | // if there's a master rule, use it to format the number |
| 476 | if (nonNumericalRules[MASTER_RULE_INDEX]) { |
| 477 | return nonNumericalRules[MASTER_RULE_INDEX]; |
| 478 | } |
| 479 | |
| 480 | // and if we haven't yet returned a rule, use findNormalRule() |
| 481 | // to find the applicable rule |
| 482 | int64_t r = util64_fromDouble(number + 0.5); |
| 483 | return findNormalRule(r); |
| 484 | } |
| 485 | |
| 486 | const NFRule * |
| 487 | NFRuleSet::findNormalRule(int64_t number) const |
| 488 | { |
| 489 | // if this is a fraction rule set, use findFractionRuleSetRule() |
| 490 | // to find the rule (we should only go into this clause if the |
| 491 | // value is 0) |
| 492 | if (fIsFractionRuleSet) { |
| 493 | return findFractionRuleSetRule((double)number); |
| 494 | } |
| 495 | |
| 496 | // if the number is negative, return the negative-number rule |
| 497 | // (if there isn't one, pretend the number is positive) |
| 498 | if (number < 0) { |
| 499 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
| 500 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; |
| 501 | } else { |
| 502 | number = -number; |
| 503 | } |
| 504 | } |
| 505 | |
| 506 | // we have to repeat the preceding two checks, even though we |
| 507 | // do them in findRule(), because the version of format() that |
| 508 | // takes a long bypasses findRule() and goes straight to this |
| 509 | // function. This function does skip the fraction rules since |
| 510 | // we know the value is an integer (it also skips the master |
| 511 | // rule, since it's considered a fraction rule. Skipping the |
| 512 | // master rule in this function is also how we avoid infinite |
| 513 | // recursion) |
| 514 | |
| 515 | // {dlf} unfortunately this fails if there are no rules except |
| 516 | // special rules. If there are no rules, use the master rule. |
| 517 | |
| 518 | // binary-search the rule list for the applicable rule |
| 519 | // (a rule is used for all values from its base value to |
| 520 | // the next rule's base value) |
| 521 | int32_t hi = rules.size(); |
| 522 | if (hi > 0) { |
| 523 | int32_t lo = 0; |
| 524 | |
| 525 | while (lo < hi) { |
| 526 | int32_t mid = (lo + hi) / 2; |
| 527 | if (rules[mid]->getBaseValue() == number) { |
| 528 | return rules[mid]; |
| 529 | } |
| 530 | else if (rules[mid]->getBaseValue() > number) { |
| 531 | hi = mid; |
| 532 | } |
| 533 | else { |
| 534 | lo = mid + 1; |
| 535 | } |
| 536 | } |
| 537 | if (hi == 0) { // bad rule set, minimum base > 0 |
| 538 | return NULL; // want to throw exception here |
| 539 | } |
| 540 | |
| 541 | NFRule *result = rules[hi - 1]; |
| 542 | |
| 543 | // use shouldRollBack() to see whether we need to invoke the |
| 544 | // rollback rule (see shouldRollBack()'s documentation for |
| 545 | // an explanation of the rollback rule). If we do, roll back |
| 546 | // one rule and return that one instead of the one we'd normally |
| 547 | // return |
| 548 | if (result->shouldRollBack(number)) { |
| 549 | if (hi == 1) { // bad rule set, no prior rule to rollback to from this base |
| 550 | return NULL; |
| 551 | } |
| 552 | result = rules[hi - 2]; |
| 553 | } |
| 554 | return result; |
| 555 | } |
| 556 | // else use the master rule |
| 557 | return nonNumericalRules[MASTER_RULE_INDEX]; |
| 558 | } |
| 559 | |
| 560 | /** |
| 561 | * If this rule is a fraction rule set, this function is used by |
| 562 | * findRule() to select the most appropriate rule for formatting |
| 563 | * the number. Basically, the base value of each rule in the rule |
| 564 | * set is treated as the denominator of a fraction. Whichever |
| 565 | * denominator can produce the fraction closest in value to the |
| 566 | * number passed in is the result. If there's a tie, the earlier |
| 567 | * one in the list wins. (If there are two rules in a row with the |
| 568 | * same base value, the first one is used when the numerator of the |
| 569 | * fraction would be 1, and the second rule is used the rest of the |
| 570 | * time. |
| 571 | * @param number The number being formatted (which will always be |
| 572 | * a number between 0 and 1) |
| 573 | * @return The rule to use to format this number |
| 574 | */ |
| 575 | const NFRule* |
| 576 | NFRuleSet::findFractionRuleSetRule(double number) const |
| 577 | { |
| 578 | // the obvious way to do this (multiply the value being formatted |
| 579 | // by each rule's base value until you get an integral result) |
| 580 | // doesn't work because of rounding error. This method is more |
| 581 | // accurate |
| 582 | |
| 583 | // find the least common multiple of the rules' base values |
| 584 | // and multiply this by the number being formatted. This is |
| 585 | // all the precision we need, and we can do all of the rest |
| 586 | // of the math using integer arithmetic |
| 587 | int64_t leastCommonMultiple = rules[0]->getBaseValue(); |
| 588 | int64_t numerator; |
| 589 | { |
| 590 | for (uint32_t i = 1; i < rules.size(); ++i) { |
| 591 | leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); |
| 592 | } |
| 593 | numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); |
| 594 | } |
| 595 | // for each rule, do the following... |
| 596 | int64_t tempDifference; |
| 597 | int64_t difference = util64_fromDouble(uprv_maxMantissa()); |
| 598 | int32_t winner = 0; |
| 599 | for (uint32_t i = 0; i < rules.size(); ++i) { |
| 600 | // "numerator" is the numerator of the fraction if the |
| 601 | // denominator is the LCD. The numerator if the rule's |
| 602 | // base value is the denominator is "numerator" times the |
| 603 | // base value divided bythe LCD. Here we check to see if |
| 604 | // that's an integer, and if not, how close it is to being |
| 605 | // an integer. |
| 606 | tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; |
| 607 | |
| 608 | |
| 609 | // normalize the result of the above calculation: we want |
| 610 | // the numerator's distance from the CLOSEST multiple |
| 611 | // of the LCD |
| 612 | if (leastCommonMultiple - tempDifference < tempDifference) { |
| 613 | tempDifference = leastCommonMultiple - tempDifference; |
| 614 | } |
| 615 | |
| 616 | // if this is as close as we've come, keep track of how close |
| 617 | // that is, and the line number of the rule that did it. If |
| 618 | // we've scored a direct hit, we don't have to look at any more |
| 619 | // rules |
| 620 | if (tempDifference < difference) { |
| 621 | difference = tempDifference; |
| 622 | winner = i; |
| 623 | if (difference == 0) { |
| 624 | break; |
| 625 | } |
| 626 | } |
| 627 | } |
| 628 | |
| 629 | // if we have two successive rules that both have the winning base |
| 630 | // value, then the first one (the one we found above) is used if |
| 631 | // the numerator of the fraction is 1 and the second one is used if |
| 632 | // the numerator of the fraction is anything else (this lets us |
| 633 | // do things like "one third"/"two thirds" without haveing to define |
| 634 | // a whole bunch of extra rule sets) |
| 635 | if ((unsigned)(winner + 1) < rules.size() && |
| 636 | rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { |
| 637 | double n = ((double)rules[winner]->getBaseValue()) * number; |
| 638 | if (n < 0.5 || n >= 2) { |
| 639 | ++winner; |
| 640 | } |
| 641 | } |
| 642 | |
| 643 | // finally, return the winning rule |
| 644 | return rules[winner]; |
| 645 | } |
| 646 | |
| 647 | /** |
| 648 | * Parses a string. Matches the string to be parsed against each |
| 649 | * of its rules (with a base value less than upperBound) and returns |
 * the value produced by the rule that matched the most characters
| 651 | * in the source string. |
| 652 | * @param text The string to parse |
| 653 | * @param parsePosition The initial position is ignored and assumed |
| 654 | * to be 0. On exit, this object has been updated to point to the |
| 655 | * first character position this rule set didn't consume. |
| 656 | * @param upperBound Limits the rules that can be allowed to match. |
| 657 | * Only rules whose base values are strictly less than upperBound |
| 658 | * are considered. |
| 659 | * @return The numerical result of parsing this string. This will |
| 660 | * be the matching rule's base value, composed appropriately with |
| 661 | * the results of matching any of its substitutions. The object |
| 662 | * will be an instance of Long if it's an integral value; otherwise, |
| 663 | * it will be an instance of Double. This function always returns |
| 664 | * a valid object: If nothing matched the input string at all, |
| 665 | * this function returns new Long(0), and the parse position is |
| 666 | * left unchanged. |
| 667 | */ |
#ifdef RBNF_DEBUG
#include <stdio.h>

// Debug-only helper: extracts the string into a temporary char buffer of
// length()+1 bytes and writes it to f. Prints nothing if the buffer
// cannot be allocated.
static void dumpUS(FILE* f, const UnicodeString& us) {
    const int charCount = us.length();
    char* bytes = (char *)uprv_malloc((charCount + 1) * sizeof(char));
    if (bytes == NULL) {
        return;
    }
    us.extract(0, charCount, bytes);
    bytes[charCount] = 0;
    fprintf(f, "%s" , bytes);
    uprv_free(bytes);
}
#endif
| 682 | |
| 683 | UBool |
| 684 | NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const |
| 685 | { |
| 686 | // try matching each rule in the rule set against the text being |
| 687 | // parsed. Whichever one matches the most characters is the one |
| 688 | // that determines the value we return. |
| 689 | |
| 690 | result.setLong(0); |
| 691 | |
| 692 | // dump out if there's no text to parse |
| 693 | if (text.length() == 0) { |
| 694 | return 0; |
| 695 | } |
| 696 | |
| 697 | ParsePosition highWaterMark; |
| 698 | ParsePosition workingPos = pos; |
| 699 | |
| 700 | #ifdef RBNF_DEBUG |
| 701 | fprintf(stderr, "<nfrs> %x '" , this); |
| 702 | dumpUS(stderr, name); |
| 703 | fprintf(stderr, "' text '" ); |
| 704 | dumpUS(stderr, text); |
| 705 | fprintf(stderr, "'\n" ); |
| 706 | fprintf(stderr, " parse negative: %d\n" , this, negativeNumberRule != 0); |
| 707 | #endif |
| 708 | // Try each of the negative rules, fraction rules, infinity rules and NaN rules |
| 709 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
| 710 | if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) { |
| 711 | // Mark this rule as being executed so that we don't try to execute it again. |
| 712 | nonNumericalExecutedRuleMask |= 1 << i; |
| 713 | |
| 714 | Formattable tempResult; |
| 715 | UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult); |
| 716 | if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { |
| 717 | result = tempResult; |
| 718 | highWaterMark = workingPos; |
| 719 | } |
| 720 | workingPos = pos; |
| 721 | } |
| 722 | } |
| 723 | #ifdef RBNF_DEBUG |
| 724 | fprintf(stderr, "<nfrs> continue other with text '" ); |
| 725 | dumpUS(stderr, text); |
| 726 | fprintf(stderr, "' hwm: %d\n" , highWaterMark.getIndex()); |
| 727 | #endif |
| 728 | |
| 729 | // finally, go through the regular rules one at a time. We start |
| 730 | // at the end of the list because we want to try matching the most |
| 731 | // sigificant rule first (this helps ensure that we parse |
| 732 | // "five thousand three hundred six" as |
| 733 | // "(five thousand) (three hundred) (six)" rather than |
| 734 | // "((five thousand three) hundred) (six)"). Skip rules whose |
| 735 | // base values are higher than the upper bound (again, this helps |
| 736 | // limit ambiguity by making sure the rules that match a rule's |
| 737 | // are less significant than the rule containing the substitutions)/ |
| 738 | { |
| 739 | int64_t ub = util64_fromDouble(upperBound); |
| 740 | #ifdef RBNF_DEBUG |
| 741 | { |
| 742 | char ubstr[64]; |
| 743 | util64_toa(ub, ubstr, 64); |
| 744 | char ubstrhex[64]; |
| 745 | util64_toa(ub, ubstrhex, 64, 16); |
| 746 | fprintf(stderr, "ub: %g, i64: %s (%s)\n" , upperBound, ubstr, ubstrhex); |
| 747 | } |
| 748 | #endif |
| 749 | for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { |
| 750 | if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { |
| 751 | continue; |
| 752 | } |
| 753 | Formattable tempResult; |
| 754 | UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult); |
| 755 | if (success && workingPos.getIndex() > highWaterMark.getIndex()) { |
| 756 | result = tempResult; |
| 757 | highWaterMark = workingPos; |
| 758 | } |
| 759 | workingPos = pos; |
| 760 | } |
| 761 | } |
| 762 | #ifdef RBNF_DEBUG |
| 763 | fprintf(stderr, "<nfrs> exit\n" ); |
| 764 | #endif |
| 765 | // finally, update the parse postion we were passed to point to the |
| 766 | // first character we didn't use, and return the result that |
| 767 | // corresponds to that string of characters |
| 768 | pos = highWaterMark; |
| 769 | |
| 770 | return 1; |
| 771 | } |
| 772 | |
| 773 | void |
| 774 | NFRuleSet::appendRules(UnicodeString& result) const |
| 775 | { |
| 776 | uint32_t i; |
| 777 | |
| 778 | // the rule set name goes first... |
| 779 | result.append(name); |
| 780 | result.append(gColon); |
| 781 | result.append(gLineFeed); |
| 782 | |
| 783 | // followed by the regular rules... |
| 784 | for (i = 0; i < rules.size(); i++) { |
| 785 | rules[i]->_appendRuleText(result); |
| 786 | result.append(gLineFeed); |
| 787 | } |
| 788 | |
| 789 | // followed by the special rules (if they exist) |
| 790 | for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
| 791 | NFRule *rule = nonNumericalRules[i]; |
| 792 | if (nonNumericalRules[i]) { |
| 793 | if (rule->getBaseValue() == NFRule::kImproperFractionRule |
| 794 | || rule->getBaseValue() == NFRule::kProperFractionRule |
| 795 | || rule->getBaseValue() == NFRule::kMasterRule) |
| 796 | { |
| 797 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { |
| 798 | NFRule *fractionRule = fractionRules[fIdx]; |
| 799 | if (fractionRule->getBaseValue() == rule->getBaseValue()) { |
| 800 | fractionRule->_appendRuleText(result); |
| 801 | result.append(gLineFeed); |
| 802 | } |
| 803 | } |
| 804 | } |
| 805 | else { |
| 806 | rule->_appendRuleText(result); |
| 807 | result.append(gLineFeed); |
| 808 | } |
| 809 | } |
| 810 | } |
| 811 | } |
| 812 | |
| 813 | // utility functions |
| 814 | |
| 815 | int64_t util64_fromDouble(double d) { |
| 816 | int64_t result = 0; |
| 817 | if (!uprv_isNaN(d)) { |
| 818 | double mant = uprv_maxMantissa(); |
| 819 | if (d < -mant) { |
| 820 | d = -mant; |
| 821 | } else if (d > mant) { |
| 822 | d = mant; |
| 823 | } |
| 824 | UBool neg = d < 0; |
| 825 | if (neg) { |
| 826 | d = -d; |
| 827 | } |
| 828 | result = (int64_t)uprv_floor(d); |
| 829 | if (neg) { |
| 830 | result = -result; |
| 831 | } |
| 832 | } |
| 833 | return result; |
| 834 | } |
| 835 | |
/**
 * Raises base to the given power by repeated squaring.
 *
 * @param base     the base; a base of 0 always yields 0, even for exponent 0
 * @param exponent the exponent
 * @return base ** exponent, reduced modulo 2^64 if it does not fit
 */
uint64_t util64_pow(uint32_t base, uint16_t exponent) {
    if (base == 0) {
        return 0;
    }

    uint64_t product = 1;
    // "square" holds base^(2^k) while bit k of the exponent is examined
    for (uint64_t square = base; exponent != 0; square *= square) {
        if ((exponent & 1) != 0) {
            product *= square;
        }
        exponent >>= 1;
    }
    return product;
}
| 854 | |
// Digit characters for radices up to 36, indexed by digit value.
// Spelled as ASCII code points rather than character literals.
static const uint8_t asciiDigits[] = {
    // '0'..'9'
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    // 'a'..'m'
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
    // 'n'..'z'
    0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a,
};
| 862 | |
| 863 | static const UChar kUMinus = (UChar)0x002d; |
| 864 | |
| 865 | #ifdef RBNF_DEBUG |
// Sign character recognized and produced by the char-based debug helpers.
static const char kMinus = '-';

// Lookup table indexed by an ASCII code point (0x00..0x7f).
// A zero entry means "not a digit". Otherwise the high bit (0x80) marks the
// entry as valid and the low seven bits hold the digit value (0..35), which
// lets the digit zero be told apart from an invalid character.
static const uint8_t digitInfo[] = {
    // 0x00-0x2f: no digits
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x30-0x3f: '0'..'9' -> 0..9
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    // 0x40-0x5f: 'A'..'Z' -> 10..35
    0, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0, 0, 0, 0, 0,
    // 0x60-0x7f: 'a'..'z' -> 10..35
    0, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0, 0, 0, 0, 0,
};
| 886 | |
| 887 | int64_t util64_atoi(const char* str, uint32_t radix) |
| 888 | { |
| 889 | if (radix > 36) { |
| 890 | radix = 36; |
| 891 | } else if (radix < 2) { |
| 892 | radix = 2; |
| 893 | } |
| 894 | int64_t lradix = radix; |
| 895 | |
| 896 | int neg = 0; |
| 897 | if (*str == kMinus) { |
| 898 | ++str; |
| 899 | neg = 1; |
| 900 | } |
| 901 | int64_t result = 0; |
| 902 | uint8_t b; |
| 903 | while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { |
| 904 | result *= lradix; |
| 905 | result += (int32_t)b; |
| 906 | } |
| 907 | if (neg) { |
| 908 | result = -result; |
| 909 | } |
| 910 | return result; |
| 911 | } |
| 912 | |
| 913 | int64_t util64_utoi(const UChar* str, uint32_t radix) |
| 914 | { |
| 915 | if (radix > 36) { |
| 916 | radix = 36; |
| 917 | } else if (radix < 2) { |
| 918 | radix = 2; |
| 919 | } |
| 920 | int64_t lradix = radix; |
| 921 | |
| 922 | int neg = 0; |
| 923 | if (*str == kUMinus) { |
| 924 | ++str; |
| 925 | neg = 1; |
| 926 | } |
| 927 | int64_t result = 0; |
| 928 | UChar c; |
| 929 | uint8_t b; |
| 930 | while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { |
| 931 | result *= lradix; |
| 932 | result += (int32_t)b; |
| 933 | } |
| 934 | if (neg) { |
| 935 | result = -result; |
| 936 | } |
| 937 | return result; |
| 938 | } |
| 939 | |
| 940 | uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) |
| 941 | { |
| 942 | if (radix > 36) { |
| 943 | radix = 36; |
| 944 | } else if (radix < 2) { |
| 945 | radix = 2; |
| 946 | } |
| 947 | int64_t base = radix; |
| 948 | |
| 949 | char* p = buf; |
| 950 | if (len && (w < 0) && (radix == 10) && !raw) { |
| 951 | w = -w; |
| 952 | *p++ = kMinus; |
| 953 | --len; |
| 954 | } else if (len && (w == 0)) { |
| 955 | *p++ = (char)raw ? 0 : asciiDigits[0]; |
| 956 | --len; |
| 957 | } |
| 958 | |
| 959 | while (len && w != 0) { |
| 960 | int64_t n = w / base; |
| 961 | int64_t m = n * base; |
| 962 | int32_t d = (int32_t)(w-m); |
| 963 | *p++ = raw ? (char)d : asciiDigits[d]; |
| 964 | w = n; |
| 965 | --len; |
| 966 | } |
| 967 | if (len) { |
| 968 | *p = 0; // null terminate if room for caller convenience |
| 969 | } |
| 970 | |
| 971 | len = p - buf; |
| 972 | if (*buf == kMinus) { |
| 973 | ++buf; |
| 974 | } |
| 975 | while (--p > buf) { |
| 976 | char c = *p; |
| 977 | *p = *buf; |
| 978 | *buf = c; |
| 979 | ++buf; |
| 980 | } |
| 981 | |
| 982 | return len; |
| 983 | } |
| 984 | #endif |
| 985 | |
| 986 | uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) |
| 987 | { |
| 988 | if (radix > 36) { |
| 989 | radix = 36; |
| 990 | } else if (radix < 2) { |
| 991 | radix = 2; |
| 992 | } |
| 993 | int64_t base = radix; |
| 994 | |
| 995 | UChar* p = buf; |
| 996 | if (len && (w < 0) && (radix == 10) && !raw) { |
| 997 | w = -w; |
| 998 | *p++ = kUMinus; |
| 999 | --len; |
| 1000 | } else if (len && (w == 0)) { |
| 1001 | *p++ = (UChar)raw ? 0 : asciiDigits[0]; |
| 1002 | --len; |
| 1003 | } |
| 1004 | |
| 1005 | while (len && (w != 0)) { |
| 1006 | int64_t n = w / base; |
| 1007 | int64_t m = n * base; |
| 1008 | int32_t d = (int32_t)(w-m); |
| 1009 | *p++ = (UChar)(raw ? d : asciiDigits[d]); |
| 1010 | w = n; |
| 1011 | --len; |
| 1012 | } |
| 1013 | if (len) { |
| 1014 | *p = 0; // null terminate if room for caller convenience |
| 1015 | } |
| 1016 | |
| 1017 | len = (uint32_t)(p - buf); |
| 1018 | if (*buf == kUMinus) { |
| 1019 | ++buf; |
| 1020 | } |
| 1021 | while (--p > buf) { |
| 1022 | UChar c = *p; |
| 1023 | *p = *buf; |
| 1024 | *buf = c; |
| 1025 | ++buf; |
| 1026 | } |
| 1027 | |
| 1028 | return len; |
| 1029 | } |
| 1030 | |
| 1031 | |
| 1032 | U_NAMESPACE_END |
| 1033 | |
| 1034 | /* U_HAVE_RBNF */ |
| 1035 | #endif |
| 1036 | |