| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ******************************************************************************* | 
|---|
| 5 | * Copyright (C) 2007-2016, International Business Machines Corporation and | 
|---|
| 6 | * others. All Rights Reserved. | 
|---|
| 7 | ******************************************************************************* | 
|---|
| 8 | * | 
|---|
| 9 | * File PLURRULE_IMPL.H | 
|---|
| 10 | * | 
|---|
| 11 | ******************************************************************************* | 
|---|
| 12 | */ | 
|---|
| 13 |  | 
|---|
| 14 |  | 
|---|
| 15 | #ifndef PLURRULE_IMPL | 
|---|
| 16 | #define PLURRULE_IMPL | 
|---|
| 17 |  | 
|---|
| 18 | // Internal definitions for the PluralRules implementation. | 
|---|
| 19 |  | 
|---|
| 20 | #include "unicode/utypes.h" | 
|---|
| 21 |  | 
|---|
| 22 | #if !UCONFIG_NO_FORMATTING | 
|---|
| 23 |  | 
|---|
| 24 | #include "unicode/format.h" | 
|---|
| 25 | #include "unicode/locid.h" | 
|---|
| 26 | #include "unicode/parseerr.h" | 
|---|
| 27 | #include "unicode/strenum.h" | 
|---|
| 28 | #include "unicode/ures.h" | 
|---|
| 29 | #include "uvector.h" | 
|---|
| 30 | #include "hash.h" | 
|---|
| 31 | #include "uassert.h" | 
|---|
| 32 |  | 
|---|
| 33 | class PluralRulesTest; | 
|---|
| 34 |  | 
|---|
| 35 | U_NAMESPACE_BEGIN | 
|---|
| 36 |  | 
|---|
| 37 | class AndConstraint; | 
|---|
| 38 | class RuleChain; | 
|---|
| 39 | class DigitInterval; | 
|---|
| 40 | class PluralRules; | 
|---|
| 41 | class VisibleDigits; | 
|---|
| 42 |  | 
|---|
| 43 | namespace pluralimpl { | 
|---|
| 44 |  | 
|---|
| 45 | // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. | 
|---|
| 46 |  | 
|---|
| 47 | static const UChar DOT = ((UChar) 0x002E); | 
|---|
| 48 | static const UChar SINGLE_QUOTE = ((UChar) 0x0027); | 
|---|
| 49 | static const UChar SLASH = ((UChar) 0x002F); | 
|---|
| 50 | static const UChar BACKSLASH = ((UChar) 0x005C); | 
|---|
| 51 | static const UChar SPACE = ((UChar) 0x0020); | 
|---|
| 52 | static const UChar EXCLAMATION = ((UChar) 0x0021); | 
|---|
| 53 | static const UChar QUOTATION_MARK = ((UChar) 0x0022); | 
|---|
| 54 | static const UChar NUMBER_SIGN = ((UChar) 0x0023); | 
|---|
| 55 | static const UChar PERCENT_SIGN = ((UChar) 0x0025); | 
|---|
| 56 | static const UChar ASTERISK = ((UChar) 0x002A); | 
|---|
| 57 | static const UChar COMMA = ((UChar) 0x002C); | 
|---|
| 58 | static const UChar HYPHEN = ((UChar) 0x002D); | 
|---|
| 59 | static const UChar U_ZERO = ((UChar) 0x0030); | 
|---|
| 60 | static const UChar U_ONE = ((UChar) 0x0031); | 
|---|
| 61 | static const UChar U_TWO = ((UChar) 0x0032); | 
|---|
| 62 | static const UChar U_THREE = ((UChar) 0x0033); | 
|---|
| 63 | static const UChar U_FOUR = ((UChar) 0x0034); | 
|---|
| 64 | static const UChar U_FIVE = ((UChar) 0x0035); | 
|---|
| 65 | static const UChar U_SIX = ((UChar) 0x0036); | 
|---|
| 66 | static const UChar U_SEVEN = ((UChar) 0x0037); | 
|---|
| 67 | static const UChar U_EIGHT = ((UChar) 0x0038); | 
|---|
| 68 | static const UChar U_NINE = ((UChar) 0x0039); | 
|---|
| 69 | static const UChar COLON = ((UChar) 0x003A); | 
|---|
| 70 | static const UChar SEMI_COLON = ((UChar) 0x003B); | 
|---|
| 71 | static const UChar EQUALS = ((UChar) 0x003D); | 
|---|
| 72 | static const UChar AT = ((UChar) 0x0040); | 
|---|
| 73 | static const UChar CAP_A = ((UChar) 0x0041); | 
|---|
| 74 | static const UChar CAP_B = ((UChar) 0x0042); | 
|---|
| 75 | static const UChar CAP_R = ((UChar) 0x0052); | 
|---|
| 76 | static const UChar CAP_Z = ((UChar) 0x005A); | 
|---|
| 77 | static const UChar LOWLINE = ((UChar) 0x005F); | 
|---|
| 78 | static const UChar LEFTBRACE = ((UChar) 0x007B); | 
|---|
| 79 | static const UChar RIGHTBRACE = ((UChar) 0x007D); | 
|---|
| 80 | static const UChar TILDE = ((UChar) 0x007E); | 
|---|
| 81 | static const UChar ELLIPSIS = ((UChar) 0x2026); | 
|---|
| 82 |  | 
|---|
| 83 | static const UChar LOW_A = ((UChar) 0x0061); | 
|---|
| 84 | static const UChar LOW_B = ((UChar) 0x0062); | 
|---|
| 85 | static const UChar LOW_C = ((UChar) 0x0063); | 
|---|
| 86 | static const UChar LOW_D = ((UChar) 0x0064); | 
|---|
| 87 | static const UChar LOW_E = ((UChar) 0x0065); | 
|---|
| 88 | static const UChar LOW_F = ((UChar) 0x0066); | 
|---|
| 89 | static const UChar LOW_G = ((UChar) 0x0067); | 
|---|
| 90 | static const UChar LOW_H = ((UChar) 0x0068); | 
|---|
| 91 | static const UChar LOW_I = ((UChar) 0x0069); | 
|---|
| 92 | static const UChar LOW_J = ((UChar) 0x006a); | 
|---|
| 93 | static const UChar LOW_K = ((UChar) 0x006B); | 
|---|
| 94 | static const UChar LOW_L = ((UChar) 0x006C); | 
|---|
| 95 | static const UChar LOW_M = ((UChar) 0x006D); | 
|---|
| 96 | static const UChar LOW_N = ((UChar) 0x006E); | 
|---|
| 97 | static const UChar LOW_O = ((UChar) 0x006F); | 
|---|
| 98 | static const UChar LOW_P = ((UChar) 0x0070); | 
|---|
| 99 | static const UChar LOW_Q = ((UChar) 0x0071); | 
|---|
| 100 | static const UChar LOW_R = ((UChar) 0x0072); | 
|---|
| 101 | static const UChar LOW_S = ((UChar) 0x0073); | 
|---|
| 102 | static const UChar LOW_T = ((UChar) 0x0074); | 
|---|
| 103 | static const UChar LOW_U = ((UChar) 0x0075); | 
|---|
| 104 | static const UChar LOW_V = ((UChar) 0x0076); | 
|---|
| 105 | static const UChar LOW_W = ((UChar) 0x0077); | 
|---|
| 106 | static const UChar LOW_Y = ((UChar) 0x0079); | 
|---|
| 107 | static const UChar LOW_Z = ((UChar) 0x007A); | 
|---|
| 108 |  | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 |  | 
|---|
// Largest value representable in an int32_t (2^31 - 1).
// NOTE(review): presumably the sentinel for an open-ended upper bound in
// range lists -- confirm at the use sites.
static const int32_t PLURAL_RANGE_HIGH = 0x7FFFFFFF;
|---|
| 113 |  | 
|---|
// Token categories used by PluralRuleParser (its `type` / `prevType` members)
// and by AndConstraint::digitsType. Enumerator values are assigned implicitly
// in declaration order, starting from 0, so the order below must not change.
enum tokenType {
    none,

    // Numbers, separators, and sample-list punctuation.
    tNumber, tComma, tSemiColon, tSpace, tColon,
    tAt,           // '@'
    tDot, tDot2, tEllipsis,

    // Keywords and operators.
    tKeyword, tAnd, tOr,
    tMod,          // 'mod' or '%'
    tNot,          // 'not' only.
    tIn,           // 'in'  only.
    tEqual,        // '='   only.
    tNotEqual,     // '!='
    tTilde, tWithin, tIs,

    // Operand variables.
    tVariableN, tVariableI, tVariableF, tVariableV, tVariableT,

    // Sample-section markers and end of input.
    tDecimal, tInteger,
    tEOF
};
|---|
| 145 |  | 
|---|
| 146 |  | 
|---|
/**
 * Parser for plural-rule source text. This header only declares it: a parse
 * entry point, a token scanner, a syntax checker, and the scanner/builder
 * state they share. The definitions live elsewhere.
 */
class PluralRuleParser: public UMemory {
public:
    PluralRuleParser();
    virtual ~PluralRuleParser();

    // Takes the rule text, a PluralRules object, and an in/out error code.
    // NOTE(review): `dest` is presumably the object that receives the parsed
    // rule chains -- confirm in the implementation.
    void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status);
    // NOTE(review): presumably scans from ruleIndex and updates token/type --
    // confirm in the implementation.
    void getNextToken(UErrorCode &status);
    // NOTE(review): presumably validates `type` against `prevType` -- confirm.
    void checkSyntax(UErrorCode &status);
    // NOTE(review): presumably converts a digit-string token to its integer
    // value -- confirm, including behavior on non-digit input.
    static int32_t getNumberValue(const UnicodeString &token);

private:
    // Token classification helpers; all are static and take their input explicitly.
    static tokenType getKeyType(const UnicodeString& token, tokenType type);
    static tokenType charType(UChar ch);
    static UBool isValidKeyword(const UnicodeString& token);

    const UnicodeString  *ruleSrc;  // The rules string.
    int32_t        ruleIndex;       // String index in the input rules, the current parse position.
    UnicodeString  token;           // Token most recently scanned.
    tokenType      type;            // NOTE(review): presumably the type of `token` -- confirm.
    tokenType      prevType;        // NOTE(review): presumably the type of the token before it -- confirm.

    // The items currently being parsed & built.
    // Note: currentChain may not be the last RuleChain in the
    //       list because the "other" chain is forced to the end.
    AndConstraint *curAndConstraint;
    RuleChain     *currentChain;

    int32_t        rangeLowIdx;     // Indices in the UVector of ranges of the
    int32_t        rangeHiIdx;      //    low and hi values currently being parsed.

    // Parse states. No data member of this type is declared in this class.
    enum EParseState {
       kKeyword,
       kExpr,
       kValue,
       kRangeList,
       kSamples
    };
};
|---|
| 185 |  | 
|---|
/**
 * Identifies which property of a number is requested through
 * IFixedDecimal::getPluralOperand(). Enumerator values are assigned
 * implicitly in declaration order, starting from 0.
 */
enum PluralOperand {
    /** The double value of the entire number. */
    PLURAL_OPERAND_N,

    /** The integer value, with the fraction digits truncated off. */
    PLURAL_OPERAND_I,

    /** All visible fraction digits as an integer, including trailing zeros. */
    PLURAL_OPERAND_F,

    /** Visible fraction digits as an integer, not including trailing zeros. */
    PLURAL_OPERAND_T,

    /** Number of visible fraction digits. */
    PLURAL_OPERAND_V,

    /** Number of visible fraction digits, not including trailing zeros. */
    PLURAL_OPERAND_W,

    /**
     * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
     *
     * <p>Returns the integer value, but will fail if the number has fraction
     * digits. In other words, writing "j" in place of "i" behaves like
     * implicitly adding "v is 0".
     *
     * <p>Example: "j is 3" is equivalent to "i is 3 and v is 0"; it matches
     * "3" but not "3.1" or "3.0".
     */
    PLURAL_OPERAND_J
};
|---|
| 228 |  | 
|---|
/**
 * Converts from the tokenType enum to PluralOperand. Asserts that the given
 * tokenType can be mapped to a PluralOperand.
 *
 * NOTE(review): presumably only the tVariable* token types are mappable, and
 * the assertion is a debug-only U_ASSERT -- confirm in the definition.
 *
 * @param tt the token type to convert.
 * @return the corresponding PluralOperand.
 */
PluralOperand tokenTypeToPluralOperand(tokenType tt);
|---|
| 234 |  | 
|---|
/**
 * An interface to FixedDecimal, allowing for other implementations.
 * Every member function is pure virtual; AndConstraint::isFulfilled(),
 * OrConstraint::isFulfilled() and RuleChain::select() take their number
 * argument through this interface.
 * @internal
 */
class U_I18N_API IFixedDecimal {
  public:
    virtual ~IFixedDecimal();

    /**
     * Returns the value corresponding to the specified operand (n, i, f, t, v, or w).
     * The return type is double for every operand; for operands other than 'n'
     * the returned value is an integer.
     * NOTE(review): PluralOperand also declares PLURAL_OPERAND_J, which the
     * list above does not mention -- confirm how implementations treat it.
     */
    virtual double getPluralOperand(PluralOperand operand) const = 0;

    virtual bool isNaN() const = 0;

    virtual bool isInfinite() const = 0;

    /** Whether the number has no nonzero fraction digits. */
    virtual bool hasIntegerValue() const = 0;
};
|---|
| 256 |  | 
|---|
/**
 * class FixedDecimal serves to communicate the properties
 * of a formatted number from a decimal formatter to PluralRules::select()
 *
 * It implements IFixedDecimal and exposes its state as public data members.
 *
 * see DecimalFormat::getFixedDecimal()
 * @internal
 */
class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject {
  public:
    /**
      * @param n   the number, e.g. 12.345
      * @param v   The number of visible fraction digits, e.g. 3
      * @param f   The fraction digits, e.g. 345
      */
    FixedDecimal(double  n, int32_t v, int64_t f);
    // NOTE(review): the second parameter is unnamed here; presumably it is the
    // visible fraction digit count `v` -- confirm in the definition.
    FixedDecimal(double n, int32_t);
    explicit FixedDecimal(double n);
    FixedDecimal();
    ~FixedDecimal() U_OVERRIDE;
    // NOTE(review): presumably parses `s` as a decimal number and reports
    // failure through `ec` -- confirm in the definition.
    FixedDecimal(const UnicodeString &s, UErrorCode &ec);
    FixedDecimal(const FixedDecimal &other);

    // IFixedDecimal overrides.
    double getPluralOperand(PluralOperand operand) const U_OVERRIDE;
    bool isNaN() const U_OVERRIDE;
    bool isInfinite() const U_OVERRIDE;
    bool hasIntegerValue() const U_OVERRIDE;

    bool isNanOrInfinity() const;  // used in decimfmtimpl.cpp

    int32_t getVisibleFractionDigitCount() const;

    // Parameters n, v, f have the meanings documented on the three-argument
    // constructor above.
    void init(double n, int32_t v, int64_t f);
    void init(double n);
    UBool quickInit(double n);  // Try a fast-path only initialization,
                                //    return TRUE if successful.
    void adjustForMinFractionDigits(int32_t min);
    // Static helpers taking the number (and, for the first, a digit count v).
    // NOTE(review): exact rounding/overflow behavior is not visible here.
    static int64_t getFractionalDigits(double n, int32_t v);
    static int32_t decimals(double n);

    // Public state. Names suggest a correspondence with the PluralOperand
    // values (n, v, f, t, i) -- TODO confirm against getPluralOperand().
    double      source;
    int32_t     visibleDecimalDigitCount;
    int64_t     decimalDigits;
    int64_t     decimalDigitsWithoutTrailingZeros;
    int64_t     intValue;
    UBool       _hasIntegerValue;
    UBool       isNegative;
    UBool       _isNaN;
    UBool       _isInfinite;
};
|---|
| 306 |  | 
|---|
/**
 * One node in a singly linked list (via `next`) of constraints. Each node
 * carries an optional mod operation, a single value or a range list, and
 * negation/integer-only flags. All data members are public and have default
 * member initializers.
 */
class AndConstraint : public UMemory  {
public:
    // Operation applied to the operand before comparison.
    typedef enum RuleOp {
        NONE,
        MOD
    } RuleOp;
    RuleOp op = AndConstraint::NONE;
    int32_t opNum = -1;             // for mod expressions, the right operand of the mod.
    int32_t value = -1;             // valid for 'is' rules only.
    UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise.
    UBool negated = FALSE;          // TRUE for negated rules.
    // NOTE(review): the name suggests TRUE means integer-only ('in') matching
    // and FALSE means 'within'; the comment below may be inverted -- confirm
    // against where the parser assigns this field.
    UBool integerOnly = FALSE;      // TRUE for 'within' rules.
    // NOTE(review): presumably one of the tVariable* token types (tokenType
    // also declares tVariableT) -- confirm.
    tokenType digitsType = none;    // n | i | v | f constraint.
    AndConstraint *next = nullptr;
    // Internal error status, used for errors that occur during the copy constructor.
    UErrorCode fInternalStatus = U_ZERO_ERROR;

    AndConstraint() = default;
    AndConstraint(const AndConstraint& other);
    virtual ~AndConstraint();
    // NOTE(review): presumably allocates a new node, links it through `next`,
    // and returns it -- confirm ownership and failure behavior in the definition.
    AndConstraint* add(UErrorCode& status);
    // UBool isFulfilled(double number);
    UBool isFulfilled(const IFixedDecimal &number);
};
|---|
| 331 |  | 
|---|
/**
 * One node in a singly linked list (via `next`) of alternatives. Each node
 * points to an AndConstraint through `childNode`. All data members are public
 * and have default member initializers.
 */
class OrConstraint : public UMemory  {
public:
    AndConstraint *childNode = nullptr;
    OrConstraint *next = nullptr;
    // Internal error status, used for errors that occur during the copy constructor.
    UErrorCode fInternalStatus = U_ZERO_ERROR;

    OrConstraint() = default;
    OrConstraint(const OrConstraint& other);
    virtual ~OrConstraint();
    // Returns an AndConstraint pointer (not an OrConstraint).
    // NOTE(review): presumably creates and returns the child AndConstraint of
    // a newly appended alternative -- confirm in the definition.
    AndConstraint* add(UErrorCode& status);
    // UBool isFulfilled(double number);
    UBool isFulfilled(const IFixedDecimal &number);
};
|---|
| 346 |  | 
|---|
| 347 | class RuleChain : public UMemory  { | 
|---|
| 348 | public: | 
|---|
| 349 | UnicodeString   fKeyword; | 
|---|
| 350 | RuleChain      *fNext = nullptr; | 
|---|
| 351 | OrConstraint   * = nullptr; | 
|---|
| 352 | UnicodeString   fDecimalSamples;  // Samples strings from rule source | 
|---|
| 353 | UnicodeString   fIntegerSamples;  //   without @decimal or @integer, otherwise unprocessed. | 
|---|
| 354 | UBool           fDecimalSamplesUnbounded = FALSE; | 
|---|
| 355 | UBool           fIntegerSamplesUnbounded = FALSE; | 
|---|
| 356 | // Internal error status, used for errors that occur during the copy constructor. | 
|---|
| 357 | UErrorCode      fInternalStatus = U_ZERO_ERROR; | 
|---|
| 358 |  | 
|---|
| 359 | RuleChain() = default; | 
|---|
| 360 | RuleChain(const RuleChain& other); | 
|---|
| 361 | virtual ~RuleChain(); | 
|---|
| 362 |  | 
|---|
| 363 | UnicodeString select(const IFixedDecimal &number) const; | 
|---|
| 364 | void          dumpRules(UnicodeString& result); | 
|---|
| 365 | UErrorCode    getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; | 
|---|
| 366 | UBool         isKeyword(const UnicodeString& keyword) const; | 
|---|
| 367 | }; | 
|---|
| 368 |  | 
|---|
| 369 | class PluralKeywordEnumeration : public StringEnumeration { | 
|---|
| 370 | public: | 
|---|
| 371 | PluralKeywordEnumeration(RuleChain *, UErrorCode& status); | 
|---|
| 372 | virtual ~PluralKeywordEnumeration(); | 
|---|
| 373 | static UClassID U_EXPORT2 getStaticClassID(void); | 
|---|
| 374 | virtual UClassID getDynamicClassID(void) const; | 
|---|
| 375 | virtual const UnicodeString* snext(UErrorCode& status); | 
|---|
| 376 | virtual void reset(UErrorCode& status); | 
|---|
| 377 | virtual int32_t count(UErrorCode& status) const; | 
|---|
| 378 | private: | 
|---|
| 379 | int32_t         pos; | 
|---|
| 380 | UVector         fKeywordNames; | 
|---|
| 381 | }; | 
|---|
| 382 |  | 
|---|
| 383 |  | 
|---|
| 384 | class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { | 
|---|
| 385 | public: | 
|---|
| 386 | PluralAvailableLocalesEnumeration(UErrorCode &status); | 
|---|
| 387 | virtual ~PluralAvailableLocalesEnumeration(); | 
|---|
| 388 | virtual const char* next(int32_t *resultLength, UErrorCode& status); | 
|---|
| 389 | virtual void reset(UErrorCode& status); | 
|---|
| 390 | virtual int32_t count(UErrorCode& status) const; | 
|---|
| 391 | private: | 
|---|
| 392 | UErrorCode      fOpenStatus; | 
|---|
| 393 | UResourceBundle *fLocales = nullptr; | 
|---|
| 394 | UResourceBundle *fRes = nullptr; | 
|---|
| 395 | }; | 
|---|
| 396 |  | 
|---|
| 397 | U_NAMESPACE_END | 
|---|
| 398 |  | 
|---|
| 399 | #endif /* #if !UCONFIG_NO_FORMATTING */ | 
|---|
| 400 |  | 
|---|
| 401 | #endif // PLURRULE_IMPL | 
|---|
| 402 | //eof | 
|---|
| 403 |  | 
|---|