1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * |
9 | * File PLURRULE_IMPL.H |
10 | * |
11 | ******************************************************************************* |
12 | */ |
13 | |
14 | |
15 | #ifndef PLURRULE_IMPL |
16 | #define PLURRULE_IMPL |
17 | |
18 | // Internal definitions for the PluralRules implementation. |
19 | |
20 | #include "unicode/utypes.h" |
21 | |
22 | #if !UCONFIG_NO_FORMATTING |
23 | |
24 | #include "unicode/format.h" |
25 | #include "unicode/locid.h" |
26 | #include "unicode/parseerr.h" |
27 | #include "unicode/strenum.h" |
28 | #include "unicode/ures.h" |
29 | #include "uvector.h" |
30 | #include "hash.h" |
31 | #include "uassert.h" |
32 | |
33 | class PluralRulesTest; |
34 | |
35 | U_NAMESPACE_BEGIN |
36 | |
37 | class AndConstraint; |
38 | class RuleChain; |
39 | class DigitInterval; |
40 | class PluralRules; |
41 | class VisibleDigits; |
42 | |
43 | namespace pluralimpl { |
44 | |
45 | // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. |
46 | |
47 | static const UChar DOT = ((UChar) 0x002E); |
48 | static const UChar SINGLE_QUOTE = ((UChar) 0x0027); |
49 | static const UChar SLASH = ((UChar) 0x002F); |
50 | static const UChar BACKSLASH = ((UChar) 0x005C); |
51 | static const UChar SPACE = ((UChar) 0x0020); |
52 | static const UChar EXCLAMATION = ((UChar) 0x0021); |
53 | static const UChar QUOTATION_MARK = ((UChar) 0x0022); |
54 | static const UChar NUMBER_SIGN = ((UChar) 0x0023); |
55 | static const UChar PERCENT_SIGN = ((UChar) 0x0025); |
56 | static const UChar ASTERISK = ((UChar) 0x002A); |
57 | static const UChar COMMA = ((UChar) 0x002C); |
58 | static const UChar HYPHEN = ((UChar) 0x002D); |
59 | static const UChar U_ZERO = ((UChar) 0x0030); |
60 | static const UChar U_ONE = ((UChar) 0x0031); |
61 | static const UChar U_TWO = ((UChar) 0x0032); |
62 | static const UChar U_THREE = ((UChar) 0x0033); |
63 | static const UChar U_FOUR = ((UChar) 0x0034); |
64 | static const UChar U_FIVE = ((UChar) 0x0035); |
65 | static const UChar U_SIX = ((UChar) 0x0036); |
66 | static const UChar U_SEVEN = ((UChar) 0x0037); |
67 | static const UChar U_EIGHT = ((UChar) 0x0038); |
68 | static const UChar U_NINE = ((UChar) 0x0039); |
69 | static const UChar COLON = ((UChar) 0x003A); |
70 | static const UChar SEMI_COLON = ((UChar) 0x003B); |
71 | static const UChar EQUALS = ((UChar) 0x003D); |
72 | static const UChar AT = ((UChar) 0x0040); |
73 | static const UChar CAP_A = ((UChar) 0x0041); |
74 | static const UChar CAP_B = ((UChar) 0x0042); |
75 | static const UChar CAP_R = ((UChar) 0x0052); |
76 | static const UChar CAP_Z = ((UChar) 0x005A); |
77 | static const UChar LOWLINE = ((UChar) 0x005F); |
78 | static const UChar LEFTBRACE = ((UChar) 0x007B); |
79 | static const UChar RIGHTBRACE = ((UChar) 0x007D); |
80 | static const UChar TILDE = ((UChar) 0x007E); |
81 | static const UChar ELLIPSIS = ((UChar) 0x2026); |
82 | |
83 | static const UChar LOW_A = ((UChar) 0x0061); |
84 | static const UChar LOW_B = ((UChar) 0x0062); |
85 | static const UChar LOW_C = ((UChar) 0x0063); |
86 | static const UChar LOW_D = ((UChar) 0x0064); |
87 | static const UChar LOW_E = ((UChar) 0x0065); |
88 | static const UChar LOW_F = ((UChar) 0x0066); |
89 | static const UChar LOW_G = ((UChar) 0x0067); |
90 | static const UChar LOW_H = ((UChar) 0x0068); |
91 | static const UChar LOW_I = ((UChar) 0x0069); |
92 | static const UChar LOW_J = ((UChar) 0x006a); |
93 | static const UChar LOW_K = ((UChar) 0x006B); |
94 | static const UChar LOW_L = ((UChar) 0x006C); |
95 | static const UChar LOW_M = ((UChar) 0x006D); |
96 | static const UChar LOW_N = ((UChar) 0x006E); |
97 | static const UChar LOW_O = ((UChar) 0x006F); |
98 | static const UChar LOW_P = ((UChar) 0x0070); |
99 | static const UChar LOW_Q = ((UChar) 0x0071); |
100 | static const UChar LOW_R = ((UChar) 0x0072); |
101 | static const UChar LOW_S = ((UChar) 0x0073); |
102 | static const UChar LOW_T = ((UChar) 0x0074); |
103 | static const UChar LOW_U = ((UChar) 0x0075); |
104 | static const UChar LOW_V = ((UChar) 0x0076); |
105 | static const UChar LOW_W = ((UChar) 0x0077); |
106 | static const UChar LOW_Y = ((UChar) 0x0079); |
107 | static const UChar LOW_Z = ((UChar) 0x007A); |
108 | |
109 | } |
110 | |
111 | |
// Equal to the largest value an int32_t can hold (2^31 - 1).
// NOTE(review): presumably used as an upper-bound sentinel for rule ranges —
// confirm against the users of this constant in the implementation file.
static constexpr int32_t PLURAL_RANGE_HIGH = 0x7FFFFFFF;
113 | |
/**
 * Token kinds used by the plural rule parser.
 *
 * Numeric values are assigned implicitly in declaration order, starting at 0
 * for `none` and ending with `tEOF`.
 */
enum tokenType {
    none = 0,
    tNumber,
    tComma,
    tSemiColon,
    tSpace,
    tColon,
    tAt,         // '@'
    tDot,
    tDot2,
    tEllipsis,
    tKeyword,
    tAnd,
    tOr,
    tMod,        // 'mod' or '%'
    tNot,        // 'not' only.
    tIn,         // 'in' only.
    tEqual,      // '=' only.
    tNotEqual,   // '!='
    tTilde,
    tWithin,
    tIs,
    // Operand variables.
    // NOTE(review): presumably the rule operands n, i, f, v, t — confirm.
    tVariableN,
    tVariableI,
    tVariableF,
    tVariableV,
    tVariableT,
    tDecimal,
    tInteger,
    tEOF
};
145 | |
146 | |
147 | class PluralRuleParser: public UMemory { |
148 | public: |
149 | PluralRuleParser(); |
150 | virtual ~PluralRuleParser(); |
151 | |
152 | void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); |
153 | void getNextToken(UErrorCode &status); |
154 | void checkSyntax(UErrorCode &status); |
155 | static int32_t getNumberValue(const UnicodeString &token); |
156 | |
157 | private: |
158 | static tokenType getKeyType(const UnicodeString& token, tokenType type); |
159 | static tokenType charType(UChar ch); |
160 | static UBool isValidKeyword(const UnicodeString& token); |
161 | |
162 | const UnicodeString *ruleSrc; // The rules string. |
163 | int32_t ruleIndex; // String index in the input rules, the current parse position. |
164 | UnicodeString token; // Token most recently scanned. |
165 | tokenType type; |
166 | tokenType prevType; |
167 | |
168 | // The items currently being parsed & built. |
169 | // Note: currentChain may not be the last RuleChain in the |
170 | // list because the "other" chain is forced to the end. |
171 | AndConstraint *curAndConstraint; |
172 | RuleChain *currentChain; |
173 | |
174 | int32_t rangeLowIdx; // Indices in the UVector of ranges of the |
175 | int32_t rangeHiIdx; // low and hi values currently being parsed. |
176 | |
177 | enum EParseState { |
178 | kKeyword, |
179 | kExpr, |
180 | kValue, |
181 | kRangeList, |
182 | kSamples |
183 | }; |
184 | }; |
185 | |
/**
 * Operands of a number that plural rules can test.
 * Values are assigned implicitly in declaration order, starting at 0.
 */
enum PluralOperand {
    /** n: the double value of the entire number. */
    PLURAL_OPERAND_N,

    /** i: the integer value, with the fraction digits truncated off. */
    PLURAL_OPERAND_I,

    /** f: all visible fraction digits as an integer, including trailing zeros. */
    PLURAL_OPERAND_F,

    /** t: visible fraction digits as an integer, not including trailing zeros. */
    PLURAL_OPERAND_T,

    /** v: number of visible fraction digits. */
    PLURAL_OPERAND_V,

    /** w: number of visible fraction digits, not including trailing zeros. */
    PLURAL_OPERAND_W,

    /**
     * e: suppressed exponent for compact notation (exponent needed in
     * scientific notation with compact notation to approximate i).
     */
    PLURAL_OPERAND_E,

    /**
     * j: THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
     *
     * <p>Returns the integer value, but will fail if the number has fraction digits.
     * That is, using "j" instead of "i" is like implicitly adding "v is 0".
     *
     * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches
     * "3" but not "3.1" or "3.0".
     */
    PLURAL_OPERAND_J
};
234 | |
235 | /** |
236 | * Converts from the tokenType enum to PluralOperand. Asserts that the given |
237 | * tokenType can be mapped to a PluralOperand. |
238 | */ |
239 | PluralOperand tokenTypeToPluralOperand(tokenType tt); |
240 | |
241 | /** |
242 | * An interface to FixedDecimal, allowing for other implementations. |
243 | * @internal |
244 | */ |
245 | class U_I18N_API IFixedDecimal { |
246 | public: |
247 | virtual ~IFixedDecimal(); |
248 | |
249 | /** |
250 | * Returns the value corresponding to the specified operand (n, i, f, t, v, or w). |
251 | * If the operand is 'n', returns a double; otherwise, returns an integer. |
252 | */ |
253 | virtual double getPluralOperand(PluralOperand operand) const = 0; |
254 | |
255 | virtual bool isNaN() const = 0; |
256 | |
257 | virtual bool isInfinite() const = 0; |
258 | |
259 | /** Whether the number has no nonzero fraction digits. */ |
260 | virtual bool hasIntegerValue() const = 0; |
261 | }; |
262 | |
263 | /** |
264 | * class FixedDecimal serves to communicate the properties |
265 | * of a formatted number from a decimal formatter to PluralRules::select() |
266 | * |
267 | * see DecimalFormat::getFixedDecimal() |
268 | * @internal |
269 | */ |
270 | class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { |
271 | public: |
272 | /** |
273 | * @param n the number, e.g. 12.345 |
274 | * @param v The number of visible fraction digits, e.g. 3 |
275 | * @param f The fraction digits, e.g. 345 |
276 | */ |
277 | FixedDecimal(double n, int32_t v, int64_t f); |
278 | FixedDecimal(double n, int32_t); |
279 | explicit FixedDecimal(double n); |
280 | FixedDecimal(); |
281 | ~FixedDecimal() U_OVERRIDE; |
282 | FixedDecimal(const UnicodeString &s, UErrorCode &ec); |
283 | FixedDecimal(const FixedDecimal &other); |
284 | |
285 | double getPluralOperand(PluralOperand operand) const U_OVERRIDE; |
286 | bool isNaN() const U_OVERRIDE; |
287 | bool isInfinite() const U_OVERRIDE; |
288 | bool hasIntegerValue() const U_OVERRIDE; |
289 | |
290 | bool isNanOrInfinity() const; // used in decimfmtimpl.cpp |
291 | |
292 | int32_t getVisibleFractionDigitCount() const; |
293 | |
294 | void init(double n, int32_t v, int64_t f); |
295 | void init(double n); |
296 | UBool quickInit(double n); // Try a fast-path only initialization, |
297 | // return TRUE if successful. |
298 | void adjustForMinFractionDigits(int32_t min); |
299 | static int64_t getFractionalDigits(double n, int32_t v); |
300 | static int32_t decimals(double n); |
301 | |
302 | double source; |
303 | int32_t visibleDecimalDigitCount; |
304 | int64_t decimalDigits; |
305 | int64_t decimalDigitsWithoutTrailingZeros; |
306 | int64_t intValue; |
307 | UBool _hasIntegerValue; |
308 | UBool isNegative; |
309 | UBool _isNaN; |
310 | UBool _isInfinite; |
311 | }; |
312 | |
313 | class AndConstraint : public UMemory { |
314 | public: |
315 | typedef enum RuleOp { |
316 | NONE, |
317 | MOD |
318 | } RuleOp; |
319 | RuleOp op = AndConstraint::NONE; |
320 | int32_t opNum = -1; // for mod expressions, the right operand of the mod. |
321 | int32_t value = -1; // valid for 'is' rules only. |
322 | UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. |
323 | UBool negated = FALSE; // TRUE for negated rules. |
324 | UBool integerOnly = FALSE; // TRUE for 'within' rules. |
325 | tokenType digitsType = none; // n | i | v | f constraint. |
326 | AndConstraint *next = nullptr; |
327 | // Internal error status, used for errors that occur during the copy constructor. |
328 | UErrorCode fInternalStatus = U_ZERO_ERROR; |
329 | |
330 | AndConstraint() = default; |
331 | AndConstraint(const AndConstraint& other); |
332 | virtual ~AndConstraint(); |
333 | AndConstraint* add(UErrorCode& status); |
334 | // UBool isFulfilled(double number); |
335 | UBool isFulfilled(const IFixedDecimal &number); |
336 | }; |
337 | |
338 | class OrConstraint : public UMemory { |
339 | public: |
340 | AndConstraint *childNode = nullptr; |
341 | OrConstraint *next = nullptr; |
342 | // Internal error status, used for errors that occur during the copy constructor. |
343 | UErrorCode fInternalStatus = U_ZERO_ERROR; |
344 | |
345 | OrConstraint() = default; |
346 | OrConstraint(const OrConstraint& other); |
347 | virtual ~OrConstraint(); |
348 | AndConstraint* add(UErrorCode& status); |
349 | // UBool isFulfilled(double number); |
350 | UBool isFulfilled(const IFixedDecimal &number); |
351 | }; |
352 | |
353 | class RuleChain : public UMemory { |
354 | public: |
355 | UnicodeString fKeyword; |
356 | RuleChain *fNext = nullptr; |
357 | OrConstraint * = nullptr; |
358 | UnicodeString fDecimalSamples; // Samples strings from rule source |
359 | UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. |
360 | UBool fDecimalSamplesUnbounded = FALSE; |
361 | UBool fIntegerSamplesUnbounded = FALSE; |
362 | // Internal error status, used for errors that occur during the copy constructor. |
363 | UErrorCode fInternalStatus = U_ZERO_ERROR; |
364 | |
365 | RuleChain() = default; |
366 | RuleChain(const RuleChain& other); |
367 | virtual ~RuleChain(); |
368 | |
369 | UnicodeString select(const IFixedDecimal &number) const; |
370 | void dumpRules(UnicodeString& result); |
371 | UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; |
372 | UBool isKeyword(const UnicodeString& keyword) const; |
373 | }; |
374 | |
375 | class PluralKeywordEnumeration : public StringEnumeration { |
376 | public: |
377 | PluralKeywordEnumeration(RuleChain *, UErrorCode& status); |
378 | virtual ~PluralKeywordEnumeration(); |
379 | static UClassID U_EXPORT2 getStaticClassID(void); |
380 | virtual UClassID getDynamicClassID(void) const; |
381 | virtual const UnicodeString* snext(UErrorCode& status); |
382 | virtual void reset(UErrorCode& status); |
383 | virtual int32_t count(UErrorCode& status) const; |
384 | private: |
385 | int32_t pos; |
386 | UVector fKeywordNames; |
387 | }; |
388 | |
389 | |
390 | class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { |
391 | public: |
392 | PluralAvailableLocalesEnumeration(UErrorCode &status); |
393 | virtual ~PluralAvailableLocalesEnumeration(); |
394 | virtual const char* next(int32_t *resultLength, UErrorCode& status); |
395 | virtual void reset(UErrorCode& status); |
396 | virtual int32_t count(UErrorCode& status) const; |
397 | private: |
398 | UErrorCode fOpenStatus; |
399 | UResourceBundle *fLocales = nullptr; |
400 | UResourceBundle *fRes = nullptr; |
401 | }; |
402 | |
403 | U_NAMESPACE_END |
404 | |
405 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
406 | |
#endif // PLURRULE_IMPL
408 | //eof |
409 | |