1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * |
9 | * File PLURRULE_IMPL.H |
10 | * |
11 | ******************************************************************************* |
12 | */ |
13 | |
14 | |
15 | #ifndef PLURRULE_IMPL |
16 | #define PLURRULE_IMPL |
17 | |
18 | // Internal definitions for the PluralRules implementation. |
19 | |
20 | #include "unicode/utypes.h" |
21 | |
22 | #if !UCONFIG_NO_FORMATTING |
23 | |
24 | #include "unicode/format.h" |
25 | #include "unicode/locid.h" |
26 | #include "unicode/parseerr.h" |
27 | #include "unicode/strenum.h" |
28 | #include "unicode/ures.h" |
29 | #include "uvector.h" |
30 | #include "hash.h" |
31 | #include "uassert.h" |
32 | |
// Forward declaration only; the class is declared outside the ICU namespace.
class PluralRulesTest;
34 | |
35 | U_NAMESPACE_BEGIN |
36 | |
// Forward declarations of types that this header refers to by pointer or
// reference before (or without) seeing their full definitions.
class AndConstraint;
class DigitInterval;
class PluralRules;
class RuleChain;
class VisibleDigits;
42 | |
43 | namespace pluralimpl { |
44 | |
45 | // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. |
46 | |
47 | static const UChar DOT = ((UChar) 0x002E); |
48 | static const UChar SINGLE_QUOTE = ((UChar) 0x0027); |
49 | static const UChar SLASH = ((UChar) 0x002F); |
50 | static const UChar BACKSLASH = ((UChar) 0x005C); |
51 | static const UChar SPACE = ((UChar) 0x0020); |
52 | static const UChar EXCLAMATION = ((UChar) 0x0021); |
53 | static const UChar QUOTATION_MARK = ((UChar) 0x0022); |
54 | static const UChar NUMBER_SIGN = ((UChar) 0x0023); |
55 | static const UChar PERCENT_SIGN = ((UChar) 0x0025); |
56 | static const UChar ASTERISK = ((UChar) 0x002A); |
57 | static const UChar COMMA = ((UChar) 0x002C); |
58 | static const UChar HYPHEN = ((UChar) 0x002D); |
59 | static const UChar U_ZERO = ((UChar) 0x0030); |
60 | static const UChar U_ONE = ((UChar) 0x0031); |
61 | static const UChar U_TWO = ((UChar) 0x0032); |
62 | static const UChar U_THREE = ((UChar) 0x0033); |
63 | static const UChar U_FOUR = ((UChar) 0x0034); |
64 | static const UChar U_FIVE = ((UChar) 0x0035); |
65 | static const UChar U_SIX = ((UChar) 0x0036); |
66 | static const UChar U_SEVEN = ((UChar) 0x0037); |
67 | static const UChar U_EIGHT = ((UChar) 0x0038); |
68 | static const UChar U_NINE = ((UChar) 0x0039); |
69 | static const UChar COLON = ((UChar) 0x003A); |
70 | static const UChar SEMI_COLON = ((UChar) 0x003B); |
71 | static const UChar EQUALS = ((UChar) 0x003D); |
72 | static const UChar AT = ((UChar) 0x0040); |
73 | static const UChar CAP_A = ((UChar) 0x0041); |
74 | static const UChar CAP_B = ((UChar) 0x0042); |
75 | static const UChar CAP_R = ((UChar) 0x0052); |
76 | static const UChar CAP_Z = ((UChar) 0x005A); |
77 | static const UChar LOWLINE = ((UChar) 0x005F); |
78 | static const UChar LEFTBRACE = ((UChar) 0x007B); |
79 | static const UChar RIGHTBRACE = ((UChar) 0x007D); |
80 | static const UChar TILDE = ((UChar) 0x007E); |
81 | static const UChar ELLIPSIS = ((UChar) 0x2026); |
82 | |
83 | static const UChar LOW_A = ((UChar) 0x0061); |
84 | static const UChar LOW_B = ((UChar) 0x0062); |
85 | static const UChar LOW_C = ((UChar) 0x0063); |
86 | static const UChar LOW_D = ((UChar) 0x0064); |
87 | static const UChar LOW_E = ((UChar) 0x0065); |
88 | static const UChar LOW_F = ((UChar) 0x0066); |
89 | static const UChar LOW_G = ((UChar) 0x0067); |
90 | static const UChar LOW_H = ((UChar) 0x0068); |
91 | static const UChar LOW_I = ((UChar) 0x0069); |
92 | static const UChar LOW_J = ((UChar) 0x006a); |
93 | static const UChar LOW_K = ((UChar) 0x006B); |
94 | static const UChar LOW_L = ((UChar) 0x006C); |
95 | static const UChar LOW_M = ((UChar) 0x006D); |
96 | static const UChar LOW_N = ((UChar) 0x006E); |
97 | static const UChar LOW_O = ((UChar) 0x006F); |
98 | static const UChar LOW_P = ((UChar) 0x0070); |
99 | static const UChar LOW_Q = ((UChar) 0x0071); |
100 | static const UChar LOW_R = ((UChar) 0x0072); |
101 | static const UChar LOW_S = ((UChar) 0x0073); |
102 | static const UChar LOW_T = ((UChar) 0x0074); |
103 | static const UChar LOW_U = ((UChar) 0x0075); |
104 | static const UChar LOW_V = ((UChar) 0x0076); |
105 | static const UChar LOW_W = ((UChar) 0x0077); |
106 | static const UChar LOW_Y = ((UChar) 0x0079); |
107 | static const UChar LOW_Z = ((UChar) 0x007A); |
108 | |
109 | } |
110 | |
111 | |
// Largest value representable in an int32_t (2^31 - 1).
// NOTE(review): presumably the open-ended upper bound for plural ranges; confirm at the use sites.
static const int32_t PLURAL_RANGE_HIGH = 0x7FFFFFFF;
113 | |
/**
 * Token categories used by PluralRuleParser while scanning a rule string.
 * Enumerators are numbered implicitly from 0, so their order is significant
 * and must not be changed.
 */
enum tokenType {
    none,
    tNumber,
    tComma,
    tSemiColon,
    tSpace,
    tColon,
    tAt,          // '@'
    tDot,
    tDot2,
    tEllipsis,
    tKeyword,
    tAnd,
    tOr,
    tMod,         // 'mod' or '%'
    tNot,         // 'not' only.
    tIn,          // 'in' only.
    tEqual,       // '=' only.
    tNotEqual,    // '!='
    tTilde,
    tWithin,
    tIs,
    tVariableN,
    tVariableI,
    tVariableF,
    tVariableV,
    tVariableT,
    tDecimal,
    tInteger,
    tEOF
};
145 | |
146 | |
147 | class PluralRuleParser: public UMemory { |
148 | public: |
149 | PluralRuleParser(); |
150 | virtual ~PluralRuleParser(); |
151 | |
152 | void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); |
153 | void getNextToken(UErrorCode &status); |
154 | void checkSyntax(UErrorCode &status); |
155 | static int32_t getNumberValue(const UnicodeString &token); |
156 | |
157 | private: |
158 | static tokenType getKeyType(const UnicodeString& token, tokenType type); |
159 | static tokenType charType(UChar ch); |
160 | static UBool isValidKeyword(const UnicodeString& token); |
161 | |
162 | const UnicodeString *ruleSrc; // The rules string. |
163 | int32_t ruleIndex; // String index in the input rules, the current parse position. |
164 | UnicodeString token; // Token most recently scanned. |
165 | tokenType type; |
166 | tokenType prevType; |
167 | |
168 | // The items currently being parsed & built. |
169 | // Note: currentChain may not be the last RuleChain in the |
170 | // list because the "other" chain is forced to the end. |
171 | AndConstraint *curAndConstraint; |
172 | RuleChain *currentChain; |
173 | |
174 | int32_t rangeLowIdx; // Indices in the UVector of ranges of the |
175 | int32_t rangeHiIdx; // low and hi values currently being parsed. |
176 | |
177 | enum EParseState { |
178 | kKeyword, |
179 | kExpr, |
180 | kValue, |
181 | kRangeList, |
182 | kSamples |
183 | }; |
184 | }; |
185 | |
/**
 * Operands that can be requested from an IFixedDecimal. Enumerators are
 * numbered implicitly from 0; keep their order unchanged.
 */
enum PluralOperand {
    /** The entire number, as a double value. */
    PLURAL_OPERAND_N,

    /** The integer value; fraction digits are truncated off. */
    PLURAL_OPERAND_I,

    /** Every visible fraction digit, trailing zeros included, read as an integer. */
    PLURAL_OPERAND_F,

    /** The visible fraction digits with trailing zeros excluded, read as an integer. */
    PLURAL_OPERAND_T,

    /** How many fraction digits are visible. */
    PLURAL_OPERAND_V,

    /** How many fraction digits are visible when trailing zeros are not counted. */
    PLURAL_OPERAND_W,

    /**
     * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
     *
     * <p>Yields the integer value, but fails when the number has fraction
     * digits. In other words, writing "j" in place of "i" behaves as if
     * "v is 0" had been added implicitly.
     *
     * <p>Example: "j is 3" means the same as "i is 3 and v is 0"; it matches
     * "3" but neither "3.1" nor "3.0".
     */
    PLURAL_OPERAND_J
};
228 | |
229 | /** |
230 | * Converts from the tokenType enum to PluralOperand. Asserts that the given |
231 | * tokenType can be mapped to a PluralOperand. |
232 | */ |
233 | PluralOperand tokenTypeToPluralOperand(tokenType tt); |
234 | |
235 | /** |
236 | * An interface to FixedDecimal, allowing for other implementations. |
237 | * @internal |
238 | */ |
239 | class U_I18N_API IFixedDecimal { |
240 | public: |
241 | virtual ~IFixedDecimal(); |
242 | |
243 | /** |
244 | * Returns the value corresponding to the specified operand (n, i, f, t, v, or w). |
245 | * If the operand is 'n', returns a double; otherwise, returns an integer. |
246 | */ |
247 | virtual double getPluralOperand(PluralOperand operand) const = 0; |
248 | |
249 | virtual bool isNaN() const = 0; |
250 | |
251 | virtual bool isInfinite() const = 0; |
252 | |
253 | /** Whether the number has no nonzero fraction digits. */ |
254 | virtual bool hasIntegerValue() const = 0; |
255 | }; |
256 | |
257 | /** |
258 | * class FixedDecimal serves to communicate the properties |
259 | * of a formatted number from a decimal formatter to PluralRules::select() |
260 | * |
261 | * see DecimalFormat::getFixedDecimal() |
262 | * @internal |
263 | */ |
264 | class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { |
265 | public: |
266 | /** |
267 | * @param n the number, e.g. 12.345 |
268 | * @param v The number of visible fraction digits, e.g. 3 |
269 | * @param f The fraction digits, e.g. 345 |
270 | */ |
271 | FixedDecimal(double n, int32_t v, int64_t f); |
272 | FixedDecimal(double n, int32_t); |
273 | explicit FixedDecimal(double n); |
274 | FixedDecimal(); |
275 | ~FixedDecimal() U_OVERRIDE; |
276 | FixedDecimal(const UnicodeString &s, UErrorCode &ec); |
277 | FixedDecimal(const FixedDecimal &other); |
278 | |
279 | double getPluralOperand(PluralOperand operand) const U_OVERRIDE; |
280 | bool isNaN() const U_OVERRIDE; |
281 | bool isInfinite() const U_OVERRIDE; |
282 | bool hasIntegerValue() const U_OVERRIDE; |
283 | |
284 | bool isNanOrInfinity() const; // used in decimfmtimpl.cpp |
285 | |
286 | int32_t getVisibleFractionDigitCount() const; |
287 | |
288 | void init(double n, int32_t v, int64_t f); |
289 | void init(double n); |
290 | UBool quickInit(double n); // Try a fast-path only initialization, |
291 | // return TRUE if successful. |
292 | void adjustForMinFractionDigits(int32_t min); |
293 | static int64_t getFractionalDigits(double n, int32_t v); |
294 | static int32_t decimals(double n); |
295 | |
296 | double source; |
297 | int32_t visibleDecimalDigitCount; |
298 | int64_t decimalDigits; |
299 | int64_t decimalDigitsWithoutTrailingZeros; |
300 | int64_t intValue; |
301 | UBool _hasIntegerValue; |
302 | UBool isNegative; |
303 | UBool _isNaN; |
304 | UBool _isInfinite; |
305 | }; |
306 | |
307 | class AndConstraint : public UMemory { |
308 | public: |
309 | typedef enum RuleOp { |
310 | NONE, |
311 | MOD |
312 | } RuleOp; |
313 | RuleOp op = AndConstraint::NONE; |
314 | int32_t opNum = -1; // for mod expressions, the right operand of the mod. |
315 | int32_t value = -1; // valid for 'is' rules only. |
316 | UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. |
317 | UBool negated = FALSE; // TRUE for negated rules. |
318 | UBool integerOnly = FALSE; // TRUE for 'within' rules. |
319 | tokenType digitsType = none; // n | i | v | f constraint. |
320 | AndConstraint *next = nullptr; |
321 | // Internal error status, used for errors that occur during the copy constructor. |
322 | UErrorCode fInternalStatus = U_ZERO_ERROR; |
323 | |
324 | AndConstraint() = default; |
325 | AndConstraint(const AndConstraint& other); |
326 | virtual ~AndConstraint(); |
327 | AndConstraint* add(UErrorCode& status); |
328 | // UBool isFulfilled(double number); |
329 | UBool isFulfilled(const IFixedDecimal &number); |
330 | }; |
331 | |
332 | class OrConstraint : public UMemory { |
333 | public: |
334 | AndConstraint *childNode = nullptr; |
335 | OrConstraint *next = nullptr; |
336 | // Internal error status, used for errors that occur during the copy constructor. |
337 | UErrorCode fInternalStatus = U_ZERO_ERROR; |
338 | |
339 | OrConstraint() = default; |
340 | OrConstraint(const OrConstraint& other); |
341 | virtual ~OrConstraint(); |
342 | AndConstraint* add(UErrorCode& status); |
343 | // UBool isFulfilled(double number); |
344 | UBool isFulfilled(const IFixedDecimal &number); |
345 | }; |
346 | |
347 | class RuleChain : public UMemory { |
348 | public: |
349 | UnicodeString fKeyword; |
350 | RuleChain *fNext = nullptr; |
351 | OrConstraint * = nullptr; |
352 | UnicodeString fDecimalSamples; // Samples strings from rule source |
353 | UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. |
354 | UBool fDecimalSamplesUnbounded = FALSE; |
355 | UBool fIntegerSamplesUnbounded = FALSE; |
356 | // Internal error status, used for errors that occur during the copy constructor. |
357 | UErrorCode fInternalStatus = U_ZERO_ERROR; |
358 | |
359 | RuleChain() = default; |
360 | RuleChain(const RuleChain& other); |
361 | virtual ~RuleChain(); |
362 | |
363 | UnicodeString select(const IFixedDecimal &number) const; |
364 | void dumpRules(UnicodeString& result); |
365 | UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; |
366 | UBool isKeyword(const UnicodeString& keyword) const; |
367 | }; |
368 | |
369 | class PluralKeywordEnumeration : public StringEnumeration { |
370 | public: |
371 | PluralKeywordEnumeration(RuleChain *, UErrorCode& status); |
372 | virtual ~PluralKeywordEnumeration(); |
373 | static UClassID U_EXPORT2 getStaticClassID(void); |
374 | virtual UClassID getDynamicClassID(void) const; |
375 | virtual const UnicodeString* snext(UErrorCode& status); |
376 | virtual void reset(UErrorCode& status); |
377 | virtual int32_t count(UErrorCode& status) const; |
378 | private: |
379 | int32_t pos; |
380 | UVector fKeywordNames; |
381 | }; |
382 | |
383 | |
384 | class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { |
385 | public: |
386 | PluralAvailableLocalesEnumeration(UErrorCode &status); |
387 | virtual ~PluralAvailableLocalesEnumeration(); |
388 | virtual const char* next(int32_t *resultLength, UErrorCode& status); |
389 | virtual void reset(UErrorCode& status); |
390 | virtual int32_t count(UErrorCode& status) const; |
391 | private: |
392 | UErrorCode fOpenStatus; |
393 | UResourceBundle *fLocales = nullptr; |
394 | UResourceBundle *fRes = nullptr; |
395 | }; |
396 | |
397 | U_NAMESPACE_END |
398 | |
399 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
400 | |
#endif // PLURRULE_IMPL
402 | //eof |
403 | |