plurrule_impl.h source code [engine/third_party/icu/source/i18n/plurrule_impl.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	* Copyright (C) 2007-2016, International Business Machines Corporation and
6	* others. All Rights Reserved.
7	*******************************************************************************
8	*
9	* File PLURRULE_IMPL.H
10	*
11	*******************************************************************************
12	*/
13
14
15	#ifndef PLURRULE_IMPL
16	#define PLURRULE_IMPL
17
18	// Internal definitions for the PluralRules implementation.
19
20	#include "unicode/utypes.h"
21
22	#if !UCONFIG_NO_FORMATTING
23
24	#include "unicode/format.h"
25	#include "unicode/locid.h"
26	#include "unicode/parseerr.h"
27	#include "unicode/strenum.h"
28	#include "unicode/ures.h"
29	#include "uvector.h"
30	#include "hash.h"
31	#include "uassert.h"
32
33	class PluralRulesTest;
34
35	U_NAMESPACE_BEGIN
36
37	class AndConstraint;
38	class RuleChain;
39	class DigitInterval;
40	class PluralRules;
41	class VisibleDigits;
42
43	namespace pluralimpl {
44
45	// TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility.
46
47	static const UChar DOT = ((UChar) `0x002E`);
48	static const UChar SINGLE_QUOTE = ((UChar) `0x0027`);
49	static const UChar SLASH = ((UChar) `0x002F`);
50	static const UChar BACKSLASH = ((UChar) `0x005C`);
51	static const UChar SPACE = ((UChar) `0x0020`);
52	static const UChar EXCLAMATION = ((UChar) `0x0021`);
53	static const UChar QUOTATION_MARK = ((UChar) `0x0022`);
54	static const UChar NUMBER_SIGN = ((UChar) `0x0023`);
55	static const UChar PERCENT_SIGN = ((UChar) `0x0025`);
56	static const UChar ASTERISK = ((UChar) `0x002A`);
57	static const UChar COMMA = ((UChar) `0x002C`);
58	static const UChar HYPHEN = ((UChar) `0x002D`);
59	static const UChar U_ZERO = ((UChar) `0x0030`);
60	static const UChar U_ONE = ((UChar) `0x0031`);
61	static const UChar U_TWO = ((UChar) `0x0032`);
62	static const UChar U_THREE = ((UChar) `0x0033`);
63	static const UChar U_FOUR = ((UChar) `0x0034`);
64	static const UChar U_FIVE = ((UChar) `0x0035`);
65	static const UChar U_SIX = ((UChar) `0x0036`);
66	static const UChar U_SEVEN = ((UChar) `0x0037`);
67	static const UChar U_EIGHT = ((UChar) `0x0038`);
68	static const UChar U_NINE = ((UChar) `0x0039`);
69	static const UChar COLON = ((UChar) `0x003A`);
70	static const UChar SEMI_COLON = ((UChar) `0x003B`);
71	static const UChar EQUALS = ((UChar) `0x003D`);
72	static const UChar AT = ((UChar) `0x0040`);
73	static const UChar CAP_A = ((UChar) `0x0041`);
74	static const UChar CAP_B = ((UChar) `0x0042`);
75	static const UChar CAP_R = ((UChar) `0x0052`);
76	static const UChar CAP_Z = ((UChar) `0x005A`);
77	static const UChar LOWLINE = ((UChar) `0x005F`);
78	static const UChar LEFTBRACE = ((UChar) `0x007B`);
79	static const UChar RIGHTBRACE = ((UChar) `0x007D`);
80	static const UChar TILDE = ((UChar) `0x007E`);
81	static const UChar ELLIPSIS = ((UChar) `0x2026`);
82
83	static const UChar LOW_A = ((UChar) `0x0061`);
84	static const UChar LOW_B = ((UChar) `0x0062`);
85	static const UChar LOW_C = ((UChar) `0x0063`);
86	static const UChar LOW_D = ((UChar) `0x0064`);
87	static const UChar LOW_E = ((UChar) `0x0065`);
88	static const UChar LOW_F = ((UChar) `0x0066`);
89	static const UChar LOW_G = ((UChar) `0x0067`);
90	static const UChar LOW_H = ((UChar) `0x0068`);
91	static const UChar LOW_I = ((UChar) `0x0069`);
92	static const UChar LOW_J = ((UChar) `0x006a`);
93	static const UChar LOW_K = ((UChar) `0x006B`);
94	static const UChar LOW_L = ((UChar) `0x006C`);
95	static const UChar LOW_M = ((UChar) `0x006D`);
96	static const UChar LOW_N = ((UChar) `0x006E`);
97	static const UChar LOW_O = ((UChar) `0x006F`);
98	static const UChar LOW_P = ((UChar) `0x0070`);
99	static const UChar LOW_Q = ((UChar) `0x0071`);
100	static const UChar LOW_R = ((UChar) `0x0072`);
101	static const UChar LOW_S = ((UChar) `0x0073`);
102	static const UChar LOW_T = ((UChar) `0x0074`);
103	static const UChar LOW_U = ((UChar) `0x0075`);
104	static const UChar LOW_V = ((UChar) `0x0076`);
105	static const UChar LOW_W = ((UChar) `0x0077`);
106	static const UChar LOW_Y = ((UChar) `0x0079`);
107	static const UChar LOW_Z = ((UChar) `0x007A`);
108
109	}
110
111
// Largest positive int32_t value.
// NOTE(review): presumably used as the open-ended upper bound of a plural
// rule range -- confirm against the implementation file.
static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff;
113
// Token categories for plural rule text. PluralRuleParser records the
// current and previous token as values of this type, and AndConstraint
// stores one in its digitsType member.
// Enumerator values are implicit, so the declaration order is significant.
enum tokenType {
    none,
    tNumber,
    tComma,
    tSemiColon,
    tSpace,
    tColon,
    tAt,          // '@'
    tDot,
    tDot2,
    tEllipsis,
    tKeyword,
    tAnd,
    tOr,
    tMod,         // 'mod' or '%'
    tNot,         // 'not' only.
    tIn,          // 'in' only.
    tEqual,       // '=' only.
    tNotEqual,    // '!='
    tTilde,
    tWithin,
    tIs,
    tVariableN,
    tVariableI,
    tVariableF,
    tVariableV,
    tVariableT,
    tDecimal,
    tInteger,
    tEOF
};
145
146
147	class PluralRuleParser: public UMemory {
148	public:
149	PluralRuleParser();
150	virtual ~PluralRuleParser();
151
152	void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status);
153	void getNextToken(UErrorCode &status);
154	void checkSyntax(UErrorCode &status);
155	static int32_t getNumberValue(const UnicodeString &token);
156
157	private:
158	static tokenType getKeyType(const UnicodeString& token, tokenType type);
159	static tokenType charType(UChar ch);
160	static UBool isValidKeyword(const UnicodeString& token);
161
162	const UnicodeString ruleSrc; // The rules string.*
163	int32_t ruleIndex; // String index in the input rules, the current parse position.
164	UnicodeString token; // Token most recently scanned.
165	tokenType type;
166	tokenType prevType;
167
168	// The items currently being parsed & built.
169	// Note: currentChain may not be the last RuleChain in the
170	// list because the "other" chain is forced to the end.
171	AndConstraint *curAndConstraint;
172	RuleChain *currentChain;
173
174	int32_t rangeLowIdx; // Indices in the UVector of ranges of the
175	int32_t rangeHiIdx; // low and hi values currently being parsed.
176
177	enum EParseState {
178	kKeyword,
179	kExpr,
180	kValue,
181	kRangeList,
182	kSamples
183	};
184	};
185
// Operands that can be requested from an IFixedDecimal via getPluralOperand().
// Enumerator values are implicit, so the declaration order is significant.
enum PluralOperand {
    /**
    * The double value of the entire number.
    */
    PLURAL_OPERAND_N,

    /**
     * The integer value, with the fraction digits truncated off.
     */
    PLURAL_OPERAND_I,

    /**
    * All visible fraction digits as an integer, including trailing zeros.
    */
    PLURAL_OPERAND_F,

    /**
     * Visible fraction digits as an integer, not including trailing zeros.
     */
    PLURAL_OPERAND_T,

    /**
     * Number of visible fraction digits.
     */
    PLURAL_OPERAND_V,

    /**
     * Number of visible fraction digits, not including trailing zeros.
     */
    PLURAL_OPERAND_W,

    /**
     * Suppressed exponent for compact notation (exponent needed in
     * scientific notation with compact notation to approximate i).
     */
    PLURAL_OPERAND_E,

    /**
     * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
     *
     * <p>Returns the integer value, but will fail if the number has fraction digits.
     * That is, using "j" instead of "i" is like implicitly adding "v is 0".
     *
     * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches
     * "3" but not "3.1" or "3.0".
     */
    PLURAL_OPERAND_J
};
234
235	/**
236	* Converts from the tokenType enum to PluralOperand. Asserts that the given
237	* tokenType can be mapped to a PluralOperand.
238	*/
239	PluralOperand tokenTypeToPluralOperand(tokenType tt);
240
241	/**
242	* An interface to FixedDecimal, allowing for other implementations.
243	* @internal
244	*/
245	class U_I18N_API IFixedDecimal {
246	public:
247	virtual ~IFixedDecimal();
248
249	/**
250	* Returns the value corresponding to the specified operand (n, i, f, t, v, or w).
251	* If the operand is 'n', returns a double; otherwise, returns an integer.
252	*/
253	virtual double getPluralOperand(PluralOperand operand) const = `0`;
254
255	virtual bool isNaN() const = `0`;
256
257	virtual bool isInfinite() const = `0`;
258
259	/* Whether the number has no nonzero fraction digits. /
260	virtual bool hasIntegerValue() const = `0`;
261	};
262
263	/**
264	* class FixedDecimal serves to communicate the properties
265	* of a formatted number from a decimal formatter to PluralRules::select()
266	*
267	* see DecimalFormat::getFixedDecimal()
268	* @internal
269	*/
270	class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject {
271	public:
272	/**
273	* @param n the number, e.g. 12.345
274	* @param v The number of visible fraction digits, e.g. 3
275	* @param f The fraction digits, e.g. 345
276	*/
277	FixedDecimal(double n, int32_t v, int64_t f);
278	FixedDecimal(double n, int32_t);
279	explicit FixedDecimal(double n);
280	FixedDecimal();
281	~FixedDecimal() U_OVERRIDE;
282	FixedDecimal(const UnicodeString &s, UErrorCode &ec);
283	FixedDecimal(const FixedDecimal &other);
284
285	double getPluralOperand(PluralOperand operand) const U_OVERRIDE;
286	bool isNaN() const U_OVERRIDE;
287	bool isInfinite() const U_OVERRIDE;
288	bool hasIntegerValue() const U_OVERRIDE;
289
290	bool isNanOrInfinity() const; // used in decimfmtimpl.cpp
291
292	int32_t getVisibleFractionDigitCount() const;
293
294	void init(double n, int32_t v, int64_t f);
295	void init(double n);
296	UBool quickInit(double n); // Try a fast-path only initialization,
297	// return TRUE if successful.
298	void adjustForMinFractionDigits(int32_t min);
299	static int64_t getFractionalDigits(double n, int32_t v);
300	static int32_t decimals(double n);
301
302	double source;
303	int32_t visibleDecimalDigitCount;
304	int64_t decimalDigits;
305	int64_t decimalDigitsWithoutTrailingZeros;
306	int64_t intValue;
307	UBool _hasIntegerValue;
308	UBool isNegative;
309	UBool _isNaN;
310	UBool _isInfinite;
311	};
312
313	class AndConstraint : public UMemory {
314	public:
315	typedef enum RuleOp {
316	NONE,
317	MOD
318	} RuleOp;
319	RuleOp op = AndConstraint::NONE;
320	int32_t opNum = -`1`; // for mod expressions, the right operand of the mod.
321	int32_t value = -`1`; // valid for 'is' rules only.
322	UVector32 rangeList = nullptr; // for 'in', 'within' rules. Null otherwise.*
323	UBool negated = FALSE; // TRUE for negated rules.
324	UBool integerOnly = FALSE; // TRUE for 'within' rules.
325	tokenType digitsType = none; // n \| i \| v \| f constraint.
326	AndConstraint next = nullptr*;
327	// Internal error status, used for errors that occur during the copy constructor.
328	UErrorCode fInternalStatus = U_ZERO_ERROR;
329
330	AndConstraint() = default;
331	AndConstraint(const AndConstraint& other);
332	virtual ~AndConstraint();
333	AndConstraint* add(UErrorCode& status);
334	// UBool isFulfilled(double number);
335	UBool isFulfilled(const IFixedDecimal &number);
336	};
337
338	class OrConstraint : public UMemory {
339	public:
340	AndConstraint childNode = nullptr*;
341	OrConstraint next = nullptr*;
342	// Internal error status, used for errors that occur during the copy constructor.
343	UErrorCode fInternalStatus = U_ZERO_ERROR;
344
345	OrConstraint() = default;
346	OrConstraint(const OrConstraint& other);
347	virtual ~OrConstraint();
348	AndConstraint* add(UErrorCode& status);
349	// UBool isFulfilled(double number);
350	UBool isFulfilled(const IFixedDecimal &number);
351	};
352
353	class RuleChain : public UMemory {
354	public:
355	UnicodeString fKeyword;
356	RuleChain fNext = nullptr*;
357	OrConstraint ruleHeader = nullptr*;
358	UnicodeString fDecimalSamples; // Samples strings from rule source
359	UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed.
360	UBool fDecimalSamplesUnbounded = FALSE;
361	UBool fIntegerSamplesUnbounded = FALSE;
362	// Internal error status, used for errors that occur during the copy constructor.
363	UErrorCode fInternalStatus = U_ZERO_ERROR;
364
365	RuleChain() = default;
366	RuleChain(const RuleChain& other);
367	virtual ~RuleChain();
368
369	UnicodeString select(const IFixedDecimal &number) const;
370	void dumpRules(UnicodeString& result);
371	UErrorCode getKeywords(int32_t maxArraySize, UnicodeString keywords, int32_t& arraySize) const*;
372	UBool isKeyword(const UnicodeString& keyword) const;
373	};
374
375	class PluralKeywordEnumeration : public StringEnumeration {
376	public:
377	PluralKeywordEnumeration(RuleChain *header, UErrorCode& status);
378	virtual ~PluralKeywordEnumeration();
379	static UClassID U_EXPORT2 getStaticClassID(void);
380	virtual UClassID getDynamicClassID(void) const;
381	virtual const UnicodeString* snext(UErrorCode& status);
382	virtual void reset(UErrorCode& status);
383	virtual int32_t count(UErrorCode& status) const;
384	private:
385	int32_t pos;
386	UVector fKeywordNames;
387	};
388
389
390	class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration {
391	public:
392	PluralAvailableLocalesEnumeration(UErrorCode &status);
393	virtual ~PluralAvailableLocalesEnumeration();
394	virtual const char* next(int32_t *resultLength, UErrorCode& status);
395	virtual void reset(UErrorCode& status);
396	virtual int32_t count(UErrorCode& status) const;
397	private:
398	UErrorCode fOpenStatus;
399	UResourceBundle fLocales = nullptr*;
400	UResourceBundle fRes = nullptr*;
401	};
402
403	U_NAMESPACE_END
404
405	#endif /* #if !UCONFIG_NO_FORMATTING */
406
407	#endif // _PLURRULE_IMPL
408	//eof
409

Browse the source code of engine/third_party/icu/source/i18n/plurrule_impl.h