1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | #ifndef __SOURCE_NUMBER_SKELETONS_H__ |
8 | #define __SOURCE_NUMBER_SKELETONS_H__ |
9 | |
10 | #include "number_types.h" |
11 | #include "numparse_types.h" |
12 | #include "unicode/ucharstrie.h" |
13 | #include "string_segment.h" |
14 | |
15 | U_NAMESPACE_BEGIN |
16 | namespace number { |
17 | namespace impl { |
18 | |
19 | // Forward-declaration |
20 | struct SeenMacroProps; |
21 | |
22 | // namespace for enums and entrypoint functions |
23 | namespace skeleton { |
24 | |
25 | /////////////////////////////////////////////////////////////////////////////////////// |
26 | // NOTE: For an example of how to add a new stem to the number skeleton parser, see: // |
27 | // http://bugs.icu-project.org/trac/changeset/41193 // |
28 | /////////////////////////////////////////////////////////////////////////////////////// |
29 | |
/**
 * Parser state for skeleton parsing: records which kind of option, if any, is expected next.
 *
 * The enumerators are grouped into three sections according to whether an option is forbidden,
 * optional, or mandatory at that point.
 */
enum ParseState {

    /* ---- Section 0: whitespace or a stem is expected, but not an option ---- */

    STATE_NULL,

    /* ---- Section 1: an option may follow, but it is not required ---- */

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    /* ---- Section 2: an option is required ---- */

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
55 | |
/**
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
 * string literal written in upper snake case.
 *
 * The enum must carry the name StemEnum: the stem_to_object functions declared later in this header
 * take a skeleton::StemEnum parameter, which cannot refer to an unnamed enum.
 *
 * Enumerators are split into two sections: stems that are complete on their own, and stems that
 * must be followed by an option.
 *
 * @see stem_to_object
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {

    // Section 1: Stems that do not require an option:

    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: Stems that DO require an option:

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
120 | |
/** Default wildcard character: accepted on input and printed in output. */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard character: accepted on input but not printed in output. */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Checks whether a character is treated as a wildcard on input.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar, false otherwise.
 */
inline bool isWildcardChar(char16_t c) {
    switch (c) {
        case kWildcardChar:
        case kAltWildcardChar:
            return true;
        default:
            return false;
    }
}
131 | |
132 | /** |
133 | * Creates a NumberFormatter corresponding to the given skeleton string. |
134 | * |
135 | * @param skeletonString |
136 | * A number skeleton string, possibly not in its shortest form. |
137 | * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string. |
138 | */ |
139 | UnlocalizedNumberFormatter create( |
140 | const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status); |
141 | |
142 | /** |
143 | * Create a skeleton string corresponding to the given NumberFormatter. |
144 | * |
145 | * @param macros |
146 | * The NumberFormatter options object. |
147 | * @return A skeleton string in normalized form. |
148 | */ |
149 | UnicodeString generate(const MacroProps& macros, UErrorCode& status); |
150 | |
151 | /** |
152 | * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop. |
153 | * |
154 | * Internal: use the create() endpoint instead of this function. |
155 | */ |
156 | MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status); |
157 | |
158 | /** |
159 | * Given that the current segment represents a stem, parse it and save the result. |
160 | * |
161 | * @return The next state after parsing this stem, corresponding to what subset of options to expect. |
162 | */ |
163 | ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, |
164 | MacroProps& macros, UErrorCode& status); |
165 | |
166 | /** |
167 | * Given that the current segment represents an option, parse it and save the result. |
168 | * |
169 | * @return The next state after parsing this option, corresponding to what subset of options to |
170 | * expect next. |
171 | */ |
172 | ParseState |
173 | parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
174 | |
175 | } // namespace skeleton |
176 | |
177 | |
178 | /** |
179 | * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This |
180 | * applies to only the "Section 1" stems, those that are well-defined without an option. |
181 | */ |
182 | namespace stem_to_object { |
183 | |
184 | Notation (skeleton::StemEnum stem); |
185 | |
186 | MeasureUnit (skeleton::StemEnum stem); |
187 | |
188 | Precision (skeleton::StemEnum stem); |
189 | |
190 | UNumberFormatRoundingMode (skeleton::StemEnum stem); |
191 | |
192 | UNumberGroupingStrategy (skeleton::StemEnum stem); |
193 | |
194 | UNumberUnitWidth (skeleton::StemEnum stem); |
195 | |
196 | UNumberSignDisplay (skeleton::StemEnum stem); |
197 | |
198 | UNumberDecimalSeparatorDisplay (skeleton::StemEnum stem); |
199 | |
200 | } // namespace stem_to_object |
201 | |
202 | /** |
203 | * Namespace for utility methods that convert from enums to stem strings. More complex object conversions |
204 | * take place in the object_to_stem_string namespace. |
205 | */ |
206 | namespace enum_to_stem_string { |
207 | |
208 | void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb); |
209 | |
210 | void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb); |
211 | |
212 | void unitWidth(UNumberUnitWidth value, UnicodeString& sb); |
213 | |
214 | void signDisplay(UNumberSignDisplay value, UnicodeString& sb); |
215 | |
216 | void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb); |
217 | |
218 | } // namespace enum_to_stem_string |
219 | |
220 | /** |
221 | * Namespace for utility methods for processing stems and options that cannot be interpreted literally. |
222 | */ |
223 | namespace blueprint_helpers { |
224 | |
225 | /** @return Whether we successfully found and parsed an exponent width option. */ |
226 | bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
227 | |
228 | void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status); |
229 | |
230 | /** @return Whether we successfully found and parsed an exponent sign option. */ |
231 | bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
232 | |
233 | void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
234 | |
235 | void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); |
236 | |
237 | void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
238 | |
239 | void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status); |
240 | |
241 | void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
242 | |
243 | void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
244 | |
245 | void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
246 | |
247 | void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); |
248 | |
249 | void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
250 | |
251 | void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status); |
252 | |
253 | void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
254 | |
255 | // Note: no generateScientificStem since this syntax was added later in ICU 67 |
256 | |
257 | void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
258 | |
259 | // Note: no generateIntegerStem since this syntax was added later in ICU 67 |
260 | |
261 | /** @return Whether we successfully found and parsed a frac-sig option. */ |
262 | bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
263 | |
264 | void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
265 | |
266 | void |
267 | generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status); |
268 | |
269 | void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
270 | |
271 | void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status); |
272 | |
273 | void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
274 | |
275 | void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status); |
276 | |
277 | void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
278 | |
279 | void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, |
280 | UErrorCode& status); |
281 | |
282 | } // namespace blueprint_helpers |
283 | |
284 | /** |
285 | * Class for utility methods for generating a token corresponding to each macro-prop. Each method |
286 | * returns whether or not a token was written to the string builder. |
287 | * |
288 | * This needs to be a class, not a namespace, so it can be friended. |
289 | */ |
290 | class GeneratorHelpers { |
291 | public: |
292 | /** |
293 | * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given |
294 | * StringBuilder. |
295 | * |
296 | * Internal: use the create() endpoint instead of this function. |
297 | */ |
298 | static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
299 | |
300 | private: |
301 | static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
302 | |
303 | static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
304 | |
305 | static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
306 | |
307 | static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
308 | |
309 | static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
310 | |
311 | static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
312 | |
313 | static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
314 | |
315 | static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
316 | |
317 | static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
318 | |
319 | static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
320 | |
321 | static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
322 | |
323 | static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
324 | |
325 | }; |
326 | |
/**
 * Struct for null-checking.
 * In Java, we can just check the object reference. In C++, we need a different method, so this
 * struct carries one boolean flag per macro-prop. Every flag starts out false.
 *
 * NOTE(review): presumably a flag is set to true once the corresponding property has been parsed
 * (parseStem() receives a SeenMacroProps&); confirm in the implementation.
 */
struct SeenMacroProps {
    bool notation{false};
    bool unit{false};
    bool perUnit{false};
    bool precision{false};
    bool roundingMode{false};
    bool grouper{false};
    bool padder{false};
    bool integerWidth{false};
    bool symbols{false};
    bool unitWidth{false};
    bool sign{false};
    bool decimal{false};
    bool scale{false};
};
346 | |
347 | } // namespace impl |
348 | } // namespace number |
349 | U_NAMESPACE_END |
350 | |
351 | #endif //__SOURCE_NUMBER_SKELETONS_H__ |
352 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
353 | |