1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | #ifndef __SOURCE_NUMBER_SKELETONS_H__ |
8 | #define __SOURCE_NUMBER_SKELETONS_H__ |
9 | |
10 | #include "number_types.h" |
11 | #include "numparse_types.h" |
12 | #include "unicode/ucharstrie.h" |
13 | #include "string_segment.h" |
14 | |
15 | U_NAMESPACE_BEGIN |
16 | namespace number { |
17 | namespace impl { |
18 | |
// Forward declaration; the full definition of SeenMacroProps appears later in this header.
struct SeenMacroProps;
21 | |
22 | // namespace for enums and entrypoint functions |
23 | namespace skeleton { |
24 | |
///////////////////////////////////////////////////////////////////////////////////////
// NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
// http://bugs.icu-project.org/trac/changeset/41193                                  //
///////////////////////////////////////////////////////////////////////////////////////
29 | |
/**
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * The enumerators are grouped into sections by whether an option is forbidden, optional, or
 * required after the corresponding stem. Values are assigned implicitly, starting at 0.
 */
enum ParseState {

    // Section 0: We expect whitespace or a stem, but not an option:

    STATE_NULL,

    // Section 1: We might accept an option, but it is not required:

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    // Section 2: An option is required:

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
54 | |
/**
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
 * string literal written in upper snake case.
 *
 * Values are assigned implicitly, starting at 0, in declaration order.
 * NOTE(review): other code presumably depends on these numeric values (for example the serialized
 * stem trie referenced below); confirm before reordering or inserting enumerators.
 *
 * @see stem_to_object
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {

    // Section 1: Stems that do not require an option:

    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: Stems that DO require an option:

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
117 | |
118 | /** |
119 | * Creates a NumberFormatter corresponding to the given skeleton string. |
120 | * |
121 | * @param skeletonString |
122 | * A number skeleton string, possibly not in its shortest form. |
123 | * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string. |
124 | */ |
125 | UnlocalizedNumberFormatter create( |
126 | const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status); |
127 | |
128 | /** |
129 | * Create a skeleton string corresponding to the given NumberFormatter. |
130 | * |
131 | * @param macros |
132 | * The NumberFormatter options object. |
133 | * @return A skeleton string in normalized form. |
134 | */ |
135 | UnicodeString generate(const MacroProps& macros, UErrorCode& status); |
136 | |
137 | /** |
138 | * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop. |
139 | * |
140 | * Internal: use the create() endpoint instead of this function. |
141 | */ |
142 | MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status); |
143 | |
144 | /** |
145 | * Given that the current segment represents a stem, parse it and save the result. |
146 | * |
147 | * @return The next state after parsing this stem, corresponding to what subset of options to expect. |
148 | */ |
149 | ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, |
150 | MacroProps& macros, UErrorCode& status); |
151 | |
152 | /** |
153 | * Given that the current segment represents an option, parse it and save the result. |
154 | * |
155 | * @return The next state after parsing this option, corresponding to what subset of options to |
156 | * expect next. |
157 | */ |
158 | ParseState |
159 | parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
160 | |
161 | } // namespace skeleton |
162 | |
163 | |
164 | /** |
165 | * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This |
166 | * applies to only the "Section 1" stems, those that are well-defined without an option. |
167 | */ |
168 | namespace stem_to_object { |
169 | |
170 | Notation (skeleton::StemEnum stem); |
171 | |
172 | MeasureUnit (skeleton::StemEnum stem); |
173 | |
174 | Precision (skeleton::StemEnum stem); |
175 | |
176 | UNumberFormatRoundingMode (skeleton::StemEnum stem); |
177 | |
178 | UNumberGroupingStrategy (skeleton::StemEnum stem); |
179 | |
180 | UNumberUnitWidth (skeleton::StemEnum stem); |
181 | |
182 | UNumberSignDisplay (skeleton::StemEnum stem); |
183 | |
184 | UNumberDecimalSeparatorDisplay (skeleton::StemEnum stem); |
185 | |
186 | } // namespace stem_to_object |
187 | |
188 | /** |
189 | * Namespace for utility methods that convert from enums to stem strings. More complex object conversions |
190 | * take place in the object_to_stem_string namespace. |
191 | */ |
192 | namespace enum_to_stem_string { |
193 | |
194 | void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb); |
195 | |
196 | void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb); |
197 | |
198 | void unitWidth(UNumberUnitWidth value, UnicodeString& sb); |
199 | |
200 | void signDisplay(UNumberSignDisplay value, UnicodeString& sb); |
201 | |
202 | void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb); |
203 | |
204 | } // namespace enum_to_stem_string |
205 | |
206 | /** |
207 | * Namespace for utility methods for processing stems and options that cannot be interpreted literally. |
208 | */ |
209 | namespace blueprint_helpers { |
210 | |
211 | /** @return Whether we successfully found and parsed an exponent width option. */ |
212 | bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
213 | |
214 | void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status); |
215 | |
216 | /** @return Whether we successfully found and parsed an exponent sign option. */ |
217 | bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
218 | |
219 | void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
220 | |
221 | void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); |
222 | |
223 | void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
224 | |
225 | void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status); |
226 | |
227 | void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
228 | |
229 | void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
230 | |
231 | void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); |
232 | |
233 | void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
234 | |
235 | void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status); |
236 | |
237 | /** @return Whether we successfully found and parsed a frac-sig option. */ |
238 | bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
239 | |
240 | void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
241 | |
242 | void |
243 | generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status); |
244 | |
245 | void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
246 | |
247 | void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status); |
248 | |
249 | void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
250 | |
251 | void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status); |
252 | |
253 | void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
254 | |
255 | void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, |
256 | UErrorCode& status); |
257 | |
258 | } // namespace blueprint_helpers |
259 | |
260 | /** |
261 | * Class for utility methods for generating a token corresponding to each macro-prop. Each method |
262 | * returns whether or not a token was written to the string builder. |
263 | * |
264 | * This needs to be a class, not a namespace, so it can be friended. |
265 | */ |
266 | class GeneratorHelpers { |
267 | public: |
268 | /** |
269 | * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given |
270 | * StringBuilder. |
271 | * |
272 | * Internal: use the create() endpoint instead of this function. |
273 | */ |
274 | static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
275 | |
276 | private: |
277 | static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
278 | |
279 | static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
280 | |
281 | static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
282 | |
283 | static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
284 | |
285 | static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
286 | |
287 | static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
288 | |
289 | static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
290 | |
291 | static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
292 | |
293 | static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
294 | |
295 | static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
296 | |
297 | static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
298 | |
299 | static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
300 | |
301 | }; |
302 | |
/**
 * Struct for null-checking.
 * In Java, we can just check the object reference. In C++, we need a different method.
 *
 * Holds one boolean flag per macro-prop; every flag is initialized to false.
 */
struct SeenMacroProps {
    bool notation = false;
    bool unit = false;
    bool perUnit = false;
    bool precision = false;
    bool roundingMode = false;
    bool grouper = false;
    bool padder = false;
    bool integerWidth = false;
    bool symbols = false;
    bool unitWidth = false;
    bool sign = false;
    bool decimal = false;
    bool scale = false;
};
322 | |
323 | } // namespace impl |
324 | } // namespace number |
325 | U_NAMESPACE_END |
326 | |
327 | #endif //__SOURCE_NUMBER_SKELETONS_H__ |
328 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
329 | |