1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7#ifndef __SOURCE_NUMBER_SKELETONS_H__
8#define __SOURCE_NUMBER_SKELETONS_H__
9
10#include "number_types.h"
11#include "numparse_types.h"
12#include "unicode/ucharstrie.h"
13#include "string_segment.h"
14
15U_NAMESPACE_BEGIN
16namespace number {
17namespace impl {
18
19// Forward-declaration
20struct SeenMacroProps;
21
22// namespace for enums and entrypoint functions
23namespace skeleton {
24
25///////////////////////////////////////////////////////////////////////////////////////
26// NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
27// http://bugs.icu-project.org/trac/changeset/41193 //
28///////////////////////////////////////////////////////////////////////////////////////
29
/**
 * Records, while a skeleton is being parsed, what kind of option (if any) is expected next.
 *
 * The enumerators are arranged in three sections, according to whether an option is not
 * accepted, optional, or mandatory at that point. Enumerator values are assigned implicitly in
 * declaration order, starting from zero.
 */
enum ParseState {

    // ---- Section 0: whitespace or a stem is expected; an option is not ----

    STATE_NULL = 0,

    // ---- Section 1: an option may be accepted, but none is required ----

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    // ---- Section 2: an option is required ----

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
54
/**
 * One enumerator for every possible stem literal. Each enumerator is named after its stem: the
 * kebab-case stem string literal, rewritten in upper snake case.
 *
 * Enumerator values are assigned implicitly in declaration order, starting from zero, so the
 * stems that need no option (Section 1) all come before those that do (Section 2).
 *
 * @see stem_to_object
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {

    // ---- Section 1: stems that do not require an option ----

    STEM_COMPACT_SHORT = 0,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // ---- Section 2: stems that DO require an option ----

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
117
118/**
119 * Creates a NumberFormatter corresponding to the given skeleton string.
120 *
121 * @param skeletonString
122 * A number skeleton string, possibly not in its shortest form.
123 * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
124 */
125UnlocalizedNumberFormatter create(
126 const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
127
128/**
129 * Create a skeleton string corresponding to the given NumberFormatter.
130 *
131 * @param macros
132 * The NumberFormatter options object.
133 * @return A skeleton string in normalized form.
134 */
135UnicodeString generate(const MacroProps& macros, UErrorCode& status);
136
137/**
138 * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
139 *
140 * Internal: use the create() endpoint instead of this function.
141 */
142MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
143
144/**
145 * Given that the current segment represents a stem, parse it and save the result.
146 *
147 * @return The next state after parsing this stem, corresponding to what subset of options to expect.
148 */
149ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
150 MacroProps& macros, UErrorCode& status);
151
152/**
153 * Given that the current segment represents an option, parse it and save the result.
154 *
155 * @return The next state after parsing this option, corresponding to what subset of options to
156 * expect next.
157 */
158ParseState
159parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
160
161} // namespace skeleton
162
163
164/**
165 * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
166 * applies to only the "Section 1" stems, those that are well-defined without an option.
167 */
168namespace stem_to_object {
169
170Notation notation(skeleton::StemEnum stem);
171
172MeasureUnit unit(skeleton::StemEnum stem);
173
174Precision precision(skeleton::StemEnum stem);
175
176UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
177
178UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
179
180UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
181
182UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
183
184UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
185
186} // namespace stem_to_object
187
188/**
189 * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
190 * take place in the object_to_stem_string namespace.
191 */
192namespace enum_to_stem_string {
193
194void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
195
196void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
197
198void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
199
200void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
201
202void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
203
204} // namespace enum_to_stem_string
205
206/**
207 * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
208 */
209namespace blueprint_helpers {
210
211/** @return Whether we successfully found and parsed an exponent width option. */
212bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
213
214void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
215
216/** @return Whether we successfully found and parsed an exponent sign option. */
217bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
218
219void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
220
221void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
222
223void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
224
225void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status);
226
227void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
228
229void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
230
231void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
232
233void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
234
235void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
236
237/** @return Whether we successfully found and parsed a frac-sig option. */
238bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
239
240void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
241
242void
243generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
244
245void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
246
247void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
248
249void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
250
251void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
252
253void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
254
255void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
256 UErrorCode& status);
257
258} // namespace blueprint_helpers
259
260/**
261 * Class for utility methods for generating a token corresponding to each macro-prop. Each method
262 * returns whether or not a token was written to the string builder.
263 *
264 * This needs to be a class, not a namespace, so it can be friended.
265 */
266class GeneratorHelpers {
267 public:
268 /**
269 * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
270 * StringBuilder.
271 *
272 * Internal: use the create() endpoint instead of this function.
273 */
274 static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
275
276 private:
277 static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
278
279 static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
280
281 static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
282
283 static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
284
285 static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
286
287 static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
288
289 static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
290
291 static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
292
293 static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
294
295 static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
296
297 static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
298
299 static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
300
301};
302
/**
 * Set of boolean flags, one per macro-prop, all of which start out false.
 *
 * Used for null-checking: the Java code can simply test an object reference, whereas the C++
 * code needs a separate mechanism, which this struct provides.
 */
struct SeenMacroProps {
    bool notation{false};
    bool unit{false};
    bool perUnit{false};
    bool precision{false};
    bool roundingMode{false};
    bool grouper{false};
    bool padder{false};
    bool integerWidth{false};
    bool symbols{false};
    bool unitWidth{false};
    bool sign{false};
    bool decimal{false};
    bool scale{false};
};
322
323} // namespace impl
324} // namespace number
325U_NAMESPACE_END
326
327#endif //__SOURCE_NUMBER_SKELETONS_H__
328#endif /* #if !UCONFIG_NO_FORMATTING */
329