1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7#ifndef __SOURCE_NUMBER_SKELETONS_H__
8#define __SOURCE_NUMBER_SKELETONS_H__
9
10#include "number_types.h"
11#include "numparse_types.h"
12#include "unicode/ucharstrie.h"
13#include "string_segment.h"
14
15U_NAMESPACE_BEGIN
16namespace number {
17namespace impl {
18
// Forward declaration: parseStem() below takes a SeenMacroProps&, while the
// struct itself is defined near the end of this header.
struct SeenMacroProps;
21
22// namespace for enums and entrypoint functions
23namespace skeleton {
24
///////////////////////////////////////////////////////////////////////////////////////
// NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
// http://bugs.icu-project.org/trac/changeset/41193                                  //
///////////////////////////////////////////////////////////////////////////////////////
29
/**
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * The enumerators are grouped into sections according to whether an option is
 * forbidden, optional, or required in that state. Enumerator order is part of the
 * declaration and must be kept as is.
 */
enum ParseState {

    // Section 0: We expect whitespace or a stem, but not an option:

    STATE_NULL,

    // Section 1: We might accept an option, but it is not required:

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    // Section 2: An option is required:

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
55
/**
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
 * string literal written in upper snake case.
 *
 * Section 1 stems are complete on their own; Section 2 stems must be followed by an option.
 *
 * @see stem_to_object (conversions from Section 1 stems to objects or enums)
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {

    // Section 1: Stems that do not require an option:

    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: Stems that DO require an option:

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
120
/** Default wildcard char, accepted on input and printed in output */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard char, accepted on input but not printed in output */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Checks whether the char is a wildcard on input.
 *
 * Declared constexpr so the check can also be used in constant expressions.
 *
 * @param c
 *            The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar; false otherwise.
 */
constexpr bool isWildcardChar(char16_t c) {
    return c == kWildcardChar || c == kAltWildcardChar;
}
131
132/**
133 * Creates a NumberFormatter corresponding to the given skeleton string.
134 *
135 * @param skeletonString
136 * A number skeleton string, possibly not in its shortest form.
137 * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
138 */
139UnlocalizedNumberFormatter create(
140 const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
141
142/**
143 * Create a skeleton string corresponding to the given NumberFormatter.
144 *
145 * @param macros
146 * The NumberFormatter options object.
147 * @return A skeleton string in normalized form.
148 */
149UnicodeString generate(const MacroProps& macros, UErrorCode& status);
150
151/**
152 * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
153 *
154 * Internal: use the create() endpoint instead of this function.
155 */
156MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
157
158/**
159 * Given that the current segment represents a stem, parse it and save the result.
160 *
161 * @return The next state after parsing this stem, corresponding to what subset of options to expect.
162 */
163ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
164 MacroProps& macros, UErrorCode& status);
165
166/**
167 * Given that the current segment represents an option, parse it and save the result.
168 *
169 * @return The next state after parsing this option, corresponding to what subset of options to
170 * expect next.
171 */
172ParseState
173parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
174
175} // namespace skeleton
176
177
178/**
179 * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
180 * applies to only the "Section 1" stems, those that are well-defined without an option.
181 */
182namespace stem_to_object {
183
184Notation notation(skeleton::StemEnum stem);
185
186MeasureUnit unit(skeleton::StemEnum stem);
187
188Precision precision(skeleton::StemEnum stem);
189
190UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
191
192UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
193
194UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
195
196UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
197
198UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
199
200} // namespace stem_to_object
201
202/**
203 * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
204 * take place in the object_to_stem_string namespace.
205 */
206namespace enum_to_stem_string {
207
208void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
209
210void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
211
212void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
213
214void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
215
216void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
217
218} // namespace enum_to_stem_string
219
220/**
221 * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
222 */
223namespace blueprint_helpers {
224
225/** @return Whether we successfully found and parsed an exponent width option. */
226bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
227
228void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
229
230/** @return Whether we successfully found and parsed an exponent sign option. */
231bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
232
233void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
234
235void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
236
237void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
238
239void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status);
240
241void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
242
243void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
244
245void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
246
247void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
248
249void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
250
251void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
252
253void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
254
255// Note: no generateScientificStem since this syntax was added later in ICU 67
256
257void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
258
259// Note: no generateIntegerStem since this syntax was added later in ICU 67
260
261/** @return Whether we successfully found and parsed a frac-sig option. */
262bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
263
264void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
265
266void
267generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
268
269void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
270
271void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
272
273void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
274
275void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
276
277void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
278
279void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
280 UErrorCode& status);
281
282} // namespace blueprint_helpers
283
284/**
285 * Class for utility methods for generating a token corresponding to each macro-prop. Each method
286 * returns whether or not a token was written to the string builder.
287 *
288 * This needs to be a class, not a namespace, so it can be friended.
289 */
290class GeneratorHelpers {
291 public:
292 /**
293 * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
294 * StringBuilder.
295 *
296 * Internal: use the create() endpoint instead of this function.
297 */
298 static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
299
300 private:
301 static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
302
303 static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
304
305 static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
306
307 static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
308
309 static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
310
311 static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
312
313 static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
314
315 static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
316
317 static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
318
319 static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
320
321 static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
322
323 static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
324
325};
326
/**
 * Struct for null-checking.
 * In Java, we can just check the object reference. In C++, we need a different method.
 *
 * Holds one boolean flag per macro-prop; every flag is initialized to false.
 */
struct SeenMacroProps {
    bool notation = false;
    bool unit = false;
    bool perUnit = false;
    bool precision = false;
    bool roundingMode = false;
    bool grouper = false;
    bool padder = false;
    bool integerWidth = false;
    bool symbols = false;
    bool unitWidth = false;
    bool sign = false;
    bool decimal = false;
    bool scale = false;
};
346
347} // namespace impl
348} // namespace number
349U_NAMESPACE_END
350
351#endif //__SOURCE_NUMBER_SKELETONS_H__
352#endif /* #if !UCONFIG_NO_FORMATTING */
353