| 1 | // © 2018 and later: Unicode, Inc. and others. | 
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 |  | 
| 4 | #include "unicode/utypes.h" | 
| 5 |  | 
| 6 | #if !UCONFIG_NO_FORMATTING | 
| 7 | #ifndef __SOURCE_NUMBER_SKELETONS_H__ | 
| 8 | #define __SOURCE_NUMBER_SKELETONS_H__ | 
| 9 |  | 
| 10 | #include "number_types.h" | 
| 11 | #include "numparse_types.h" | 
| 12 | #include "unicode/ucharstrie.h" | 
| 13 | #include "string_segment.h" | 
| 14 |  | 
| 15 | U_NAMESPACE_BEGIN | 
| 16 | namespace number { | 
| 17 | namespace impl { | 
| 18 |  | 
| 19 | // Forward-declaration | 
| 20 | struct SeenMacroProps; | 
| 21 |  | 
| 22 | // namespace for enums and entrypoint functions | 
| 23 | namespace skeleton { | 
| 24 |  | 
| 25 | /////////////////////////////////////////////////////////////////////////////////////// | 
| 26 | // NOTE: For an example of how to add a new stem to the number skeleton parser, see: // | 
| 27 | // http://bugs.icu-project.org/trac/changeset/41193                                  // | 
| 28 | /////////////////////////////////////////////////////////////////////////////////////// | 
| 29 |  | 
/**
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * The enumerators are grouped into sections according to whether an option is forbidden,
 * optional, or required after the stem that produced the state.
 */
enum ParseState {

    // Section 0: We expect whitespace or a stem, but not an option:

    STATE_NULL,

    // Section 1: We might accept an option, but it is not required:

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    // Section 2: An option is required:

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};

/**
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
 * string literal written in upper snake case.
 *
 * Stems that are meaningful on their own (Section 1) are listed before stems that must be
 * followed by an option (Section 2).
 *
 * @see stem_to_object
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {

    // Section 1: Stems that do not require an option:

    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: Stems that DO require an option:

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};

/** Default wildcard char, accepted on input and printed in output */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard char, accepted on input but not printed in output */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Checks whether the char is a wildcard on input.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar.
 */
inline bool isWildcardChar(char16_t c) {
    return c == kWildcardChar || c == kAltWildcardChar;
}

| 132 | /** | 
| 133 |  * Creates a NumberFormatter corresponding to the given skeleton string. | 
| 134 |  * | 
| 135 |  * @param skeletonString | 
| 136 |  *            A number skeleton string, possibly not in its shortest form. | 
| 137 |  * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string. | 
| 138 |  */ | 
| 139 | UnlocalizedNumberFormatter create( | 
| 140 |     const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status); | 
| 141 |  | 
| 142 | /** | 
| 143 |  * Create a skeleton string corresponding to the given NumberFormatter. | 
| 144 |  * | 
| 145 |  * @param macros | 
| 146 |  *            The NumberFormatter options object. | 
| 147 |  * @return A skeleton string in normalized form. | 
| 148 |  */ | 
| 149 | UnicodeString generate(const MacroProps& macros, UErrorCode& status); | 
| 150 |  | 
| 151 | /** | 
| 152 |  * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop. | 
| 153 |  * | 
| 154 |  * Internal: use the create() endpoint instead of this function. | 
| 155 |  */ | 
| 156 | MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status); | 
| 157 |  | 
| 158 | /** | 
| 159 |  * Given that the current segment represents a stem, parse it and save the result. | 
| 160 |  * | 
| 161 |  * @return The next state after parsing this stem, corresponding to what subset of options to expect. | 
| 162 |  */ | 
| 163 | ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, | 
| 164 |                      MacroProps& macros, UErrorCode& status); | 
| 165 |  | 
| 166 | /** | 
| 167 |  * Given that the current segment represents an option, parse it and save the result. | 
| 168 |  * | 
| 169 |  * @return The next state after parsing this option, corresponding to what subset of options to | 
| 170 |  *         expect next. | 
| 171 |  */ | 
| 172 | ParseState | 
| 173 | parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 174 |  | 
} // namespace skeleton


| 178 | /** | 
| 179 |  * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This | 
| 180 |  * applies to only the "Section 1" stems, those that are well-defined without an option. | 
| 181 |  */ | 
| 182 | namespace stem_to_object { | 
| 183 |  | 
| 184 | Notation (skeleton::StemEnum stem); | 
| 185 |  | 
| 186 | MeasureUnit (skeleton::StemEnum stem); | 
| 187 |  | 
| 188 | Precision (skeleton::StemEnum stem); | 
| 189 |  | 
| 190 | UNumberFormatRoundingMode (skeleton::StemEnum stem); | 
| 191 |  | 
| 192 | UNumberGroupingStrategy (skeleton::StemEnum stem); | 
| 193 |  | 
| 194 | UNumberUnitWidth (skeleton::StemEnum stem); | 
| 195 |  | 
| 196 | UNumberSignDisplay (skeleton::StemEnum stem); | 
| 197 |  | 
| 198 | UNumberDecimalSeparatorDisplay (skeleton::StemEnum stem); | 
| 199 |  | 
| 200 | } // namespace stem_to_object | 
| 201 |  | 
| 202 | /** | 
| 203 |  * Namespace for utility methods that convert from enums to stem strings. More complex object conversions | 
| 204 |  * take place in the object_to_stem_string namespace. | 
| 205 |  */ | 
| 206 | namespace enum_to_stem_string { | 
| 207 |  | 
| 208 | void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb); | 
| 209 |  | 
| 210 | void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb); | 
| 211 |  | 
| 212 | void unitWidth(UNumberUnitWidth value, UnicodeString& sb); | 
| 213 |  | 
| 214 | void signDisplay(UNumberSignDisplay value, UnicodeString& sb); | 
| 215 |  | 
| 216 | void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb); | 
| 217 |  | 
| 218 | } // namespace enum_to_stem_string | 
| 219 |  | 
| 220 | /** | 
| 221 |  * Namespace for utility methods for processing stems and options that cannot be interpreted literally. | 
| 222 |  */ | 
| 223 | namespace blueprint_helpers { | 
| 224 |  | 
| 225 | /** @return Whether we successfully found and parsed an exponent width option. */ | 
| 226 | bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 227 |  | 
| 228 | void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status); | 
| 229 |  | 
| 230 | /** @return Whether we successfully found and parsed an exponent sign option. */ | 
| 231 | bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 232 |  | 
| 233 | void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 234 |  | 
| 235 | void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); | 
| 236 |  | 
| 237 | void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 238 |  | 
| 239 | void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status); | 
| 240 |  | 
| 241 | void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 242 |  | 
| 243 | void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 244 |  | 
| 245 | void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 246 |  | 
| 247 | void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); | 
| 248 |  | 
| 249 | void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 250 |  | 
| 251 | void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status); | 
| 252 |  | 
| 253 | void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 254 |  | 
| 255 | // Note: no generateScientificStem since this syntax was added later in ICU 67 | 
| 256 |  | 
| 257 | void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 258 |  | 
| 259 | // Note: no generateIntegerStem since this syntax was added later in ICU 67 | 
| 260 |  | 
| 261 | /** @return Whether we successfully found and parsed a frac-sig option. */ | 
| 262 | bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 263 |  | 
| 264 | void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 265 |  | 
| 266 | void | 
| 267 | generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status); | 
| 268 |  | 
| 269 | void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 270 |  | 
| 271 | void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status); | 
| 272 |  | 
| 273 | void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 274 |  | 
| 275 | void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status); | 
| 276 |  | 
| 277 | void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); | 
| 278 |  | 
| 279 | void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, | 
| 280 |                               UErrorCode& status); | 
| 281 |  | 
| 282 | } // namespace blueprint_helpers | 
| 283 |  | 
| 284 | /** | 
| 285 |  * Class for utility methods for generating a token corresponding to each macro-prop. Each method | 
| 286 |  * returns whether or not a token was written to the string builder. | 
| 287 |  * | 
| 288 |  * This needs to be a class, not a namespace, so it can be friended. | 
| 289 |  */ | 
| 290 | class GeneratorHelpers { | 
| 291 |   public: | 
| 292 |     /** | 
| 293 |      * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given | 
| 294 |      * StringBuilder. | 
| 295 |      * | 
| 296 |      * Internal: use the create() endpoint instead of this function. | 
| 297 |      */ | 
| 298 |     static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 299 |  | 
| 300 |   private: | 
| 301 |     static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 302 |  | 
| 303 |     static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 304 |  | 
| 305 |     static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 306 |  | 
| 307 |     static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 308 |  | 
| 309 |     static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 310 |  | 
| 311 |     static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 312 |  | 
| 313 |     static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 314 |  | 
| 315 |     static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 316 |  | 
| 317 |     static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 318 |  | 
| 319 |     static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 320 |  | 
| 321 |     static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 322 |  | 
| 323 |     static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); | 
| 324 |  | 
| 325 | }; | 
| 326 |  | 
/**
 * Struct for null-checking.
 * In Java, we can just check the object reference. In C++, we need a different method.
 *
 * Holds one flag per macro-prop; every flag starts out false.
 */
struct SeenMacroProps {
    bool notation = false;
    bool unit = false;
    bool perUnit = false;
    bool precision = false;
    bool roundingMode = false;
    bool grouper = false;
    bool padder = false;
    bool integerWidth = false;
    bool symbols = false;
    bool unitWidth = false;
    bool sign = false;
    bool decimal = false;
    bool scale = false;
};

| 347 | } // namespace impl | 
| 348 | } // namespace number | 
| 349 | U_NAMESPACE_END | 
| 350 |  | 
| 351 | #endif //__SOURCE_NUMBER_SKELETONS_H__ | 
| 352 | #endif /* #if !UCONFIG_NO_FORMATTING */ | 
| 353 |  |