// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
| 3 | |
| 4 | #include "unicode/utypes.h" |
| 5 | |
| 6 | #if !UCONFIG_NO_FORMATTING |
| 7 | #ifndef __SOURCE_NUMBER_SKELETONS_H__ |
| 8 | #define __SOURCE_NUMBER_SKELETONS_H__ |
| 9 | |
| 10 | #include "number_types.h" |
| 11 | #include "numparse_types.h" |
| 12 | #include "unicode/ucharstrie.h" |
| 13 | #include "string_segment.h" |
| 14 | |
| 15 | U_NAMESPACE_BEGIN |
| 16 | namespace number { |
| 17 | namespace impl { |
| 18 | |
// Forward declaration; the full definition of SeenMacroProps appears later in this header.
struct SeenMacroProps;
| 21 | |
// Namespace for enums and entry-point functions
| 23 | namespace skeleton { |
| 24 | |
///////////////////////////////////////////////////////////////////////////////////////
// NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
// http://bugs.icu-project.org/trac/changeset/41193                                  //
///////////////////////////////////////////////////////////////////////////////////////
| 29 | |
/**
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * The enumerators are grouped into three sections, according to whether an option is not
 * expected, is optional, or is required. Enumerator order (and therefore the implicit numeric
 * values) is kept exactly as declared.
 */
enum ParseState {

    // Section 0: We expect whitespace or a stem, but not an option:

    STATE_NULL,

    // Section 1: We might accept an option, but it is not required:

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    // Section 2: An option is required:

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
| 55 | |
/**
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
 * string literal written in upper snake case.
 *
 * The enumerators are split into two sections: stems that are complete on their own, and stems
 * that must be followed by an option. Enumerator order is kept exactly as declared.
 *
 * @see stem_to_object
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {

    // Section 1: Stems that do not require an option:

    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: Stems that DO require an option:

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
| 120 | |
/** Default wildcard char, accepted on input and printed in output */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard char, accepted on input but not printed in output */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Checks whether the char is a wildcard on input.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals either kWildcardChar or kAltWildcardChar; false otherwise.
 */
inline bool isWildcardChar(char16_t c) {
    return c == kWildcardChar || c == kAltWildcardChar;
}
| 131 | |
| 132 | /** |
| 133 | * Creates a NumberFormatter corresponding to the given skeleton string. |
| 134 | * |
| 135 | * @param skeletonString |
| 136 | * A number skeleton string, possibly not in its shortest form. |
| 137 | * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string. |
| 138 | */ |
| 139 | UnlocalizedNumberFormatter create( |
| 140 | const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status); |
| 141 | |
| 142 | /** |
| 143 | * Create a skeleton string corresponding to the given NumberFormatter. |
| 144 | * |
| 145 | * @param macros |
| 146 | * The NumberFormatter options object. |
| 147 | * @return A skeleton string in normalized form. |
| 148 | */ |
| 149 | UnicodeString generate(const MacroProps& macros, UErrorCode& status); |
| 150 | |
| 151 | /** |
| 152 | * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop. |
| 153 | * |
| 154 | * Internal: use the create() endpoint instead of this function. |
| 155 | */ |
| 156 | MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status); |
| 157 | |
| 158 | /** |
| 159 | * Given that the current segment represents a stem, parse it and save the result. |
| 160 | * |
| 161 | * @return The next state after parsing this stem, corresponding to what subset of options to expect. |
| 162 | */ |
| 163 | ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, |
| 164 | MacroProps& macros, UErrorCode& status); |
| 165 | |
| 166 | /** |
| 167 | * Given that the current segment represents an option, parse it and save the result. |
| 168 | * |
| 169 | * @return The next state after parsing this option, corresponding to what subset of options to |
| 170 | * expect next. |
| 171 | */ |
| 172 | ParseState |
| 173 | parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 174 | |
| 175 | } // namespace skeleton |
| 176 | |
| 177 | |
| 178 | /** |
| 179 | * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This |
| 180 | * applies to only the "Section 1" stems, those that are well-defined without an option. |
| 181 | */ |
| 182 | namespace stem_to_object { |
| 183 | |
| 184 | Notation (skeleton::StemEnum stem); |
| 185 | |
| 186 | MeasureUnit (skeleton::StemEnum stem); |
| 187 | |
| 188 | Precision (skeleton::StemEnum stem); |
| 189 | |
| 190 | UNumberFormatRoundingMode (skeleton::StemEnum stem); |
| 191 | |
| 192 | UNumberGroupingStrategy (skeleton::StemEnum stem); |
| 193 | |
| 194 | UNumberUnitWidth (skeleton::StemEnum stem); |
| 195 | |
| 196 | UNumberSignDisplay (skeleton::StemEnum stem); |
| 197 | |
| 198 | UNumberDecimalSeparatorDisplay (skeleton::StemEnum stem); |
| 199 | |
| 200 | } // namespace stem_to_object |
| 201 | |
| 202 | /** |
| 203 | * Namespace for utility methods that convert from enums to stem strings. More complex object conversions |
| 204 | * take place in the object_to_stem_string namespace. |
| 205 | */ |
| 206 | namespace enum_to_stem_string { |
| 207 | |
| 208 | void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb); |
| 209 | |
| 210 | void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb); |
| 211 | |
| 212 | void unitWidth(UNumberUnitWidth value, UnicodeString& sb); |
| 213 | |
| 214 | void signDisplay(UNumberSignDisplay value, UnicodeString& sb); |
| 215 | |
| 216 | void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb); |
| 217 | |
| 218 | } // namespace enum_to_stem_string |
| 219 | |
| 220 | /** |
| 221 | * Namespace for utility methods for processing stems and options that cannot be interpreted literally. |
| 222 | */ |
| 223 | namespace blueprint_helpers { |
| 224 | |
| 225 | /** @return Whether we successfully found and parsed an exponent width option. */ |
| 226 | bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 227 | |
| 228 | void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status); |
| 229 | |
| 230 | /** @return Whether we successfully found and parsed an exponent sign option. */ |
| 231 | bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 232 | |
| 233 | void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 234 | |
| 235 | void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); |
| 236 | |
| 237 | void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 238 | |
| 239 | void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status); |
| 240 | |
| 241 | void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 242 | |
| 243 | void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 244 | |
| 245 | void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 246 | |
| 247 | void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); |
| 248 | |
| 249 | void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 250 | |
| 251 | void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status); |
| 252 | |
| 253 | void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 254 | |
| 255 | // Note: no generateScientificStem since this syntax was added later in ICU 67 |
| 256 | |
| 257 | void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 258 | |
| 259 | // Note: no generateIntegerStem since this syntax was added later in ICU 67 |
| 260 | |
| 261 | /** @return Whether we successfully found and parsed a frac-sig option. */ |
| 262 | bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 263 | |
| 264 | void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 265 | |
| 266 | void |
| 267 | generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status); |
| 268 | |
| 269 | void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 270 | |
| 271 | void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status); |
| 272 | |
| 273 | void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 274 | |
| 275 | void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status); |
| 276 | |
| 277 | void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); |
| 278 | |
| 279 | void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, |
| 280 | UErrorCode& status); |
| 281 | |
| 282 | } // namespace blueprint_helpers |
| 283 | |
| 284 | /** |
| 285 | * Class for utility methods for generating a token corresponding to each macro-prop. Each method |
| 286 | * returns whether or not a token was written to the string builder. |
| 287 | * |
| 288 | * This needs to be a class, not a namespace, so it can be friended. |
| 289 | */ |
| 290 | class GeneratorHelpers { |
| 291 | public: |
| 292 | /** |
| 293 | * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given |
| 294 | * StringBuilder. |
| 295 | * |
| 296 | * Internal: use the create() endpoint instead of this function. |
| 297 | */ |
| 298 | static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 299 | |
| 300 | private: |
| 301 | static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 302 | |
| 303 | static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 304 | |
| 305 | static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 306 | |
| 307 | static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 308 | |
| 309 | static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 310 | |
| 311 | static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 312 | |
| 313 | static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 314 | |
| 315 | static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 316 | |
| 317 | static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 318 | |
| 319 | static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 320 | |
| 321 | static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 322 | |
| 323 | static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); |
| 324 | |
| 325 | }; |
| 326 | |
/**
 * Struct for null-checking.
 * In Java, we can just check the object reference. In C++, we need a different method.
 *
 * Holds one boolean flag per macro property; every flag is initialized to false.
 * NOTE(review): presumably the parser sets a flag once the corresponding property has been
 * encountered in the skeleton -- confirm against the implementation.
 */
struct SeenMacroProps {
    bool notation = false;
    bool unit = false;
    bool perUnit = false;
    bool precision = false;
    bool roundingMode = false;
    bool grouper = false;
    bool padder = false;
    bool integerWidth = false;
    bool symbols = false;
    bool unitWidth = false;
    bool sign = false;
    bool decimal = false;
    bool scale = false;
};
| 346 | |
| 347 | } // namespace impl |
| 348 | } // namespace number |
| 349 | U_NAMESPACE_END |
| 350 | |
| 351 | #endif //__SOURCE_NUMBER_SKELETONS_H__ |
| 352 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
| 353 | |