1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8// Allow implicit conversion from char16_t* to UnicodeString for this file:
9// Helpful in toString methods and elsewhere.
10#define UNISTR_FROM_STRING_EXPLICIT
11
12#include "number_decnum.h"
13#include "number_skeletons.h"
14#include "umutex.h"
15#include "ucln_in.h"
16#include "patternprops.h"
17#include "unicode/ucharstriebuilder.h"
18#include "number_utils.h"
19#include "number_decimalquantity.h"
20#include "unicode/numberformatter.h"
21#include "uinvchar.h"
22#include "charstr.h"
23#include "string_segment.h"
24#include "unicode/errorcode.h"
25#include "util.h"
26#include "measunit_impl.h"
27
28using namespace icu;
29using namespace icu::number;
30using namespace icu::number::impl;
31using namespace icu::number::impl::skeleton;
32
33namespace {
34
// Guards the one-time construction of the serialized stem trie (see initNumberSkeletons()).
icu::UInitOnce gNumberSkeletonsInitOnce = U_INITONCE_INITIALIZER;

// Serialized UCharsTrie mapping skeleton stem strings to StemEnum values.
// Allocated by initNumberSkeletons() and released by cleanupNumberSkeletons().
char16_t* kSerializedStemTrie = nullptr;
38
39UBool U_CALLCONV cleanupNumberSkeletons() {
40 uprv_free(kSerializedStemTrie);
41 kSerializedStemTrie = nullptr;
42 gNumberSkeletonsInitOnce.reset();
43 return TRUE;
44}
45
46void U_CALLCONV initNumberSkeletons(UErrorCode& status) {
47 ucln_i18n_registerCleanup(UCLN_I18N_NUMBER_SKELETONS, cleanupNumberSkeletons);
48
49 UCharsTrieBuilder b(status);
50 if (U_FAILURE(status)) { return; }
51
52 // Section 1:
53 b.add(u"compact-short", STEM_COMPACT_SHORT, status);
54 b.add(u"compact-long", STEM_COMPACT_LONG, status);
55 b.add(u"scientific", STEM_SCIENTIFIC, status);
56 b.add(u"engineering", STEM_ENGINEERING, status);
57 b.add(u"notation-simple", STEM_NOTATION_SIMPLE, status);
58 b.add(u"base-unit", STEM_BASE_UNIT, status);
59 b.add(u"percent", STEM_PERCENT, status);
60 b.add(u"permille", STEM_PERMILLE, status);
61 b.add(u"precision-integer", STEM_PRECISION_INTEGER, status);
62 b.add(u"precision-unlimited", STEM_PRECISION_UNLIMITED, status);
63 b.add(u"precision-currency-standard", STEM_PRECISION_CURRENCY_STANDARD, status);
64 b.add(u"precision-currency-cash", STEM_PRECISION_CURRENCY_CASH, status);
65 b.add(u"rounding-mode-ceiling", STEM_ROUNDING_MODE_CEILING, status);
66 b.add(u"rounding-mode-floor", STEM_ROUNDING_MODE_FLOOR, status);
67 b.add(u"rounding-mode-down", STEM_ROUNDING_MODE_DOWN, status);
68 b.add(u"rounding-mode-up", STEM_ROUNDING_MODE_UP, status);
69 b.add(u"rounding-mode-half-even", STEM_ROUNDING_MODE_HALF_EVEN, status);
70 b.add(u"rounding-mode-half-down", STEM_ROUNDING_MODE_HALF_DOWN, status);
71 b.add(u"rounding-mode-half-up", STEM_ROUNDING_MODE_HALF_UP, status);
72 b.add(u"rounding-mode-unnecessary", STEM_ROUNDING_MODE_UNNECESSARY, status);
73 b.add(u"group-off", STEM_GROUP_OFF, status);
74 b.add(u"group-min2", STEM_GROUP_MIN2, status);
75 b.add(u"group-auto", STEM_GROUP_AUTO, status);
76 b.add(u"group-on-aligned", STEM_GROUP_ON_ALIGNED, status);
77 b.add(u"group-thousands", STEM_GROUP_THOUSANDS, status);
78 b.add(u"latin", STEM_LATIN, status);
79 b.add(u"unit-width-narrow", STEM_UNIT_WIDTH_NARROW, status);
80 b.add(u"unit-width-short", STEM_UNIT_WIDTH_SHORT, status);
81 b.add(u"unit-width-full-name", STEM_UNIT_WIDTH_FULL_NAME, status);
82 b.add(u"unit-width-iso-code", STEM_UNIT_WIDTH_ISO_CODE, status);
83 b.add(u"unit-width-hidden", STEM_UNIT_WIDTH_HIDDEN, status);
84 b.add(u"sign-auto", STEM_SIGN_AUTO, status);
85 b.add(u"sign-always", STEM_SIGN_ALWAYS, status);
86 b.add(u"sign-never", STEM_SIGN_NEVER, status);
87 b.add(u"sign-accounting", STEM_SIGN_ACCOUNTING, status);
88 b.add(u"sign-accounting-always", STEM_SIGN_ACCOUNTING_ALWAYS, status);
89 b.add(u"sign-except-zero", STEM_SIGN_EXCEPT_ZERO, status);
90 b.add(u"sign-accounting-except-zero", STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status);
91 b.add(u"decimal-auto", STEM_DECIMAL_AUTO, status);
92 b.add(u"decimal-always", STEM_DECIMAL_ALWAYS, status);
93 if (U_FAILURE(status)) { return; }
94
95 // Section 2:
96 b.add(u"precision-increment", STEM_PRECISION_INCREMENT, status);
97 b.add(u"measure-unit", STEM_MEASURE_UNIT, status);
98 b.add(u"per-measure-unit", STEM_PER_MEASURE_UNIT, status);
99 b.add(u"unit", STEM_UNIT, status);
100 b.add(u"currency", STEM_CURRENCY, status);
101 b.add(u"integer-width", STEM_INTEGER_WIDTH, status);
102 b.add(u"numbering-system", STEM_NUMBERING_SYSTEM, status);
103 b.add(u"scale", STEM_SCALE, status);
104 if (U_FAILURE(status)) { return; }
105
106 // Section 3 (concise tokens):
107 b.add(u"K", STEM_COMPACT_SHORT, status);
108 b.add(u"KK", STEM_COMPACT_LONG, status);
109 b.add(u"%", STEM_PERCENT, status);
110 b.add(u"%x100", STEM_PERCENT_100, status);
111 b.add(u",_", STEM_GROUP_OFF, status);
112 b.add(u",?", STEM_GROUP_MIN2, status);
113 b.add(u",!", STEM_GROUP_ON_ALIGNED, status);
114 b.add(u"+!", STEM_SIGN_ALWAYS, status);
115 b.add(u"+_", STEM_SIGN_NEVER, status);
116 b.add(u"()", STEM_SIGN_ACCOUNTING, status);
117 b.add(u"()!", STEM_SIGN_ACCOUNTING_ALWAYS, status);
118 b.add(u"+?", STEM_SIGN_EXCEPT_ZERO, status);
119 b.add(u"()?", STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status);
120 if (U_FAILURE(status)) { return; }
121
122 // Build the CharsTrie
123 // TODO: Use SLOW or FAST here?
124 UnicodeString result;
125 b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
126 if (U_FAILURE(status)) { return; }
127
128 // Copy the result into the global constant pointer
129 size_t numBytes = result.length() * sizeof(char16_t);
130 kSerializedStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
131 uprv_memcpy(kSerializedStemTrie, result.getBuffer(), numBytes);
132}
133
134
135inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) {
136 for (int i = 0; i < count; i++) {
137 sb.append(cp);
138 }
139}
140
141
// CHECK_NULL(seen, field, status): rejects a skeleton component that appears twice.
// If (seen).field is already set, sets `status` to U_NUMBER_SKELETON_SYNTAX_ERROR and
// returns STATE_NULL from the *enclosing* function; otherwise marks (seen).field as set.
// Only usable inside functions whose return type accepts STATE_NULL.
#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wrapping */ \
UPRV_BLOCK_MACRO_BEGIN { \
    if ((seen).field) { \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return STATE_NULL; \
    } \
    (seen).field = true; \
} UPRV_BLOCK_MACRO_END
150
151
// SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status): appends the UTF-16 code units
// src[start, end) to `dest` via appendInvariantChars().
// On failure it returns (with no value) from the *enclosing* function after setting
// `status`: an invariant-conversion error is reported as U_NUMBER_SKELETON_SYNTAX_ERROR,
// and any other failure code is propagated unchanged.
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \
UPRV_BLOCK_MACRO_BEGIN { \
    UErrorCode conversionStatus = U_ZERO_ERROR; \
    (dest).appendInvariantChars({FALSE, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return; \
    } else if (U_FAILURE(conversionStatus)) { \
        (status) = conversionStatus; \
        return; \
    } \
} UPRV_BLOCK_MACRO_END
165
166
167} // anonymous namespace
168
169
170Notation stem_to_object::notation(skeleton::StemEnum stem) {
171 switch (stem) {
172 case STEM_COMPACT_SHORT:
173 return Notation::compactShort();
174 case STEM_COMPACT_LONG:
175 return Notation::compactLong();
176 case STEM_SCIENTIFIC:
177 return Notation::scientific();
178 case STEM_ENGINEERING:
179 return Notation::engineering();
180 case STEM_NOTATION_SIMPLE:
181 return Notation::simple();
182 default:
183 UPRV_UNREACHABLE;
184 }
185}
186
187MeasureUnit stem_to_object::unit(skeleton::StemEnum stem) {
188 switch (stem) {
189 case STEM_BASE_UNIT:
190 // Slicing is okay
191 return NoUnit::base(); // NOLINT
192 case STEM_PERCENT:
193 // Slicing is okay
194 return NoUnit::percent(); // NOLINT
195 case STEM_PERMILLE:
196 // Slicing is okay
197 return NoUnit::permille(); // NOLINT
198 default:
199 UPRV_UNREACHABLE;
200 }
201}
202
203Precision stem_to_object::precision(skeleton::StemEnum stem) {
204 switch (stem) {
205 case STEM_PRECISION_INTEGER:
206 return Precision::integer();
207 case STEM_PRECISION_UNLIMITED:
208 return Precision::unlimited();
209 case STEM_PRECISION_CURRENCY_STANDARD:
210 return Precision::currency(UCURR_USAGE_STANDARD);
211 case STEM_PRECISION_CURRENCY_CASH:
212 return Precision::currency(UCURR_USAGE_CASH);
213 default:
214 UPRV_UNREACHABLE;
215 }
216}
217
218UNumberFormatRoundingMode stem_to_object::roundingMode(skeleton::StemEnum stem) {
219 switch (stem) {
220 case STEM_ROUNDING_MODE_CEILING:
221 return UNUM_ROUND_CEILING;
222 case STEM_ROUNDING_MODE_FLOOR:
223 return UNUM_ROUND_FLOOR;
224 case STEM_ROUNDING_MODE_DOWN:
225 return UNUM_ROUND_DOWN;
226 case STEM_ROUNDING_MODE_UP:
227 return UNUM_ROUND_UP;
228 case STEM_ROUNDING_MODE_HALF_EVEN:
229 return UNUM_ROUND_HALFEVEN;
230 case STEM_ROUNDING_MODE_HALF_DOWN:
231 return UNUM_ROUND_HALFDOWN;
232 case STEM_ROUNDING_MODE_HALF_UP:
233 return UNUM_ROUND_HALFUP;
234 case STEM_ROUNDING_MODE_UNNECESSARY:
235 return UNUM_ROUND_UNNECESSARY;
236 default:
237 UPRV_UNREACHABLE;
238 }
239}
240
241UNumberGroupingStrategy stem_to_object::groupingStrategy(skeleton::StemEnum stem) {
242 switch (stem) {
243 case STEM_GROUP_OFF:
244 return UNUM_GROUPING_OFF;
245 case STEM_GROUP_MIN2:
246 return UNUM_GROUPING_MIN2;
247 case STEM_GROUP_AUTO:
248 return UNUM_GROUPING_AUTO;
249 case STEM_GROUP_ON_ALIGNED:
250 return UNUM_GROUPING_ON_ALIGNED;
251 case STEM_GROUP_THOUSANDS:
252 return UNUM_GROUPING_THOUSANDS;
253 default:
254 return UNUM_GROUPING_COUNT; // for objects, throw; for enums, return COUNT
255 }
256}
257
258UNumberUnitWidth stem_to_object::unitWidth(skeleton::StemEnum stem) {
259 switch (stem) {
260 case STEM_UNIT_WIDTH_NARROW:
261 return UNUM_UNIT_WIDTH_NARROW;
262 case STEM_UNIT_WIDTH_SHORT:
263 return UNUM_UNIT_WIDTH_SHORT;
264 case STEM_UNIT_WIDTH_FULL_NAME:
265 return UNUM_UNIT_WIDTH_FULL_NAME;
266 case STEM_UNIT_WIDTH_ISO_CODE:
267 return UNUM_UNIT_WIDTH_ISO_CODE;
268 case STEM_UNIT_WIDTH_HIDDEN:
269 return UNUM_UNIT_WIDTH_HIDDEN;
270 default:
271 return UNUM_UNIT_WIDTH_COUNT; // for objects, throw; for enums, return COUNT
272 }
273}
274
275UNumberSignDisplay stem_to_object::signDisplay(skeleton::StemEnum stem) {
276 switch (stem) {
277 case STEM_SIGN_AUTO:
278 return UNUM_SIGN_AUTO;
279 case STEM_SIGN_ALWAYS:
280 return UNUM_SIGN_ALWAYS;
281 case STEM_SIGN_NEVER:
282 return UNUM_SIGN_NEVER;
283 case STEM_SIGN_ACCOUNTING:
284 return UNUM_SIGN_ACCOUNTING;
285 case STEM_SIGN_ACCOUNTING_ALWAYS:
286 return UNUM_SIGN_ACCOUNTING_ALWAYS;
287 case STEM_SIGN_EXCEPT_ZERO:
288 return UNUM_SIGN_EXCEPT_ZERO;
289 case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO:
290 return UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
291 default:
292 return UNUM_SIGN_COUNT; // for objects, throw; for enums, return COUNT
293 }
294}
295
296UNumberDecimalSeparatorDisplay stem_to_object::decimalSeparatorDisplay(skeleton::StemEnum stem) {
297 switch (stem) {
298 case STEM_DECIMAL_AUTO:
299 return UNUM_DECIMAL_SEPARATOR_AUTO;
300 case STEM_DECIMAL_ALWAYS:
301 return UNUM_DECIMAL_SEPARATOR_ALWAYS;
302 default:
303 return UNUM_DECIMAL_SEPARATOR_COUNT; // for objects, throw; for enums, return COUNT
304 }
305}
306
307
308void enum_to_stem_string::roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb) {
309 switch (value) {
310 case UNUM_ROUND_CEILING:
311 sb.append(u"rounding-mode-ceiling", -1);
312 break;
313 case UNUM_ROUND_FLOOR:
314 sb.append(u"rounding-mode-floor", -1);
315 break;
316 case UNUM_ROUND_DOWN:
317 sb.append(u"rounding-mode-down", -1);
318 break;
319 case UNUM_ROUND_UP:
320 sb.append(u"rounding-mode-up", -1);
321 break;
322 case UNUM_ROUND_HALFEVEN:
323 sb.append(u"rounding-mode-half-even", -1);
324 break;
325 case UNUM_ROUND_HALFDOWN:
326 sb.append(u"rounding-mode-half-down", -1);
327 break;
328 case UNUM_ROUND_HALFUP:
329 sb.append(u"rounding-mode-half-up", -1);
330 break;
331 case UNUM_ROUND_UNNECESSARY:
332 sb.append(u"rounding-mode-unnecessary", -1);
333 break;
334 default:
335 UPRV_UNREACHABLE;
336 }
337}
338
339void enum_to_stem_string::groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb) {
340 switch (value) {
341 case UNUM_GROUPING_OFF:
342 sb.append(u"group-off", -1);
343 break;
344 case UNUM_GROUPING_MIN2:
345 sb.append(u"group-min2", -1);
346 break;
347 case UNUM_GROUPING_AUTO:
348 sb.append(u"group-auto", -1);
349 break;
350 case UNUM_GROUPING_ON_ALIGNED:
351 sb.append(u"group-on-aligned", -1);
352 break;
353 case UNUM_GROUPING_THOUSANDS:
354 sb.append(u"group-thousands", -1);
355 break;
356 default:
357 UPRV_UNREACHABLE;
358 }
359}
360
361void enum_to_stem_string::unitWidth(UNumberUnitWidth value, UnicodeString& sb) {
362 switch (value) {
363 case UNUM_UNIT_WIDTH_NARROW:
364 sb.append(u"unit-width-narrow", -1);
365 break;
366 case UNUM_UNIT_WIDTH_SHORT:
367 sb.append(u"unit-width-short", -1);
368 break;
369 case UNUM_UNIT_WIDTH_FULL_NAME:
370 sb.append(u"unit-width-full-name", -1);
371 break;
372 case UNUM_UNIT_WIDTH_ISO_CODE:
373 sb.append(u"unit-width-iso-code", -1);
374 break;
375 case UNUM_UNIT_WIDTH_HIDDEN:
376 sb.append(u"unit-width-hidden", -1);
377 break;
378 default:
379 UPRV_UNREACHABLE;
380 }
381}
382
383void enum_to_stem_string::signDisplay(UNumberSignDisplay value, UnicodeString& sb) {
384 switch (value) {
385 case UNUM_SIGN_AUTO:
386 sb.append(u"sign-auto", -1);
387 break;
388 case UNUM_SIGN_ALWAYS:
389 sb.append(u"sign-always", -1);
390 break;
391 case UNUM_SIGN_NEVER:
392 sb.append(u"sign-never", -1);
393 break;
394 case UNUM_SIGN_ACCOUNTING:
395 sb.append(u"sign-accounting", -1);
396 break;
397 case UNUM_SIGN_ACCOUNTING_ALWAYS:
398 sb.append(u"sign-accounting-always", -1);
399 break;
400 case UNUM_SIGN_EXCEPT_ZERO:
401 sb.append(u"sign-except-zero", -1);
402 break;
403 case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO:
404 sb.append(u"sign-accounting-except-zero", -1);
405 break;
406 default:
407 UPRV_UNREACHABLE;
408 }
409}
410
411void
412enum_to_stem_string::decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb) {
413 switch (value) {
414 case UNUM_DECIMAL_SEPARATOR_AUTO:
415 sb.append(u"decimal-auto", -1);
416 break;
417 case UNUM_DECIMAL_SEPARATOR_ALWAYS:
418 sb.append(u"decimal-always", -1);
419 break;
420 default:
421 UPRV_UNREACHABLE;
422 }
423}
424
425
426UnlocalizedNumberFormatter skeleton::create(
427 const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status) {
428
429 // Initialize perror
430 if (perror != nullptr) {
431 perror->line = 0;
432 perror->offset = -1;
433 perror->preContext[0] = 0;
434 perror->postContext[0] = 0;
435 }
436
437 umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status);
438 if (U_FAILURE(status)) {
439 return {};
440 }
441
442 int32_t errOffset;
443 MacroProps macros = parseSkeleton(skeletonString, errOffset, status);
444 if (U_SUCCESS(status)) {
445 return NumberFormatter::with().macros(macros);
446 }
447
448 if (perror == nullptr) {
449 return {};
450 }
451
452 // Populate the UParseError with the error location
453 perror->offset = errOffset;
454 int32_t contextStart = uprv_max(0, errOffset - U_PARSE_CONTEXT_LEN + 1);
455 int32_t contextEnd = uprv_min(skeletonString.length(), errOffset + U_PARSE_CONTEXT_LEN - 1);
456 skeletonString.extract(contextStart, errOffset - contextStart, perror->preContext, 0);
457 perror->preContext[errOffset - contextStart] = 0;
458 skeletonString.extract(errOffset, contextEnd - errOffset, perror->postContext, 0);
459 perror->postContext[contextEnd - errOffset] = 0;
460 return {};
461}
462
463UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) {
464 umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status);
465 UnicodeString sb;
466 GeneratorHelpers::generateSkeleton(macros, sb, status);
467 return sb;
468}
469
// Parses a whole skeleton string into a MacroProps.
//
// The skeleton is a sequence of tokens separated by pattern white space. Each token is
// a stem optionally followed by '/'-separated options. The loop below scans one code
// point at a time; `offset` is the length of the segment (stem or option) currently
// being accumulated, and `stem` is STATE_NULL while a stem is being read or the state
// returned by parseStem()/parseOption() while options are being read.
//
// skeletonString: the skeleton to parse (not modified; a copy with a trailing space is parsed).
// errOffset:      written only on failure; receives segment.getOffset() at the point of failure.
// status:         must be a success code on entry (asserted); set on syntax or other errors.
// Returns the MacroProps filled so far; the caller must check `status` before using it.
MacroProps skeleton::parseSkeleton(
        const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status) {
    U_ASSERT(U_SUCCESS(status));

    // Add a trailing whitespace to the end of the skeleton string to make code cleaner.
    // (This guarantees that the final token is terminated by a token separator.)
    UnicodeString tempSkeletonString(skeletonString);
    tempSkeletonString.append(u' ');

    SeenMacroProps seen;
    MacroProps macros;
    StringSegment segment(tempSkeletonString, false);
    UCharsTrie stemTrie(kSerializedStemTrie);
    ParseState stem = STATE_NULL;
    int32_t offset = 0;

    // Primary skeleton parse loop:
    while (offset < segment.length()) {
        UChar32 cp = segment.codePointAt(offset);
        bool isTokenSeparator = PatternProps::isWhiteSpace(cp);
        bool isOptionSeparator = (cp == u'/');

        if (!isTokenSeparator && !isOptionSeparator) {
            // Non-separator token; consume it.
            offset += U16_LENGTH(cp);
            if (stem == STATE_NULL) {
                // We are currently consuming a stem.
                // Go to the next state in the stem trie.
                stemTrie.nextForCodePoint(cp);
            }
            continue;
        }

        // We are looking at a token or option separator.
        // If the segment is nonempty, parse it and reset the segment.
        // Otherwise, make sure it is a valid repeating separator.
        if (offset != 0) {
            // Temporarily restrict the segment to the text accumulated so far.
            segment.setLength(offset);
            if (stem == STATE_NULL) {
                // The first separator after the start of a token. Parse it as a stem.
                stem = parseStem(segment, stemTrie, seen, macros, status);
                stemTrie.reset();
            } else {
                // A separator after the first separator of a token. Parse it as an option.
                stem = parseOption(stem, segment, macros, status);
            }
            segment.resetLength();
            if (U_FAILURE(status)) {
                // The segment offset still points at the start of the failed stem/option.
                errOffset = segment.getOffset();
                return macros;
            }

            // Consume the segment:
            segment.adjustOffset(offset);
            offset = 0;

        } else if (stem != STATE_NULL) {
            // A separator ('/' or whitespace) following an option separator ('/')
            // segment.setLength(U16_LENGTH(cp)); // for error message
            // throw new SkeletonSyntaxException("Unexpected separator character", segment);
            status = U_NUMBER_SKELETON_SYNTAX_ERROR;
            errOffset = segment.getOffset();
            return macros;

        } else {
            // Two spaces in a row; this is OK.
        }

        // Does the current stem forbid options?
        if (isOptionSeparator && stem == STATE_NULL) {
            // segment.setLength(U16_LENGTH(cp)); // for error message
            // throw new SkeletonSyntaxException("Unexpected option separator", segment);
            status = U_NUMBER_SKELETON_SYNTAX_ERROR;
            errOffset = segment.getOffset();
            return macros;
        }

        // Does the current stem require an option?
        if (isTokenSeparator && stem != STATE_NULL) {
            switch (stem) {
                case STATE_INCREMENT_PRECISION:
                case STATE_MEASURE_UNIT:
                case STATE_PER_MEASURE_UNIT:
                case STATE_IDENTIFIER_UNIT:
                case STATE_CURRENCY_UNIT:
                case STATE_INTEGER_WIDTH:
                case STATE_NUMBERING_SYSTEM:
                case STATE_SCALE:
                    // segment.setLength(U16_LENGTH(cp)); // for error message
                    // throw new SkeletonSyntaxException("Stem requires an option", segment);
                    status = U_NUMBER_SKELETON_SYNTAX_ERROR;
                    errOffset = segment.getOffset();
                    return macros;
                default:
                    break;
            }
            // The token ended while only optional options were pending; start a new token.
            stem = STATE_NULL;
        }

        // Consume the separator:
        segment.adjustOffset(U16_LENGTH(cp));
    }
    U_ASSERT(stem == STATE_NULL);
    return macros;
}
574
// Parses one stem (the first part of a skeleton token) and applies it to `macros`.
//
// segment:  the stem text.
// stemTrie: trie that the caller has already advanced over the code points of `segment`.
// seen:     tracks which macro properties were already set; a repeated property is a
//           syntax error (see CHECK_NULL, which returns STATE_NULL with `status` set).
// macros:   receives the settings for stems that are meaningful on their own.
// status:   set to U_NUMBER_SKELETON_SYNTAX_ERROR for unknown or duplicate stems.
// Returns the state describing which options may or must follow this stem,
// or STATE_NULL if the stem takes no options.
ParseState
skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
                    MacroProps& macros, UErrorCode& status) {
    // First check for "blueprint" stems, which start with a "signal char"
    switch (segment.charAt(0)) {
        case u'.':
            CHECK_NULL(seen, precision, status);
            blueprint_helpers::parseFractionStem(segment, macros, status);
            return STATE_FRACTION_PRECISION;
        case u'@':
            CHECK_NULL(seen, precision, status);
            blueprint_helpers::parseDigitsStem(segment, macros, status);
            return STATE_NULL;
        case u'E':
            CHECK_NULL(seen, notation, status);
            blueprint_helpers::parseScientificStem(segment, macros, status);
            return STATE_NULL;
        case u'0':
            CHECK_NULL(seen, integerWidth, status);
            blueprint_helpers::parseIntegerStem(segment, macros, status);
            return STATE_NULL;
        default:
            break;
    }

    // Now look at the stem trie, which should already be pointing at our stem.
    UStringTrieResult stemResult = stemTrie.current();

    if (stemResult != USTRINGTRIE_INTERMEDIATE_VALUE && stemResult != USTRINGTRIE_FINAL_VALUE) {
        // throw new SkeletonSyntaxException("Unknown stem", segment);
        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
        return STATE_NULL;
    }

    auto stem = static_cast<StemEnum>(stemTrie.getValue());
    switch (stem) {

        // Stems with meaning on their own, not requiring an option:

        case STEM_COMPACT_SHORT:
        case STEM_COMPACT_LONG:
        case STEM_SCIENTIFIC:
        case STEM_ENGINEERING:
        case STEM_NOTATION_SIMPLE:
            CHECK_NULL(seen, notation, status);
            macros.notation = stem_to_object::notation(stem);
            switch (stem) {
                case STEM_SCIENTIFIC:
                case STEM_ENGINEERING:
                    return STATE_SCIENTIFIC; // allows for scientific options
                default:
                    return STATE_NULL;
            }

        case STEM_BASE_UNIT:
        case STEM_PERCENT:
        case STEM_PERMILLE:
            CHECK_NULL(seen, unit, status);
            macros.unit = stem_to_object::unit(stem);
            return STATE_NULL;

        case STEM_PERCENT_100:
            // "%x100" sets both a scale and a unit, so both must be unset.
            CHECK_NULL(seen, scale, status);
            CHECK_NULL(seen, unit, status);
            macros.scale = Scale::powerOfTen(2);
            macros.unit = NoUnit::percent();
            return STATE_NULL;

        case STEM_PRECISION_INTEGER:
        case STEM_PRECISION_UNLIMITED:
        case STEM_PRECISION_CURRENCY_STANDARD:
        case STEM_PRECISION_CURRENCY_CASH:
            CHECK_NULL(seen, precision, status);
            macros.precision = stem_to_object::precision(stem);
            switch (stem) {
                case STEM_PRECISION_INTEGER:
                    return STATE_FRACTION_PRECISION; // allows for "precision-integer/@##"
                default:
                    return STATE_NULL;
            }

        case STEM_ROUNDING_MODE_CEILING:
        case STEM_ROUNDING_MODE_FLOOR:
        case STEM_ROUNDING_MODE_DOWN:
        case STEM_ROUNDING_MODE_UP:
        case STEM_ROUNDING_MODE_HALF_EVEN:
        case STEM_ROUNDING_MODE_HALF_DOWN:
        case STEM_ROUNDING_MODE_HALF_UP:
        case STEM_ROUNDING_MODE_UNNECESSARY:
            CHECK_NULL(seen, roundingMode, status);
            macros.roundingMode = stem_to_object::roundingMode(stem);
            return STATE_NULL;

        case STEM_GROUP_OFF:
        case STEM_GROUP_MIN2:
        case STEM_GROUP_AUTO:
        case STEM_GROUP_ON_ALIGNED:
        case STEM_GROUP_THOUSANDS:
            CHECK_NULL(seen, grouper, status);
            macros.grouper = Grouper::forStrategy(stem_to_object::groupingStrategy(stem));
            return STATE_NULL;

        case STEM_LATIN:
            CHECK_NULL(seen, symbols, status);
            macros.symbols.setTo(NumberingSystem::createInstanceByName("latn", status));
            return STATE_NULL;

        case STEM_UNIT_WIDTH_NARROW:
        case STEM_UNIT_WIDTH_SHORT:
        case STEM_UNIT_WIDTH_FULL_NAME:
        case STEM_UNIT_WIDTH_ISO_CODE:
        case STEM_UNIT_WIDTH_HIDDEN:
            CHECK_NULL(seen, unitWidth, status);
            macros.unitWidth = stem_to_object::unitWidth(stem);
            return STATE_NULL;

        case STEM_SIGN_AUTO:
        case STEM_SIGN_ALWAYS:
        case STEM_SIGN_NEVER:
        case STEM_SIGN_ACCOUNTING:
        case STEM_SIGN_ACCOUNTING_ALWAYS:
        case STEM_SIGN_EXCEPT_ZERO:
        case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO:
            CHECK_NULL(seen, sign, status);
            macros.sign = stem_to_object::signDisplay(stem);
            return STATE_NULL;

        case STEM_DECIMAL_AUTO:
        case STEM_DECIMAL_ALWAYS:
            CHECK_NULL(seen, decimal, status);
            macros.decimal = stem_to_object::decimalSeparatorDisplay(stem);
            return STATE_NULL;

        // Stems requiring an option:
        // (nothing is written to macros here; the value comes from the option that follows)

        case STEM_PRECISION_INCREMENT:
            CHECK_NULL(seen, precision, status);
            return STATE_INCREMENT_PRECISION;

        case STEM_MEASURE_UNIT:
            CHECK_NULL(seen, unit, status);
            return STATE_MEASURE_UNIT;

        case STEM_PER_MEASURE_UNIT:
            CHECK_NULL(seen, perUnit, status);
            return STATE_PER_MEASURE_UNIT;

        case STEM_UNIT:
            // The "unit" stem claims both the unit and the per-unit slots.
            CHECK_NULL(seen, unit, status);
            CHECK_NULL(seen, perUnit, status);
            return STATE_IDENTIFIER_UNIT;

        case STEM_CURRENCY:
            CHECK_NULL(seen, unit, status);
            return STATE_CURRENCY_UNIT;

        case STEM_INTEGER_WIDTH:
            CHECK_NULL(seen, integerWidth, status);
            return STATE_INTEGER_WIDTH;

        case STEM_NUMBERING_SYSTEM:
            CHECK_NULL(seen, symbols, status);
            return STATE_NUMBERING_SYSTEM;

        case STEM_SCALE:
            CHECK_NULL(seen, scale, status);
            return STATE_SCALE;

        default:
            UPRV_UNREACHABLE;
    }
}
747
748ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros,
749 UErrorCode& status) {
750
751 ///// Required options: /////
752
753 switch (stem) {
754 case STATE_CURRENCY_UNIT:
755 blueprint_helpers::parseCurrencyOption(segment, macros, status);
756 return STATE_NULL;
757 case STATE_MEASURE_UNIT:
758 blueprint_helpers::parseMeasureUnitOption(segment, macros, status);
759 return STATE_NULL;
760 case STATE_PER_MEASURE_UNIT:
761 blueprint_helpers::parseMeasurePerUnitOption(segment, macros, status);
762 return STATE_NULL;
763 case STATE_IDENTIFIER_UNIT:
764 blueprint_helpers::parseIdentifierUnitOption(segment, macros, status);
765 return STATE_NULL;
766 case STATE_INCREMENT_PRECISION:
767 blueprint_helpers::parseIncrementOption(segment, macros, status);
768 return STATE_NULL;
769 case STATE_INTEGER_WIDTH:
770 blueprint_helpers::parseIntegerWidthOption(segment, macros, status);
771 return STATE_NULL;
772 case STATE_NUMBERING_SYSTEM:
773 blueprint_helpers::parseNumberingSystemOption(segment, macros, status);
774 return STATE_NULL;
775 case STATE_SCALE:
776 blueprint_helpers::parseScaleOption(segment, macros, status);
777 return STATE_NULL;
778 default:
779 break;
780 }
781
782 ///// Non-required options: /////
783
784 // Scientific options
785 switch (stem) {
786 case STATE_SCIENTIFIC:
787 if (blueprint_helpers::parseExponentWidthOption(segment, macros, status)) {
788 return STATE_SCIENTIFIC;
789 }
790 if (U_FAILURE(status)) {
791 return {};
792 }
793 if (blueprint_helpers::parseExponentSignOption(segment, macros, status)) {
794 return STATE_SCIENTIFIC;
795 }
796 if (U_FAILURE(status)) {
797 return {};
798 }
799 break;
800 default:
801 break;
802 }
803
804 // Frac-sig option
805 switch (stem) {
806 case STATE_FRACTION_PRECISION:
807 if (blueprint_helpers::parseFracSigOption(segment, macros, status)) {
808 return STATE_NULL;
809 }
810 if (U_FAILURE(status)) {
811 return {};
812 }
813 break;
814 default:
815 break;
816 }
817
818 // Unknown option
819 // throw new SkeletonSyntaxException("Invalid option", segment);
820 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
821 return STATE_NULL;
822}
823
824void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
825 if (U_FAILURE(status)) { return; }
826
827 // Supported options
828 if (GeneratorHelpers::notation(macros, sb, status)) {
829 sb.append(u' ');
830 }
831 if (U_FAILURE(status)) { return; }
832 if (GeneratorHelpers::unit(macros, sb, status)) {
833 sb.append(u' ');
834 }
835 if (U_FAILURE(status)) { return; }
836 if (GeneratorHelpers::perUnit(macros, sb, status)) {
837 sb.append(u' ');
838 }
839 if (U_FAILURE(status)) { return; }
840 if (GeneratorHelpers::precision(macros, sb, status)) {
841 sb.append(u' ');
842 }
843 if (U_FAILURE(status)) { return; }
844 if (GeneratorHelpers::roundingMode(macros, sb, status)) {
845 sb.append(u' ');
846 }
847 if (U_FAILURE(status)) { return; }
848 if (GeneratorHelpers::grouping(macros, sb, status)) {
849 sb.append(u' ');
850 }
851 if (U_FAILURE(status)) { return; }
852 if (GeneratorHelpers::integerWidth(macros, sb, status)) {
853 sb.append(u' ');
854 }
855 if (U_FAILURE(status)) { return; }
856 if (GeneratorHelpers::symbols(macros, sb, status)) {
857 sb.append(u' ');
858 }
859 if (U_FAILURE(status)) { return; }
860 if (GeneratorHelpers::unitWidth(macros, sb, status)) {
861 sb.append(u' ');
862 }
863 if (U_FAILURE(status)) { return; }
864 if (GeneratorHelpers::sign(macros, sb, status)) {
865 sb.append(u' ');
866 }
867 if (U_FAILURE(status)) { return; }
868 if (GeneratorHelpers::decimal(macros, sb, status)) {
869 sb.append(u' ');
870 }
871 if (U_FAILURE(status)) { return; }
872 if (GeneratorHelpers::scale(macros, sb, status)) {
873 sb.append(u' ');
874 }
875 if (U_FAILURE(status)) { return; }
876
877 // Unsupported options
878 if (!macros.padder.isBogus()) {
879 status = U_UNSUPPORTED_ERROR;
880 return;
881 }
882 if (macros.affixProvider != nullptr) {
883 status = U_UNSUPPORTED_ERROR;
884 return;
885 }
886 if (macros.rules != nullptr) {
887 status = U_UNSUPPORTED_ERROR;
888 return;
889 }
890
891 // Remove the trailing space
892 if (sb.length() > 0) {
893 sb.truncate(sb.length() - 1);
894 }
895}
896
897
898bool blueprint_helpers::parseExponentWidthOption(const StringSegment& segment, MacroProps& macros,
899 UErrorCode&) {
900 if (!isWildcardChar(segment.charAt(0))) {
901 return false;
902 }
903 int32_t offset = 1;
904 int32_t minExp = 0;
905 for (; offset < segment.length(); offset++) {
906 if (segment.charAt(offset) == u'e') {
907 minExp++;
908 } else {
909 break;
910 }
911 }
912 if (offset < segment.length()) {
913 return false;
914 }
915 // Use the public APIs to enforce bounds checking
916 macros.notation = static_cast<ScientificNotation&>(macros.notation).withMinExponentDigits(minExp);
917 return true;
918}
919
920void
921blueprint_helpers::generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode&) {
922 sb.append(kWildcardChar);
923 appendMultiple(sb, u'e', minExponentDigits);
924}
925
926bool
927blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode&) {
928 // Get the sign display type out of the CharsTrie data structure.
929 UCharsTrie tempStemTrie(kSerializedStemTrie);
930 UStringTrieResult result = tempStemTrie.next(
931 segment.toTempUnicodeString().getBuffer(),
932 segment.length());
933 if (result != USTRINGTRIE_INTERMEDIATE_VALUE && result != USTRINGTRIE_FINAL_VALUE) {
934 return false;
935 }
936 auto sign = stem_to_object::signDisplay(static_cast<StemEnum>(tempStemTrie.getValue()));
937 if (sign == UNUM_SIGN_COUNT) {
938 return false;
939 }
940 macros.notation = static_cast<ScientificNotation&>(macros.notation).withExponentSignDisplay(sign);
941 return true;
942}
943
944void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros,
945 UErrorCode& status) {
946 // Unlike ICU4J, have to check length manually because ICU4C CurrencyUnit does not check it for us
947 if (segment.length() != 3) {
948 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
949 return;
950 }
951 const UChar* currencyCode = segment.toTempUnicodeString().getBuffer();
952 UErrorCode localStatus = U_ZERO_ERROR;
953 CurrencyUnit currency(currencyCode, localStatus);
954 if (U_FAILURE(localStatus)) {
955 // Not 3 ascii chars
956 // throw new SkeletonSyntaxException("Invalid currency", segment);
957 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
958 return;
959 }
960 // Slicing is OK
961 macros.unit = currency; // NOLINT
962}
963
964void
965blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode&) {
966 sb.append(currency.getISOCurrency(), -1);
967}
968
969void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros,
970 UErrorCode& status) {
971 const UnicodeString stemString = segment.toTempUnicodeString();
972
973 // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric)
974 // http://unicode.org/reports/tr35/#Validity_Data
975 int firstHyphen = 0;
976 while (firstHyphen < stemString.length() && stemString.charAt(firstHyphen) != '-') {
977 firstHyphen++;
978 }
979 if (firstHyphen == stemString.length()) {
980 // throw new SkeletonSyntaxException("Invalid measure unit option", segment);
981 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
982 return;
983 }
984
985 // Need to do char <-> UChar conversion...
986 U_ASSERT(U_SUCCESS(status));
987 CharString type;
988 SKELETON_UCHAR_TO_CHAR(type, stemString, 0, firstHyphen, status);
989 CharString subType;
990 SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status);
991
992 // Note: the largest type as of this writing (March 2018) is "volume", which has 24 units.
993 static constexpr int32_t CAPACITY = 30;
994 MeasureUnit units[CAPACITY];
995 UErrorCode localStatus = U_ZERO_ERROR;
996 int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus);
997 if (U_FAILURE(localStatus)) {
998 // More than 30 units in this type?
999 status = U_INTERNAL_PROGRAM_ERROR;
1000 return;
1001 }
1002 for (int32_t i = 0; i < numUnits; i++) {
1003 auto& unit = units[i];
1004 if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) {
1005 macros.unit = unit;
1006 return;
1007 }
1008 }
1009
1010 // throw new SkeletonSyntaxException("Unknown measure unit", segment);
1011 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1012}
1013
1014void blueprint_helpers::generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb,
1015 UErrorCode&) {
1016 // Need to do char <-> UChar conversion...
1017 sb.append(UnicodeString(measureUnit.getType(), -1, US_INV));
1018 sb.append(u'-');
1019 sb.append(UnicodeString(measureUnit.getSubtype(), -1, US_INV));
1020}
1021
1022void blueprint_helpers::parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros,
1023 UErrorCode& status) {
1024 // A little bit of a hack: save the current unit (numerator), call the main measure unit
1025 // parsing code, put back the numerator unit, and put the new unit into per-unit.
1026 MeasureUnit numerator = macros.unit;
1027 parseMeasureUnitOption(segment, macros, status);
1028 if (U_FAILURE(status)) { return; }
1029 macros.perUnit = macros.unit;
1030 macros.unit = numerator;
1031}
1032
1033void blueprint_helpers::parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros,
1034 UErrorCode& status) {
1035 // Need to do char <-> UChar conversion...
1036 U_ASSERT(U_SUCCESS(status));
1037 CharString buffer;
1038 SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
1039
1040 ErrorCode internalStatus;
1041 auto fullUnit = MeasureUnitImpl::forIdentifier(buffer.toStringPiece(), internalStatus);
1042 if (internalStatus.isFailure()) {
1043 // throw new SkeletonSyntaxException("Invalid core unit identifier", segment, e);
1044 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1045 return;
1046 }
1047
1048 // TODO(ICU-20941): Clean this up.
1049 for (int32_t i = 0; i < fullUnit.units.length(); i++) {
1050 SingleUnitImpl* subUnit = fullUnit.units[i];
1051 if (subUnit->dimensionality > 0) {
1052 macros.unit = macros.unit.product(subUnit->build(status), status);
1053 } else {
1054 subUnit->dimensionality *= -1;
1055 macros.perUnit = macros.perUnit.product(subUnit->build(status), status);
1056 }
1057 }
1058}
1059
1060void blueprint_helpers::parseFractionStem(const StringSegment& segment, MacroProps& macros,
1061 UErrorCode& status) {
1062 U_ASSERT(segment.charAt(0) == u'.');
1063 int32_t offset = 1;
1064 int32_t minFrac = 0;
1065 int32_t maxFrac;
1066 for (; offset < segment.length(); offset++) {
1067 if (segment.charAt(offset) == u'0') {
1068 minFrac++;
1069 } else {
1070 break;
1071 }
1072 }
1073 if (offset < segment.length()) {
1074 if (isWildcardChar(segment.charAt(offset))) {
1075 maxFrac = -1;
1076 offset++;
1077 } else {
1078 maxFrac = minFrac;
1079 for (; offset < segment.length(); offset++) {
1080 if (segment.charAt(offset) == u'#') {
1081 maxFrac++;
1082 } else {
1083 break;
1084 }
1085 }
1086 }
1087 } else {
1088 maxFrac = minFrac;
1089 }
1090 if (offset < segment.length()) {
1091 // throw new SkeletonSyntaxException("Invalid fraction stem", segment);
1092 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1093 return;
1094 }
1095 // Use the public APIs to enforce bounds checking
1096 if (maxFrac == -1) {
1097 if (minFrac == 0) {
1098 macros.precision = Precision::unlimited();
1099 } else {
1100 macros.precision = Precision::minFraction(minFrac);
1101 }
1102 } else {
1103 macros.precision = Precision::minMaxFraction(minFrac, maxFrac);
1104 }
1105}
1106
1107void
1108blueprint_helpers::generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode&) {
1109 if (minFrac == 0 && maxFrac == 0) {
1110 sb.append(u"precision-integer", -1);
1111 return;
1112 }
1113 sb.append(u'.');
1114 appendMultiple(sb, u'0', minFrac);
1115 if (maxFrac == -1) {
1116 sb.append(kWildcardChar);
1117 } else {
1118 appendMultiple(sb, u'#', maxFrac - minFrac);
1119 }
1120}
1121
1122void
1123blueprint_helpers::parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status) {
1124 U_ASSERT(segment.charAt(0) == u'@');
1125 int32_t offset = 0;
1126 int32_t minSig = 0;
1127 int32_t maxSig;
1128 for (; offset < segment.length(); offset++) {
1129 if (segment.charAt(offset) == u'@') {
1130 minSig++;
1131 } else {
1132 break;
1133 }
1134 }
1135 if (offset < segment.length()) {
1136 if (isWildcardChar(segment.charAt(offset))) {
1137 maxSig = -1;
1138 offset++;
1139 } else {
1140 maxSig = minSig;
1141 for (; offset < segment.length(); offset++) {
1142 if (segment.charAt(offset) == u'#') {
1143 maxSig++;
1144 } else {
1145 break;
1146 }
1147 }
1148 }
1149 } else {
1150 maxSig = minSig;
1151 }
1152 if (offset < segment.length()) {
1153 // throw new SkeletonSyntaxException("Invalid significant digits stem", segment);
1154 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1155 return;
1156 }
1157 // Use the public APIs to enforce bounds checking
1158 if (maxSig == -1) {
1159 macros.precision = Precision::minSignificantDigits(minSig);
1160 } else {
1161 macros.precision = Precision::minMaxSignificantDigits(minSig, maxSig);
1162 }
1163}
1164
1165void
1166blueprint_helpers::generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode&) {
1167 appendMultiple(sb, u'@', minSig);
1168 if (maxSig == -1) {
1169 sb.append(kWildcardChar);
1170 } else {
1171 appendMultiple(sb, u'#', maxSig - minSig);
1172 }
1173}
1174
1175void blueprint_helpers::parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status) {
1176 U_ASSERT(segment.charAt(0) == u'E');
1177 {
1178 int32_t offset = 1;
1179 if (segment.length() == offset) {
1180 goto fail;
1181 }
1182 bool isEngineering = false;
1183 if (segment.charAt(offset) == u'E') {
1184 isEngineering = true;
1185 offset++;
1186 if (segment.length() == offset) {
1187 goto fail;
1188 }
1189 }
1190 UNumberSignDisplay signDisplay = UNUM_SIGN_AUTO;
1191 if (segment.charAt(offset) == u'+') {
1192 offset++;
1193 if (segment.length() == offset) {
1194 goto fail;
1195 }
1196 if (segment.charAt(offset) == u'!') {
1197 signDisplay = UNUM_SIGN_ALWAYS;
1198 } else if (segment.charAt(offset) == u'?') {
1199 signDisplay = UNUM_SIGN_EXCEPT_ZERO;
1200 } else {
1201 goto fail;
1202 }
1203 offset++;
1204 if (segment.length() == offset) {
1205 goto fail;
1206 }
1207 }
1208 int32_t minDigits = 0;
1209 for (; offset < segment.length(); offset++) {
1210 if (segment.charAt(offset) != u'0') {
1211 goto fail;
1212 }
1213 minDigits++;
1214 }
1215 macros.notation = (isEngineering ? Notation::engineering() : Notation::scientific())
1216 .withExponentSignDisplay(signDisplay)
1217 .withMinExponentDigits(minDigits);
1218 return;
1219 }
1220 fail: void();
1221 // throw new SkeletonSyntaxException("Invalid scientific stem", segment);
1222 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1223 return;
1224}
1225
1226void blueprint_helpers::parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status) {
1227 U_ASSERT(segment.charAt(0) == u'0');
1228 int32_t offset = 1;
1229 for (; offset < segment.length(); offset++) {
1230 if (segment.charAt(offset) != u'0') {
1231 offset--;
1232 break;
1233 }
1234 }
1235 if (offset < segment.length()) {
1236 // throw new SkeletonSyntaxException("Invalid integer stem", segment);
1237 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1238 return;
1239 }
1240 macros.integerWidth = IntegerWidth::zeroFillTo(offset);
1241 return;
1242}
1243
1244bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroProps& macros,
1245 UErrorCode& status) {
1246 if (segment.charAt(0) != u'@') {
1247 return false;
1248 }
1249 int offset = 0;
1250 int minSig = 0;
1251 int maxSig;
1252 for (; offset < segment.length(); offset++) {
1253 if (segment.charAt(offset) == u'@') {
1254 minSig++;
1255 } else {
1256 break;
1257 }
1258 }
1259 // For the frac-sig option, there must be minSig or maxSig but not both.
1260 // Valid: @+, @@+, @@@+
1261 // Valid: @#, @##, @###
1262 // Invalid: @, @@, @@@
1263 // Invalid: @@#, @@##, @@@#
1264 if (offset < segment.length()) {
1265 if (isWildcardChar(segment.charAt(offset))) {
1266 maxSig = -1;
1267 offset++;
1268 } else if (minSig > 1) {
1269 // @@#, @@##, @@@#
1270 // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment);
1271 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1272 return false;
1273 } else {
1274 maxSig = minSig;
1275 for (; offset < segment.length(); offset++) {
1276 if (segment.charAt(offset) == u'#') {
1277 maxSig++;
1278 } else {
1279 break;
1280 }
1281 }
1282 }
1283 } else {
1284 // @, @@, @@@
1285 // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment);
1286 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1287 return false;
1288 }
1289 if (offset < segment.length()) {
1290 // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment);
1291 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1292 return false;
1293 }
1294
1295 auto& oldPrecision = static_cast<const FractionPrecision&>(macros.precision);
1296 if (maxSig == -1) {
1297 macros.precision = oldPrecision.withMinDigits(minSig);
1298 } else {
1299 macros.precision = oldPrecision.withMaxDigits(maxSig);
1300 }
1301 return true;
1302}
1303
1304void blueprint_helpers::parseIncrementOption(const StringSegment& segment, MacroProps& macros,
1305 UErrorCode& status) {
1306 // Need to do char <-> UChar conversion...
1307 U_ASSERT(U_SUCCESS(status));
1308 CharString buffer;
1309 SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
1310
1311 // Utilize DecimalQuantity/decNumber to parse this for us.
1312 DecimalQuantity dq;
1313 UErrorCode localStatus = U_ZERO_ERROR;
1314 dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus);
1315 if (U_FAILURE(localStatus)) {
1316 // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e);
1317 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1318 return;
1319 }
1320 double increment = dq.toDouble();
1321
1322 // We also need to figure out how many digits. Do a brute force string operation.
1323 int decimalOffset = 0;
1324 while (decimalOffset < segment.length() && segment.charAt(decimalOffset) != '.') {
1325 decimalOffset++;
1326 }
1327 if (decimalOffset == segment.length()) {
1328 macros.precision = Precision::increment(increment);
1329 } else {
1330 int32_t fractionLength = segment.length() - decimalOffset - 1;
1331 macros.precision = Precision::increment(increment).withMinFraction(fractionLength);
1332 }
1333}
1334
1335void blueprint_helpers::generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb,
1336 UErrorCode&) {
1337 // Utilize DecimalQuantity/double_conversion to format this for us.
1338 DecimalQuantity dq;
1339 dq.setToDouble(increment);
1340 dq.roundToInfinity();
1341 sb.append(dq.toPlainString());
1342
1343 // We might need to append extra trailing zeros for min fraction...
1344 if (trailingZeros > 0) {
1345 appendMultiple(sb, u'0', trailingZeros);
1346 }
1347}
1348
1349void blueprint_helpers::parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros,
1350 UErrorCode& status) {
1351 int32_t offset = 0;
1352 int32_t minInt = 0;
1353 int32_t maxInt;
1354 if (isWildcardChar(segment.charAt(0))) {
1355 maxInt = -1;
1356 offset++;
1357 } else {
1358 maxInt = 0;
1359 }
1360 for (; offset < segment.length(); offset++) {
1361 if (maxInt != -1 && segment.charAt(offset) == u'#') {
1362 maxInt++;
1363 } else {
1364 break;
1365 }
1366 }
1367 if (offset < segment.length()) {
1368 for (; offset < segment.length(); offset++) {
1369 if (segment.charAt(offset) == u'0') {
1370 minInt++;
1371 } else {
1372 break;
1373 }
1374 }
1375 }
1376 if (maxInt != -1) {
1377 maxInt += minInt;
1378 }
1379 if (offset < segment.length()) {
1380 // throw new SkeletonSyntaxException("Invalid integer width stem", segment);
1381 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1382 return;
1383 }
1384 // Use the public APIs to enforce bounds checking
1385 if (maxInt == -1) {
1386 macros.integerWidth = IntegerWidth::zeroFillTo(minInt);
1387 } else {
1388 macros.integerWidth = IntegerWidth::zeroFillTo(minInt).truncateAt(maxInt);
1389 }
1390}
1391
1392void blueprint_helpers::generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb,
1393 UErrorCode&) {
1394 if (maxInt == -1) {
1395 sb.append(kWildcardChar);
1396 } else {
1397 appendMultiple(sb, u'#', maxInt - minInt);
1398 }
1399 appendMultiple(sb, u'0', minInt);
1400}
1401
1402void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros,
1403 UErrorCode& status) {
1404 // Need to do char <-> UChar conversion...
1405 U_ASSERT(U_SUCCESS(status));
1406 CharString buffer;
1407 SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
1408
1409 NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status);
1410 if (ns == nullptr || U_FAILURE(status)) {
1411 // This is a skeleton syntax error; don't bubble up the low-level NumberingSystem error
1412 // throw new SkeletonSyntaxException("Unknown numbering system", segment);
1413 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1414 return;
1415 }
1416 macros.symbols.setTo(ns);
1417}
1418
1419void blueprint_helpers::generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb,
1420 UErrorCode&) {
1421 // Need to do char <-> UChar conversion...
1422 sb.append(UnicodeString(ns.getName(), -1, US_INV));
1423}
1424
1425void blueprint_helpers::parseScaleOption(const StringSegment& segment, MacroProps& macros,
1426 UErrorCode& status) {
1427 // Need to do char <-> UChar conversion...
1428 U_ASSERT(U_SUCCESS(status));
1429 CharString buffer;
1430 SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
1431
1432 LocalPointer<DecNum> decnum(new DecNum(), status);
1433 if (U_FAILURE(status)) { return; }
1434 decnum->setTo({buffer.data(), buffer.length()}, status);
1435 if (U_FAILURE(status)) {
1436 // This is a skeleton syntax error; don't let the low-level decnum error bubble up
1437 status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1438 return;
1439 }
1440
1441 // NOTE: The constructor will optimize the decnum for us if possible.
1442 macros.scale = {0, decnum.orphan()};
1443}
1444
1445void blueprint_helpers::generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
1446 UErrorCode& status) {
1447 // Utilize DecimalQuantity/double_conversion to format this for us.
1448 DecimalQuantity dq;
1449 if (arbitrary != nullptr) {
1450 dq.setToDecNum(*arbitrary, status);
1451 if (U_FAILURE(status)) { return; }
1452 } else {
1453 dq.setToInt(1);
1454 }
1455 dq.adjustMagnitude(magnitude);
1456 dq.roundToInfinity();
1457 sb.append(dq.toPlainString());
1458}
1459
1460
1461bool GeneratorHelpers::notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1462 if (macros.notation.fType == Notation::NTN_COMPACT) {
1463 UNumberCompactStyle style = macros.notation.fUnion.compactStyle;
1464 if (style == UNumberCompactStyle::UNUM_LONG) {
1465 sb.append(u"compact-long", -1);
1466 return true;
1467 } else if (style == UNumberCompactStyle::UNUM_SHORT) {
1468 sb.append(u"compact-short", -1);
1469 return true;
1470 } else {
1471 // Compact notation generated from custom data (not supported in skeleton)
1472 // The other compact notations are literals
1473 status = U_UNSUPPORTED_ERROR;
1474 return false;
1475 }
1476 } else if (macros.notation.fType == Notation::NTN_SCIENTIFIC) {
1477 const Notation::ScientificSettings& impl = macros.notation.fUnion.scientific;
1478 if (impl.fEngineeringInterval == 3) {
1479 sb.append(u"engineering", -1);
1480 } else {
1481 sb.append(u"scientific", -1);
1482 }
1483 if (impl.fMinExponentDigits > 1) {
1484 sb.append(u'/');
1485 blueprint_helpers::generateExponentWidthOption(impl.fMinExponentDigits, sb, status);
1486 if (U_FAILURE(status)) {
1487 return false;
1488 }
1489 }
1490 if (impl.fExponentSignDisplay != UNUM_SIGN_AUTO) {
1491 sb.append(u'/');
1492 enum_to_stem_string::signDisplay(impl.fExponentSignDisplay, sb);
1493 }
1494 return true;
1495 } else {
1496 // Default value is not shown in normalized form
1497 return false;
1498 }
1499}
1500
1501bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1502 if (utils::unitIsCurrency(macros.unit)) {
1503 sb.append(u"currency/", -1);
1504 CurrencyUnit currency(macros.unit, status);
1505 if (U_FAILURE(status)) {
1506 return false;
1507 }
1508 blueprint_helpers::generateCurrencyOption(currency, sb, status);
1509 return true;
1510 } else if (utils::unitIsNoUnit(macros.unit)) {
1511 if (utils::unitIsPercent(macros.unit)) {
1512 sb.append(u"percent", -1);
1513 return true;
1514 } else if (utils::unitIsPermille(macros.unit)) {
1515 sb.append(u"permille", -1);
1516 return true;
1517 } else {
1518 // Default value is not shown in normalized form
1519 return false;
1520 }
1521 } else {
1522 sb.append(u"measure-unit/", -1);
1523 blueprint_helpers::generateMeasureUnitOption(macros.unit, sb, status);
1524 return true;
1525 }
1526}
1527
1528bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1529 // Per-units are currently expected to be only MeasureUnits.
1530 if (utils::unitIsNoUnit(macros.perUnit)) {
1531 if (utils::unitIsPercent(macros.perUnit) || utils::unitIsPermille(macros.perUnit)) {
1532 status = U_UNSUPPORTED_ERROR;
1533 return false;
1534 } else {
1535 // Default value: ok to ignore
1536 return false;
1537 }
1538 } else if (utils::unitIsCurrency(macros.perUnit)) {
1539 status = U_UNSUPPORTED_ERROR;
1540 return false;
1541 } else {
1542 sb.append(u"per-measure-unit/", -1);
1543 blueprint_helpers::generateMeasureUnitOption(macros.perUnit, sb, status);
1544 return true;
1545 }
1546}
1547
1548bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1549 if (macros.precision.fType == Precision::RND_NONE) {
1550 sb.append(u"precision-unlimited", -1);
1551 } else if (macros.precision.fType == Precision::RND_FRACTION) {
1552 const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig;
1553 blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status);
1554 } else if (macros.precision.fType == Precision::RND_SIGNIFICANT) {
1555 const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig;
1556 blueprint_helpers::generateDigitsStem(impl.fMinSig, impl.fMaxSig, sb, status);
1557 } else if (macros.precision.fType == Precision::RND_FRACTION_SIGNIFICANT) {
1558 const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig;
1559 blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status);
1560 sb.append(u'/');
1561 if (impl.fMinSig == -1) {
1562 blueprint_helpers::generateDigitsStem(1, impl.fMaxSig, sb, status);
1563 } else {
1564 blueprint_helpers::generateDigitsStem(impl.fMinSig, -1, sb, status);
1565 }
1566 } else if (macros.precision.fType == Precision::RND_INCREMENT
1567 || macros.precision.fType == Precision::RND_INCREMENT_ONE
1568 || macros.precision.fType == Precision::RND_INCREMENT_FIVE) {
1569 const Precision::IncrementSettings& impl = macros.precision.fUnion.increment;
1570 sb.append(u"precision-increment/", -1);
1571 blueprint_helpers::generateIncrementOption(
1572 impl.fIncrement,
1573 impl.fMinFrac - impl.fMaxFrac,
1574 sb,
1575 status);
1576 } else if (macros.precision.fType == Precision::RND_CURRENCY) {
1577 UCurrencyUsage usage = macros.precision.fUnion.currencyUsage;
1578 if (usage == UCURR_USAGE_STANDARD) {
1579 sb.append(u"precision-currency-standard", -1);
1580 } else {
1581 sb.append(u"precision-currency-cash", -1);
1582 }
1583 } else {
1584 // Bogus or Error
1585 return false;
1586 }
1587
1588 // NOTE: Always return true for rounding because the default value depends on other options.
1589 return true;
1590}
1591
1592bool GeneratorHelpers::roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1593 if (macros.roundingMode == kDefaultMode) {
1594 return false; // Default
1595 }
1596 enum_to_stem_string::roundingMode(macros.roundingMode, sb);
1597 return true;
1598}
1599
1600bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1601 if (macros.grouper.isBogus()) {
1602 return false; // No value
1603 } else if (macros.grouper.fStrategy == UNUM_GROUPING_COUNT) {
1604 status = U_UNSUPPORTED_ERROR;
1605 return false;
1606 } else if (macros.grouper.fStrategy == UNUM_GROUPING_AUTO) {
1607 return false; // Default value
1608 } else {
1609 enum_to_stem_string::groupingStrategy(macros.grouper.fStrategy, sb);
1610 return true;
1611 }
1612}
1613
1614bool GeneratorHelpers::integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1615 if (macros.integerWidth.fHasError || macros.integerWidth.isBogus() ||
1616 macros.integerWidth == IntegerWidth::standard()) {
1617 // Error or Default
1618 return false;
1619 }
1620 sb.append(u"integer-width/", -1);
1621 blueprint_helpers::generateIntegerWidthOption(
1622 macros.integerWidth.fUnion.minMaxInt.fMinInt,
1623 macros.integerWidth.fUnion.minMaxInt.fMaxInt,
1624 sb,
1625 status);
1626 return true;
1627}
1628
1629bool GeneratorHelpers::symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1630 if (macros.symbols.isNumberingSystem()) {
1631 const NumberingSystem& ns = *macros.symbols.getNumberingSystem();
1632 if (uprv_strcmp(ns.getName(), "latn") == 0) {
1633 sb.append(u"latin", -1);
1634 } else {
1635 sb.append(u"numbering-system/", -1);
1636 blueprint_helpers::generateNumberingSystemOption(ns, sb, status);
1637 }
1638 return true;
1639 } else if (macros.symbols.isDecimalFormatSymbols()) {
1640 status = U_UNSUPPORTED_ERROR;
1641 return false;
1642 } else {
1643 // No custom symbols
1644 return false;
1645 }
1646}
1647
1648bool GeneratorHelpers::unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1649 if (macros.unitWidth == UNUM_UNIT_WIDTH_SHORT || macros.unitWidth == UNUM_UNIT_WIDTH_COUNT) {
1650 return false; // Default or Bogus
1651 }
1652 enum_to_stem_string::unitWidth(macros.unitWidth, sb);
1653 return true;
1654}
1655
1656bool GeneratorHelpers::sign(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1657 if (macros.sign == UNUM_SIGN_AUTO || macros.sign == UNUM_SIGN_COUNT) {
1658 return false; // Default or Bogus
1659 }
1660 enum_to_stem_string::signDisplay(macros.sign, sb);
1661 return true;
1662}
1663
1664bool GeneratorHelpers::decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1665 if (macros.decimal == UNUM_DECIMAL_SEPARATOR_AUTO || macros.decimal == UNUM_DECIMAL_SEPARATOR_COUNT) {
1666 return false; // Default or Bogus
1667 }
1668 enum_to_stem_string::decimalSeparatorDisplay(macros.decimal, sb);
1669 return true;
1670}
1671
1672bool GeneratorHelpers::scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1673 if (!macros.scale.isValid()) {
1674 return false; // Default or Bogus
1675 }
1676 sb.append(u"scale/", -1);
1677 blueprint_helpers::generateScaleOption(
1678 macros.scale.fMagnitude,
1679 macros.scale.fArbitrary,
1680 sb,
1681 status);
1682 return true;
1683}
1684
1685
1686// Definitions of public API methods (put here for dependency disentanglement)
1687
1688#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER)
1689// Ignore MSVC warning 4661. This is generated for NumberFormatterSettings<>::toSkeleton() as this method
1690// is defined elsewhere (in number_skeletons.cpp). The compiler is warning that the explicit template instantiation
1691// inside this single translation unit (CPP file) is incomplete, and thus it isn't sure if the template class is
1692// fully defined. However, since each translation unit explicitly instantiates all the necessary template classes,
1693// they will all be passed to the linker, and the linker will still find and export all the class members.
1694#pragma warning(push)
1695#pragma warning(disable: 4661)
1696#endif
1697
1698template<typename Derived>
1699UnicodeString NumberFormatterSettings<Derived>::toSkeleton(UErrorCode& status) const {
1700 if (U_FAILURE(status)) {
1701 return ICU_Utility::makeBogusString();
1702 }
1703 if (fMacros.copyErrorTo(status)) {
1704 return ICU_Utility::makeBogusString();
1705 }
1706 return skeleton::generate(fMacros, status);
1707}
1708
// Explicitly instantiate NumberFormatterSettings for each class that implements it
// (UnlocalizedNumberFormatter and LocalizedNumberFormatter). This is needed because
// toSkeleton() is a template member defined in this source file rather than in a header.
// See https://stackoverflow.com/a/495056/1407170
template
class icu::number::NumberFormatterSettings<icu::number::UnlocalizedNumberFormatter>;
template
class icu::number::NumberFormatterSettings<icu::number::LocalizedNumberFormatter>;
1715
1716UnlocalizedNumberFormatter
1717NumberFormatter::forSkeleton(const UnicodeString& skeleton, UErrorCode& status) {
1718 return skeleton::create(skeleton, nullptr, status);
1719}
1720
1721UnlocalizedNumberFormatter
1722NumberFormatter::forSkeleton(const UnicodeString& skeleton, UParseError& perror, UErrorCode& status) {
1723 return skeleton::create(skeleton, &perror, status);
1724}
1725
1726#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER)
1727// Warning 4661.
1728#pragma warning(pop)
1729#endif
1730
1731#endif /* #if !UCONFIG_NO_FORMATTING */
1732