1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | |
8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: |
9 | // Helpful in toString methods and elsewhere. |
10 | #define UNISTR_FROM_STRING_EXPLICIT |
11 | |
12 | #include "number_decnum.h" |
13 | #include "number_skeletons.h" |
14 | #include "umutex.h" |
15 | #include "ucln_in.h" |
16 | #include "patternprops.h" |
17 | #include "unicode/ucharstriebuilder.h" |
18 | #include "number_utils.h" |
19 | #include "number_decimalquantity.h" |
20 | #include "unicode/numberformatter.h" |
21 | #include "uinvchar.h" |
22 | #include "charstr.h" |
23 | #include "string_segment.h" |
24 | |
25 | using namespace icu; |
26 | using namespace icu::number; |
27 | using namespace icu::number::impl; |
28 | using namespace icu::number::impl::skeleton; |
29 | |
30 | namespace { |
31 | |
// Init-once guard passed to umtx_initOnce() together with initNumberSkeletons();
// reset again by cleanupNumberSkeletons().
icu::UInitOnce gNumberSkeletonsInitOnce = U_INITONCE_INITIALIZER;

// Buffer holding the serialized stem trie. It is allocated and filled by
// initNumberSkeletons() and freed by cleanupNumberSkeletons(); null otherwise.
char16_t* kSerializedStemTrie = nullptr;
35 | |
36 | UBool U_CALLCONV cleanupNumberSkeletons() { |
37 | uprv_free(kSerializedStemTrie); |
38 | kSerializedStemTrie = nullptr; |
39 | gNumberSkeletonsInitOnce.reset(); |
40 | return TRUE; |
41 | } |
42 | |
43 | void U_CALLCONV initNumberSkeletons(UErrorCode& status) { |
44 | ucln_i18n_registerCleanup(UCLN_I18N_NUMBER_SKELETONS, cleanupNumberSkeletons); |
45 | |
46 | UCharsTrieBuilder b(status); |
47 | if (U_FAILURE(status)) { return; } |
48 | |
49 | // Section 1: |
50 | b.add(u"compact-short" , STEM_COMPACT_SHORT, status); |
51 | b.add(u"compact-long" , STEM_COMPACT_LONG, status); |
52 | b.add(u"scientific" , STEM_SCIENTIFIC, status); |
53 | b.add(u"engineering" , STEM_ENGINEERING, status); |
54 | b.add(u"notation-simple" , STEM_NOTATION_SIMPLE, status); |
55 | b.add(u"base-unit" , STEM_BASE_UNIT, status); |
56 | b.add(u"percent" , STEM_PERCENT, status); |
57 | b.add(u"permille" , STEM_PERMILLE, status); |
58 | b.add(u"precision-integer" , STEM_PRECISION_INTEGER, status); |
59 | b.add(u"precision-unlimited" , STEM_PRECISION_UNLIMITED, status); |
60 | b.add(u"precision-currency-standard" , STEM_PRECISION_CURRENCY_STANDARD, status); |
61 | b.add(u"precision-currency-cash" , STEM_PRECISION_CURRENCY_CASH, status); |
62 | b.add(u"rounding-mode-ceiling" , STEM_ROUNDING_MODE_CEILING, status); |
63 | b.add(u"rounding-mode-floor" , STEM_ROUNDING_MODE_FLOOR, status); |
64 | b.add(u"rounding-mode-down" , STEM_ROUNDING_MODE_DOWN, status); |
65 | b.add(u"rounding-mode-up" , STEM_ROUNDING_MODE_UP, status); |
66 | b.add(u"rounding-mode-half-even" , STEM_ROUNDING_MODE_HALF_EVEN, status); |
67 | b.add(u"rounding-mode-half-down" , STEM_ROUNDING_MODE_HALF_DOWN, status); |
68 | b.add(u"rounding-mode-half-up" , STEM_ROUNDING_MODE_HALF_UP, status); |
69 | b.add(u"rounding-mode-unnecessary" , STEM_ROUNDING_MODE_UNNECESSARY, status); |
70 | b.add(u"group-off" , STEM_GROUP_OFF, status); |
71 | b.add(u"group-min2" , STEM_GROUP_MIN2, status); |
72 | b.add(u"group-auto" , STEM_GROUP_AUTO, status); |
73 | b.add(u"group-on-aligned" , STEM_GROUP_ON_ALIGNED, status); |
74 | b.add(u"group-thousands" , STEM_GROUP_THOUSANDS, status); |
75 | b.add(u"latin" , STEM_LATIN, status); |
76 | b.add(u"unit-width-narrow" , STEM_UNIT_WIDTH_NARROW, status); |
77 | b.add(u"unit-width-short" , STEM_UNIT_WIDTH_SHORT, status); |
78 | b.add(u"unit-width-full-name" , STEM_UNIT_WIDTH_FULL_NAME, status); |
79 | b.add(u"unit-width-iso-code" , STEM_UNIT_WIDTH_ISO_CODE, status); |
80 | b.add(u"unit-width-hidden" , STEM_UNIT_WIDTH_HIDDEN, status); |
81 | b.add(u"sign-auto" , STEM_SIGN_AUTO, status); |
82 | b.add(u"sign-always" , STEM_SIGN_ALWAYS, status); |
83 | b.add(u"sign-never" , STEM_SIGN_NEVER, status); |
84 | b.add(u"sign-accounting" , STEM_SIGN_ACCOUNTING, status); |
85 | b.add(u"sign-accounting-always" , STEM_SIGN_ACCOUNTING_ALWAYS, status); |
86 | b.add(u"sign-except-zero" , STEM_SIGN_EXCEPT_ZERO, status); |
87 | b.add(u"sign-accounting-except-zero" , STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status); |
88 | b.add(u"decimal-auto" , STEM_DECIMAL_AUTO, status); |
89 | b.add(u"decimal-always" , STEM_DECIMAL_ALWAYS, status); |
90 | if (U_FAILURE(status)) { return; } |
91 | |
92 | // Section 2: |
93 | b.add(u"precision-increment" , STEM_PRECISION_INCREMENT, status); |
94 | b.add(u"measure-unit" , STEM_MEASURE_UNIT, status); |
95 | b.add(u"per-measure-unit" , STEM_PER_MEASURE_UNIT, status); |
96 | b.add(u"currency" , STEM_CURRENCY, status); |
97 | b.add(u"integer-width" , STEM_INTEGER_WIDTH, status); |
98 | b.add(u"numbering-system" , STEM_NUMBERING_SYSTEM, status); |
99 | b.add(u"scale" , STEM_SCALE, status); |
100 | if (U_FAILURE(status)) { return; } |
101 | |
102 | // Build the CharsTrie |
103 | // TODO: Use SLOW or FAST here? |
104 | UnicodeString result; |
105 | b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status); |
106 | if (U_FAILURE(status)) { return; } |
107 | |
108 | // Copy the result into the global constant pointer |
109 | size_t numBytes = result.length() * sizeof(char16_t); |
110 | kSerializedStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes)); |
111 | uprv_memcpy(kSerializedStemTrie, result.getBuffer(), numBytes); |
112 | } |
113 | |
114 | |
115 | inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) { |
116 | for (int i = 0; i < count; i++) { |
117 | sb.append(cp); |
118 | } |
119 | } |
120 | |
121 | |
// CHECK_NULL(seen, field, status): guards against a skeleton setting the same
// property twice. If (seen).field is already true, sets status to
// U_NUMBER_SKELETON_SYNTAX_ERROR and returns STATE_NULL from the ENCLOSING
// function; otherwise marks (seen).field as true and falls through.
// Because it contains a `return STATE_NULL`, it may only be used inside
// functions whose return type accepts STATE_NULL.
#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wrapping */ \
UPRV_BLOCK_MACRO_BEGIN { \
    if ((seen).field) { \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return STATE_NULL; \
    } \
    (seen).field = true; \
} UPRV_BLOCK_MACRO_END
130 | |
131 | |
// SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status): appends the UTF-16
// range [start, end) of src's buffer to dest via appendInvariantChars().
// On failure it sets status and returns (with no value) from the ENCLOSING
// function, so it may only be used inside functions returning void:
//   - U_INVARIANT_CONVERSION_ERROR is reported as U_NUMBER_SKELETON_SYNTAX_ERROR;
//   - any other failure code is propagated unchanged.
// A local status is used for the conversion, so the incoming value of status
// is not consulted.
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \
UPRV_BLOCK_MACRO_BEGIN { \
    UErrorCode conversionStatus = U_ZERO_ERROR; \
    (dest).appendInvariantChars({FALSE, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return; \
    } else if (U_FAILURE(conversionStatus)) { \
        (status) = conversionStatus; \
        return; \
    } \
} UPRV_BLOCK_MACRO_END
145 | |
146 | |
147 | } // anonymous namespace |
148 | |
149 | |
150 | Notation stem_to_object::(skeleton::StemEnum stem) { |
151 | switch (stem) { |
152 | case STEM_COMPACT_SHORT: |
153 | return Notation::compactShort(); |
154 | case STEM_COMPACT_LONG: |
155 | return Notation::compactLong(); |
156 | case STEM_SCIENTIFIC: |
157 | return Notation::scientific(); |
158 | case STEM_ENGINEERING: |
159 | return Notation::engineering(); |
160 | case STEM_NOTATION_SIMPLE: |
161 | return Notation::simple(); |
162 | default: |
163 | UPRV_UNREACHABLE; |
164 | } |
165 | } |
166 | |
167 | MeasureUnit stem_to_object::(skeleton::StemEnum stem) { |
168 | switch (stem) { |
169 | case STEM_BASE_UNIT: |
170 | // Slicing is okay |
171 | return NoUnit::base(); // NOLINT |
172 | case STEM_PERCENT: |
173 | // Slicing is okay |
174 | return NoUnit::percent(); // NOLINT |
175 | case STEM_PERMILLE: |
176 | // Slicing is okay |
177 | return NoUnit::permille(); // NOLINT |
178 | default: |
179 | UPRV_UNREACHABLE; |
180 | } |
181 | } |
182 | |
183 | Precision stem_to_object::(skeleton::StemEnum stem) { |
184 | switch (stem) { |
185 | case STEM_PRECISION_INTEGER: |
186 | return Precision::integer(); |
187 | case STEM_PRECISION_UNLIMITED: |
188 | return Precision::unlimited(); |
189 | case STEM_PRECISION_CURRENCY_STANDARD: |
190 | return Precision::currency(UCURR_USAGE_STANDARD); |
191 | case STEM_PRECISION_CURRENCY_CASH: |
192 | return Precision::currency(UCURR_USAGE_CASH); |
193 | default: |
194 | UPRV_UNREACHABLE; |
195 | } |
196 | } |
197 | |
198 | UNumberFormatRoundingMode stem_to_object::(skeleton::StemEnum stem) { |
199 | switch (stem) { |
200 | case STEM_ROUNDING_MODE_CEILING: |
201 | return UNUM_ROUND_CEILING; |
202 | case STEM_ROUNDING_MODE_FLOOR: |
203 | return UNUM_ROUND_FLOOR; |
204 | case STEM_ROUNDING_MODE_DOWN: |
205 | return UNUM_ROUND_DOWN; |
206 | case STEM_ROUNDING_MODE_UP: |
207 | return UNUM_ROUND_UP; |
208 | case STEM_ROUNDING_MODE_HALF_EVEN: |
209 | return UNUM_ROUND_HALFEVEN; |
210 | case STEM_ROUNDING_MODE_HALF_DOWN: |
211 | return UNUM_ROUND_HALFDOWN; |
212 | case STEM_ROUNDING_MODE_HALF_UP: |
213 | return UNUM_ROUND_HALFUP; |
214 | case STEM_ROUNDING_MODE_UNNECESSARY: |
215 | return UNUM_ROUND_UNNECESSARY; |
216 | default: |
217 | UPRV_UNREACHABLE; |
218 | } |
219 | } |
220 | |
221 | UNumberGroupingStrategy stem_to_object::(skeleton::StemEnum stem) { |
222 | switch (stem) { |
223 | case STEM_GROUP_OFF: |
224 | return UNUM_GROUPING_OFF; |
225 | case STEM_GROUP_MIN2: |
226 | return UNUM_GROUPING_MIN2; |
227 | case STEM_GROUP_AUTO: |
228 | return UNUM_GROUPING_AUTO; |
229 | case STEM_GROUP_ON_ALIGNED: |
230 | return UNUM_GROUPING_ON_ALIGNED; |
231 | case STEM_GROUP_THOUSANDS: |
232 | return UNUM_GROUPING_THOUSANDS; |
233 | default: |
234 | return UNUM_GROUPING_COUNT; // for objects, throw; for enums, return COUNT |
235 | } |
236 | } |
237 | |
238 | UNumberUnitWidth stem_to_object::(skeleton::StemEnum stem) { |
239 | switch (stem) { |
240 | case STEM_UNIT_WIDTH_NARROW: |
241 | return UNUM_UNIT_WIDTH_NARROW; |
242 | case STEM_UNIT_WIDTH_SHORT: |
243 | return UNUM_UNIT_WIDTH_SHORT; |
244 | case STEM_UNIT_WIDTH_FULL_NAME: |
245 | return UNUM_UNIT_WIDTH_FULL_NAME; |
246 | case STEM_UNIT_WIDTH_ISO_CODE: |
247 | return UNUM_UNIT_WIDTH_ISO_CODE; |
248 | case STEM_UNIT_WIDTH_HIDDEN: |
249 | return UNUM_UNIT_WIDTH_HIDDEN; |
250 | default: |
251 | return UNUM_UNIT_WIDTH_COUNT; // for objects, throw; for enums, return COUNT |
252 | } |
253 | } |
254 | |
255 | UNumberSignDisplay stem_to_object::(skeleton::StemEnum stem) { |
256 | switch (stem) { |
257 | case STEM_SIGN_AUTO: |
258 | return UNUM_SIGN_AUTO; |
259 | case STEM_SIGN_ALWAYS: |
260 | return UNUM_SIGN_ALWAYS; |
261 | case STEM_SIGN_NEVER: |
262 | return UNUM_SIGN_NEVER; |
263 | case STEM_SIGN_ACCOUNTING: |
264 | return UNUM_SIGN_ACCOUNTING; |
265 | case STEM_SIGN_ACCOUNTING_ALWAYS: |
266 | return UNUM_SIGN_ACCOUNTING_ALWAYS; |
267 | case STEM_SIGN_EXCEPT_ZERO: |
268 | return UNUM_SIGN_EXCEPT_ZERO; |
269 | case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO: |
270 | return UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; |
271 | default: |
272 | return UNUM_SIGN_COUNT; // for objects, throw; for enums, return COUNT |
273 | } |
274 | } |
275 | |
276 | UNumberDecimalSeparatorDisplay stem_to_object::(skeleton::StemEnum stem) { |
277 | switch (stem) { |
278 | case STEM_DECIMAL_AUTO: |
279 | return UNUM_DECIMAL_SEPARATOR_AUTO; |
280 | case STEM_DECIMAL_ALWAYS: |
281 | return UNUM_DECIMAL_SEPARATOR_ALWAYS; |
282 | default: |
283 | return UNUM_DECIMAL_SEPARATOR_COUNT; // for objects, throw; for enums, return COUNT |
284 | } |
285 | } |
286 | |
287 | |
288 | void enum_to_stem_string::roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb) { |
289 | switch (value) { |
290 | case UNUM_ROUND_CEILING: |
291 | sb.append(u"rounding-mode-ceiling" , -1); |
292 | break; |
293 | case UNUM_ROUND_FLOOR: |
294 | sb.append(u"rounding-mode-floor" , -1); |
295 | break; |
296 | case UNUM_ROUND_DOWN: |
297 | sb.append(u"rounding-mode-down" , -1); |
298 | break; |
299 | case UNUM_ROUND_UP: |
300 | sb.append(u"rounding-mode-up" , -1); |
301 | break; |
302 | case UNUM_ROUND_HALFEVEN: |
303 | sb.append(u"rounding-mode-half-even" , -1); |
304 | break; |
305 | case UNUM_ROUND_HALFDOWN: |
306 | sb.append(u"rounding-mode-half-down" , -1); |
307 | break; |
308 | case UNUM_ROUND_HALFUP: |
309 | sb.append(u"rounding-mode-half-up" , -1); |
310 | break; |
311 | case UNUM_ROUND_UNNECESSARY: |
312 | sb.append(u"rounding-mode-unnecessary" , -1); |
313 | break; |
314 | default: |
315 | UPRV_UNREACHABLE; |
316 | } |
317 | } |
318 | |
319 | void enum_to_stem_string::groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb) { |
320 | switch (value) { |
321 | case UNUM_GROUPING_OFF: |
322 | sb.append(u"group-off" , -1); |
323 | break; |
324 | case UNUM_GROUPING_MIN2: |
325 | sb.append(u"group-min2" , -1); |
326 | break; |
327 | case UNUM_GROUPING_AUTO: |
328 | sb.append(u"group-auto" , -1); |
329 | break; |
330 | case UNUM_GROUPING_ON_ALIGNED: |
331 | sb.append(u"group-on-aligned" , -1); |
332 | break; |
333 | case UNUM_GROUPING_THOUSANDS: |
334 | sb.append(u"group-thousands" , -1); |
335 | break; |
336 | default: |
337 | UPRV_UNREACHABLE; |
338 | } |
339 | } |
340 | |
341 | void enum_to_stem_string::unitWidth(UNumberUnitWidth value, UnicodeString& sb) { |
342 | switch (value) { |
343 | case UNUM_UNIT_WIDTH_NARROW: |
344 | sb.append(u"unit-width-narrow" , -1); |
345 | break; |
346 | case UNUM_UNIT_WIDTH_SHORT: |
347 | sb.append(u"unit-width-short" , -1); |
348 | break; |
349 | case UNUM_UNIT_WIDTH_FULL_NAME: |
350 | sb.append(u"unit-width-full-name" , -1); |
351 | break; |
352 | case UNUM_UNIT_WIDTH_ISO_CODE: |
353 | sb.append(u"unit-width-iso-code" , -1); |
354 | break; |
355 | case UNUM_UNIT_WIDTH_HIDDEN: |
356 | sb.append(u"unit-width-hidden" , -1); |
357 | break; |
358 | default: |
359 | UPRV_UNREACHABLE; |
360 | } |
361 | } |
362 | |
363 | void enum_to_stem_string::signDisplay(UNumberSignDisplay value, UnicodeString& sb) { |
364 | switch (value) { |
365 | case UNUM_SIGN_AUTO: |
366 | sb.append(u"sign-auto" , -1); |
367 | break; |
368 | case UNUM_SIGN_ALWAYS: |
369 | sb.append(u"sign-always" , -1); |
370 | break; |
371 | case UNUM_SIGN_NEVER: |
372 | sb.append(u"sign-never" , -1); |
373 | break; |
374 | case UNUM_SIGN_ACCOUNTING: |
375 | sb.append(u"sign-accounting" , -1); |
376 | break; |
377 | case UNUM_SIGN_ACCOUNTING_ALWAYS: |
378 | sb.append(u"sign-accounting-always" , -1); |
379 | break; |
380 | case UNUM_SIGN_EXCEPT_ZERO: |
381 | sb.append(u"sign-except-zero" , -1); |
382 | break; |
383 | case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: |
384 | sb.append(u"sign-accounting-except-zero" , -1); |
385 | break; |
386 | default: |
387 | UPRV_UNREACHABLE; |
388 | } |
389 | } |
390 | |
391 | void |
392 | enum_to_stem_string::decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb) { |
393 | switch (value) { |
394 | case UNUM_DECIMAL_SEPARATOR_AUTO: |
395 | sb.append(u"decimal-auto" , -1); |
396 | break; |
397 | case UNUM_DECIMAL_SEPARATOR_ALWAYS: |
398 | sb.append(u"decimal-always" , -1); |
399 | break; |
400 | default: |
401 | UPRV_UNREACHABLE; |
402 | } |
403 | } |
404 | |
405 | |
406 | UnlocalizedNumberFormatter skeleton::create( |
407 | const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status) { |
408 | |
409 | // Initialize perror |
410 | if (perror != nullptr) { |
411 | perror->line = 0; |
412 | perror->offset = -1; |
413 | perror->preContext[0] = 0; |
414 | perror->postContext[0] = 0; |
415 | } |
416 | |
417 | umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status); |
418 | if (U_FAILURE(status)) { |
419 | return {}; |
420 | } |
421 | |
422 | int32_t errOffset; |
423 | MacroProps macros = parseSkeleton(skeletonString, errOffset, status); |
424 | if (U_SUCCESS(status)) { |
425 | return NumberFormatter::with().macros(macros); |
426 | } |
427 | |
428 | if (perror == nullptr) { |
429 | return {}; |
430 | } |
431 | |
432 | // Populate the UParseError with the error location |
433 | perror->offset = errOffset; |
434 | int32_t contextStart = uprv_max(0, errOffset - U_PARSE_CONTEXT_LEN + 1); |
435 | int32_t contextEnd = uprv_min(skeletonString.length(), errOffset + U_PARSE_CONTEXT_LEN - 1); |
436 | skeletonString.extract(contextStart, errOffset - contextStart, perror->preContext, 0); |
437 | perror->preContext[errOffset - contextStart] = 0; |
438 | skeletonString.extract(errOffset, contextEnd - errOffset, perror->postContext, 0); |
439 | perror->postContext[contextEnd - errOffset] = 0; |
440 | return {}; |
441 | } |
442 | |
443 | UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) { |
444 | umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status); |
445 | UnicodeString sb; |
446 | GeneratorHelpers::generateSkeleton(macros, sb, status); |
447 | return sb; |
448 | } |
449 | |
450 | MacroProps skeleton::parseSkeleton( |
451 | const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status) { |
452 | U_ASSERT(U_SUCCESS(status)); |
453 | |
454 | // Add a trailing whitespace to the end of the skeleton string to make code cleaner. |
455 | UnicodeString tempSkeletonString(skeletonString); |
456 | tempSkeletonString.append(u' '); |
457 | |
458 | SeenMacroProps seen; |
459 | MacroProps macros; |
460 | StringSegment segment(tempSkeletonString, false); |
461 | UCharsTrie stemTrie(kSerializedStemTrie); |
462 | ParseState stem = STATE_NULL; |
463 | int32_t offset = 0; |
464 | |
465 | // Primary skeleton parse loop: |
466 | while (offset < segment.length()) { |
467 | UChar32 cp = segment.codePointAt(offset); |
468 | bool isTokenSeparator = PatternProps::isWhiteSpace(cp); |
469 | bool isOptionSeparator = (cp == u'/'); |
470 | |
471 | if (!isTokenSeparator && !isOptionSeparator) { |
472 | // Non-separator token; consume it. |
473 | offset += U16_LENGTH(cp); |
474 | if (stem == STATE_NULL) { |
475 | // We are currently consuming a stem. |
476 | // Go to the next state in the stem trie. |
477 | stemTrie.nextForCodePoint(cp); |
478 | } |
479 | continue; |
480 | } |
481 | |
482 | // We are looking at a token or option separator. |
483 | // If the segment is nonempty, parse it and reset the segment. |
484 | // Otherwise, make sure it is a valid repeating separator. |
485 | if (offset != 0) { |
486 | segment.setLength(offset); |
487 | if (stem == STATE_NULL) { |
488 | // The first separator after the start of a token. Parse it as a stem. |
489 | stem = parseStem(segment, stemTrie, seen, macros, status); |
490 | stemTrie.reset(); |
491 | } else { |
492 | // A separator after the first separator of a token. Parse it as an option. |
493 | stem = parseOption(stem, segment, macros, status); |
494 | } |
495 | segment.resetLength(); |
496 | if (U_FAILURE(status)) { |
497 | errOffset = segment.getOffset(); |
498 | return macros; |
499 | } |
500 | |
501 | // Consume the segment: |
502 | segment.adjustOffset(offset); |
503 | offset = 0; |
504 | |
505 | } else if (stem != STATE_NULL) { |
506 | // A separator ('/' or whitespace) following an option separator ('/') |
507 | // segment.setLength(U16_LENGTH(cp)); // for error message |
508 | // throw new SkeletonSyntaxException("Unexpected separator character", segment); |
509 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
510 | errOffset = segment.getOffset(); |
511 | return macros; |
512 | |
513 | } else { |
514 | // Two spaces in a row; this is OK. |
515 | } |
516 | |
517 | // Does the current stem forbid options? |
518 | if (isOptionSeparator && stem == STATE_NULL) { |
519 | // segment.setLength(U16_LENGTH(cp)); // for error message |
520 | // throw new SkeletonSyntaxException("Unexpected option separator", segment); |
521 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
522 | errOffset = segment.getOffset(); |
523 | return macros; |
524 | } |
525 | |
526 | // Does the current stem require an option? |
527 | if (isTokenSeparator && stem != STATE_NULL) { |
528 | switch (stem) { |
529 | case STATE_INCREMENT_PRECISION: |
530 | case STATE_MEASURE_UNIT: |
531 | case STATE_PER_MEASURE_UNIT: |
532 | case STATE_CURRENCY_UNIT: |
533 | case STATE_INTEGER_WIDTH: |
534 | case STATE_NUMBERING_SYSTEM: |
535 | case STATE_SCALE: |
536 | // segment.setLength(U16_LENGTH(cp)); // for error message |
537 | // throw new SkeletonSyntaxException("Stem requires an option", segment); |
538 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
539 | errOffset = segment.getOffset(); |
540 | return macros; |
541 | default: |
542 | break; |
543 | } |
544 | stem = STATE_NULL; |
545 | } |
546 | |
547 | // Consume the separator: |
548 | segment.adjustOffset(U16_LENGTH(cp)); |
549 | } |
550 | U_ASSERT(stem == STATE_NULL); |
551 | return macros; |
552 | } |
553 | |
554 | ParseState |
555 | skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, |
556 | MacroProps& macros, UErrorCode& status) { |
557 | // First check for "blueprint" stems, which start with a "signal char" |
558 | switch (segment.charAt(0)) { |
559 | case u'.': |
560 | CHECK_NULL(seen, precision, status); |
561 | blueprint_helpers::parseFractionStem(segment, macros, status); |
562 | return STATE_FRACTION_PRECISION; |
563 | case u'@': |
564 | CHECK_NULL(seen, precision, status); |
565 | blueprint_helpers::parseDigitsStem(segment, macros, status); |
566 | return STATE_NULL; |
567 | default: |
568 | break; |
569 | } |
570 | |
571 | // Now look at the stemsTrie, which is already be pointing at our stem. |
572 | UStringTrieResult stemResult = stemTrie.current(); |
573 | |
574 | if (stemResult != USTRINGTRIE_INTERMEDIATE_VALUE && stemResult != USTRINGTRIE_FINAL_VALUE) { |
575 | // throw new SkeletonSyntaxException("Unknown stem", segment); |
576 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
577 | return STATE_NULL; |
578 | } |
579 | |
580 | auto stem = static_cast<StemEnum>(stemTrie.getValue()); |
581 | switch (stem) { |
582 | |
583 | // Stems with meaning on their own, not requiring an option: |
584 | |
585 | case STEM_COMPACT_SHORT: |
586 | case STEM_COMPACT_LONG: |
587 | case STEM_SCIENTIFIC: |
588 | case STEM_ENGINEERING: |
589 | case STEM_NOTATION_SIMPLE: |
590 | CHECK_NULL(seen, notation, status); |
591 | macros.notation = stem_to_object::notation(stem); |
592 | switch (stem) { |
593 | case STEM_SCIENTIFIC: |
594 | case STEM_ENGINEERING: |
595 | return STATE_SCIENTIFIC; // allows for scientific options |
596 | default: |
597 | return STATE_NULL; |
598 | } |
599 | |
600 | case STEM_BASE_UNIT: |
601 | case STEM_PERCENT: |
602 | case STEM_PERMILLE: |
603 | CHECK_NULL(seen, unit, status); |
604 | macros.unit = stem_to_object::unit(stem); |
605 | return STATE_NULL; |
606 | |
607 | case STEM_PRECISION_INTEGER: |
608 | case STEM_PRECISION_UNLIMITED: |
609 | case STEM_PRECISION_CURRENCY_STANDARD: |
610 | case STEM_PRECISION_CURRENCY_CASH: |
611 | CHECK_NULL(seen, precision, status); |
612 | macros.precision = stem_to_object::precision(stem); |
613 | switch (stem) { |
614 | case STEM_PRECISION_INTEGER: |
615 | return STATE_FRACTION_PRECISION; // allows for "precision-integer/@##" |
616 | default: |
617 | return STATE_NULL; |
618 | } |
619 | |
620 | case STEM_ROUNDING_MODE_CEILING: |
621 | case STEM_ROUNDING_MODE_FLOOR: |
622 | case STEM_ROUNDING_MODE_DOWN: |
623 | case STEM_ROUNDING_MODE_UP: |
624 | case STEM_ROUNDING_MODE_HALF_EVEN: |
625 | case STEM_ROUNDING_MODE_HALF_DOWN: |
626 | case STEM_ROUNDING_MODE_HALF_UP: |
627 | case STEM_ROUNDING_MODE_UNNECESSARY: |
628 | CHECK_NULL(seen, roundingMode, status); |
629 | macros.roundingMode = stem_to_object::roundingMode(stem); |
630 | return STATE_NULL; |
631 | |
632 | case STEM_GROUP_OFF: |
633 | case STEM_GROUP_MIN2: |
634 | case STEM_GROUP_AUTO: |
635 | case STEM_GROUP_ON_ALIGNED: |
636 | case STEM_GROUP_THOUSANDS: |
637 | CHECK_NULL(seen, grouper, status); |
638 | macros.grouper = Grouper::forStrategy(stem_to_object::groupingStrategy(stem)); |
639 | return STATE_NULL; |
640 | |
641 | case STEM_LATIN: |
642 | CHECK_NULL(seen, symbols, status); |
643 | macros.symbols.setTo(NumberingSystem::createInstanceByName("latn" , status)); |
644 | return STATE_NULL; |
645 | |
646 | case STEM_UNIT_WIDTH_NARROW: |
647 | case STEM_UNIT_WIDTH_SHORT: |
648 | case STEM_UNIT_WIDTH_FULL_NAME: |
649 | case STEM_UNIT_WIDTH_ISO_CODE: |
650 | case STEM_UNIT_WIDTH_HIDDEN: |
651 | CHECK_NULL(seen, unitWidth, status); |
652 | macros.unitWidth = stem_to_object::unitWidth(stem); |
653 | return STATE_NULL; |
654 | |
655 | case STEM_SIGN_AUTO: |
656 | case STEM_SIGN_ALWAYS: |
657 | case STEM_SIGN_NEVER: |
658 | case STEM_SIGN_ACCOUNTING: |
659 | case STEM_SIGN_ACCOUNTING_ALWAYS: |
660 | case STEM_SIGN_EXCEPT_ZERO: |
661 | case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO: |
662 | CHECK_NULL(seen, sign, status); |
663 | macros.sign = stem_to_object::signDisplay(stem); |
664 | return STATE_NULL; |
665 | |
666 | case STEM_DECIMAL_AUTO: |
667 | case STEM_DECIMAL_ALWAYS: |
668 | CHECK_NULL(seen, decimal, status); |
669 | macros.decimal = stem_to_object::decimalSeparatorDisplay(stem); |
670 | return STATE_NULL; |
671 | |
672 | // Stems requiring an option: |
673 | |
674 | case STEM_PRECISION_INCREMENT: |
675 | CHECK_NULL(seen, precision, status); |
676 | return STATE_INCREMENT_PRECISION; |
677 | |
678 | case STEM_MEASURE_UNIT: |
679 | CHECK_NULL(seen, unit, status); |
680 | return STATE_MEASURE_UNIT; |
681 | |
682 | case STEM_PER_MEASURE_UNIT: |
683 | CHECK_NULL(seen, perUnit, status); |
684 | return STATE_PER_MEASURE_UNIT; |
685 | |
686 | case STEM_CURRENCY: |
687 | CHECK_NULL(seen, unit, status); |
688 | return STATE_CURRENCY_UNIT; |
689 | |
690 | case STEM_INTEGER_WIDTH: |
691 | CHECK_NULL(seen, integerWidth, status); |
692 | return STATE_INTEGER_WIDTH; |
693 | |
694 | case STEM_NUMBERING_SYSTEM: |
695 | CHECK_NULL(seen, symbols, status); |
696 | return STATE_NUMBERING_SYSTEM; |
697 | |
698 | case STEM_SCALE: |
699 | CHECK_NULL(seen, scale, status); |
700 | return STATE_SCALE; |
701 | |
702 | default: |
703 | UPRV_UNREACHABLE; |
704 | } |
705 | } |
706 | |
707 | ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, |
708 | UErrorCode& status) { |
709 | |
710 | ///// Required options: ///// |
711 | |
712 | switch (stem) { |
713 | case STATE_CURRENCY_UNIT: |
714 | blueprint_helpers::parseCurrencyOption(segment, macros, status); |
715 | return STATE_NULL; |
716 | case STATE_MEASURE_UNIT: |
717 | blueprint_helpers::parseMeasureUnitOption(segment, macros, status); |
718 | return STATE_NULL; |
719 | case STATE_PER_MEASURE_UNIT: |
720 | blueprint_helpers::parseMeasurePerUnitOption(segment, macros, status); |
721 | return STATE_NULL; |
722 | case STATE_INCREMENT_PRECISION: |
723 | blueprint_helpers::parseIncrementOption(segment, macros, status); |
724 | return STATE_NULL; |
725 | case STATE_INTEGER_WIDTH: |
726 | blueprint_helpers::parseIntegerWidthOption(segment, macros, status); |
727 | return STATE_NULL; |
728 | case STATE_NUMBERING_SYSTEM: |
729 | blueprint_helpers::parseNumberingSystemOption(segment, macros, status); |
730 | return STATE_NULL; |
731 | case STATE_SCALE: |
732 | blueprint_helpers::parseScaleOption(segment, macros, status); |
733 | return STATE_NULL; |
734 | default: |
735 | break; |
736 | } |
737 | |
738 | ///// Non-required options: ///// |
739 | |
740 | // Scientific options |
741 | switch (stem) { |
742 | case STATE_SCIENTIFIC: |
743 | if (blueprint_helpers::parseExponentWidthOption(segment, macros, status)) { |
744 | return STATE_SCIENTIFIC; |
745 | } |
746 | if (U_FAILURE(status)) { |
747 | return {}; |
748 | } |
749 | if (blueprint_helpers::parseExponentSignOption(segment, macros, status)) { |
750 | return STATE_SCIENTIFIC; |
751 | } |
752 | if (U_FAILURE(status)) { |
753 | return {}; |
754 | } |
755 | break; |
756 | default: |
757 | break; |
758 | } |
759 | |
760 | // Frac-sig option |
761 | switch (stem) { |
762 | case STATE_FRACTION_PRECISION: |
763 | if (blueprint_helpers::parseFracSigOption(segment, macros, status)) { |
764 | return STATE_NULL; |
765 | } |
766 | if (U_FAILURE(status)) { |
767 | return {}; |
768 | } |
769 | break; |
770 | default: |
771 | break; |
772 | } |
773 | |
774 | // Unknown option |
775 | // throw new SkeletonSyntaxException("Invalid option", segment); |
776 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
777 | return STATE_NULL; |
778 | } |
779 | |
780 | void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
781 | if (U_FAILURE(status)) { return; } |
782 | |
783 | // Supported options |
784 | if (GeneratorHelpers::notation(macros, sb, status)) { |
785 | sb.append(u' '); |
786 | } |
787 | if (U_FAILURE(status)) { return; } |
788 | if (GeneratorHelpers::unit(macros, sb, status)) { |
789 | sb.append(u' '); |
790 | } |
791 | if (U_FAILURE(status)) { return; } |
792 | if (GeneratorHelpers::perUnit(macros, sb, status)) { |
793 | sb.append(u' '); |
794 | } |
795 | if (U_FAILURE(status)) { return; } |
796 | if (GeneratorHelpers::precision(macros, sb, status)) { |
797 | sb.append(u' '); |
798 | } |
799 | if (U_FAILURE(status)) { return; } |
800 | if (GeneratorHelpers::roundingMode(macros, sb, status)) { |
801 | sb.append(u' '); |
802 | } |
803 | if (U_FAILURE(status)) { return; } |
804 | if (GeneratorHelpers::grouping(macros, sb, status)) { |
805 | sb.append(u' '); |
806 | } |
807 | if (U_FAILURE(status)) { return; } |
808 | if (GeneratorHelpers::integerWidth(macros, sb, status)) { |
809 | sb.append(u' '); |
810 | } |
811 | if (U_FAILURE(status)) { return; } |
812 | if (GeneratorHelpers::symbols(macros, sb, status)) { |
813 | sb.append(u' '); |
814 | } |
815 | if (U_FAILURE(status)) { return; } |
816 | if (GeneratorHelpers::unitWidth(macros, sb, status)) { |
817 | sb.append(u' '); |
818 | } |
819 | if (U_FAILURE(status)) { return; } |
820 | if (GeneratorHelpers::sign(macros, sb, status)) { |
821 | sb.append(u' '); |
822 | } |
823 | if (U_FAILURE(status)) { return; } |
824 | if (GeneratorHelpers::decimal(macros, sb, status)) { |
825 | sb.append(u' '); |
826 | } |
827 | if (U_FAILURE(status)) { return; } |
828 | if (GeneratorHelpers::scale(macros, sb, status)) { |
829 | sb.append(u' '); |
830 | } |
831 | if (U_FAILURE(status)) { return; } |
832 | |
833 | // Unsupported options |
834 | if (!macros.padder.isBogus()) { |
835 | status = U_UNSUPPORTED_ERROR; |
836 | return; |
837 | } |
838 | if (macros.affixProvider != nullptr) { |
839 | status = U_UNSUPPORTED_ERROR; |
840 | return; |
841 | } |
842 | if (macros.rules != nullptr) { |
843 | status = U_UNSUPPORTED_ERROR; |
844 | return; |
845 | } |
846 | if (macros.currencySymbols != nullptr) { |
847 | status = U_UNSUPPORTED_ERROR; |
848 | return; |
849 | } |
850 | |
851 | // Remove the trailing space |
852 | if (sb.length() > 0) { |
853 | sb.truncate(sb.length() - 1); |
854 | } |
855 | } |
856 | |
857 | |
858 | bool blueprint_helpers::parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, |
859 | UErrorCode&) { |
860 | if (segment.charAt(0) != u'+') { |
861 | return false; |
862 | } |
863 | int32_t offset = 1; |
864 | int32_t minExp = 0; |
865 | for (; offset < segment.length(); offset++) { |
866 | if (segment.charAt(offset) == u'e') { |
867 | minExp++; |
868 | } else { |
869 | break; |
870 | } |
871 | } |
872 | if (offset < segment.length()) { |
873 | return false; |
874 | } |
875 | // Use the public APIs to enforce bounds checking |
876 | macros.notation = static_cast<ScientificNotation&>(macros.notation).withMinExponentDigits(minExp); |
877 | return true; |
878 | } |
879 | |
880 | void |
881 | blueprint_helpers::generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode&) { |
882 | sb.append(u'+'); |
883 | appendMultiple(sb, u'e', minExponentDigits); |
884 | } |
885 | |
886 | bool |
887 | blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode&) { |
888 | // Get the sign display type out of the CharsTrie data structure. |
889 | UCharsTrie tempStemTrie(kSerializedStemTrie); |
890 | UStringTrieResult result = tempStemTrie.next( |
891 | segment.toTempUnicodeString().getBuffer(), |
892 | segment.length()); |
893 | if (result != USTRINGTRIE_INTERMEDIATE_VALUE && result != USTRINGTRIE_FINAL_VALUE) { |
894 | return false; |
895 | } |
896 | auto sign = stem_to_object::signDisplay(static_cast<StemEnum>(tempStemTrie.getValue())); |
897 | if (sign == UNUM_SIGN_COUNT) { |
898 | return false; |
899 | } |
900 | macros.notation = static_cast<ScientificNotation&>(macros.notation).withExponentSignDisplay(sign); |
901 | return true; |
902 | } |
903 | |
904 | void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros, |
905 | UErrorCode& status) { |
906 | // Unlike ICU4J, have to check length manually because ICU4C CurrencyUnit does not check it for us |
907 | if (segment.length() != 3) { |
908 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
909 | return; |
910 | } |
911 | const UChar* currencyCode = segment.toTempUnicodeString().getBuffer(); |
912 | UErrorCode localStatus = U_ZERO_ERROR; |
913 | CurrencyUnit currency(currencyCode, localStatus); |
914 | if (U_FAILURE(localStatus)) { |
915 | // Not 3 ascii chars |
916 | // throw new SkeletonSyntaxException("Invalid currency", segment); |
917 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
918 | return; |
919 | } |
920 | // Slicing is OK |
921 | macros.unit = currency; // NOLINT |
922 | } |
923 | |
924 | void |
925 | blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode&) { |
926 | sb.append(currency.getISOCurrency(), -1); |
927 | } |
928 | |
929 | void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, |
930 | UErrorCode& status) { |
931 | const UnicodeString stemString = segment.toTempUnicodeString(); |
932 | |
933 | // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric) |
934 | // http://unicode.org/reports/tr35/#Validity_Data |
935 | int firstHyphen = 0; |
936 | while (firstHyphen < stemString.length() && stemString.charAt(firstHyphen) != '-') { |
937 | firstHyphen++; |
938 | } |
939 | if (firstHyphen == stemString.length()) { |
940 | // throw new SkeletonSyntaxException("Invalid measure unit option", segment); |
941 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
942 | return; |
943 | } |
944 | |
945 | // Need to do char <-> UChar conversion... |
946 | U_ASSERT(U_SUCCESS(status)); |
947 | CharString type; |
948 | SKELETON_UCHAR_TO_CHAR(type, stemString, 0, firstHyphen, status); |
949 | CharString subType; |
950 | SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status); |
951 | |
952 | // Note: the largest type as of this writing (March 2018) is "volume", which has 24 units. |
953 | static constexpr int32_t CAPACITY = 30; |
954 | MeasureUnit units[CAPACITY]; |
955 | UErrorCode localStatus = U_ZERO_ERROR; |
956 | int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus); |
957 | if (U_FAILURE(localStatus)) { |
958 | // More than 30 units in this type? |
959 | status = U_INTERNAL_PROGRAM_ERROR; |
960 | return; |
961 | } |
962 | for (int32_t i = 0; i < numUnits; i++) { |
963 | auto& unit = units[i]; |
964 | if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) { |
965 | macros.unit = unit; |
966 | return; |
967 | } |
968 | } |
969 | |
970 | // throw new SkeletonSyntaxException("Unknown measure unit", segment); |
971 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
972 | } |
973 | |
974 | void blueprint_helpers::generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, |
975 | UErrorCode&) { |
976 | // Need to do char <-> UChar conversion... |
977 | sb.append(UnicodeString(measureUnit.getType(), -1, US_INV)); |
978 | sb.append(u'-'); |
979 | sb.append(UnicodeString(measureUnit.getSubtype(), -1, US_INV)); |
980 | } |
981 | |
982 | void blueprint_helpers::parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, |
983 | UErrorCode& status) { |
984 | // A little bit of a hack: safe the current unit (numerator), call the main measure unit |
985 | // parsing code, put back the numerator unit, and put the new unit into per-unit. |
986 | MeasureUnit numerator = macros.unit; |
987 | parseMeasureUnitOption(segment, macros, status); |
988 | if (U_FAILURE(status)) { return; } |
989 | macros.perUnit = macros.unit; |
990 | macros.unit = numerator; |
991 | } |
992 | |
993 | void blueprint_helpers::parseFractionStem(const StringSegment& segment, MacroProps& macros, |
994 | UErrorCode& status) { |
995 | U_ASSERT(segment.charAt(0) == u'.'); |
996 | int32_t offset = 1; |
997 | int32_t minFrac = 0; |
998 | int32_t maxFrac; |
999 | for (; offset < segment.length(); offset++) { |
1000 | if (segment.charAt(offset) == u'0') { |
1001 | minFrac++; |
1002 | } else { |
1003 | break; |
1004 | } |
1005 | } |
1006 | if (offset < segment.length()) { |
1007 | if (segment.charAt(offset) == u'+') { |
1008 | maxFrac = -1; |
1009 | offset++; |
1010 | } else { |
1011 | maxFrac = minFrac; |
1012 | for (; offset < segment.length(); offset++) { |
1013 | if (segment.charAt(offset) == u'#') { |
1014 | maxFrac++; |
1015 | } else { |
1016 | break; |
1017 | } |
1018 | } |
1019 | } |
1020 | } else { |
1021 | maxFrac = minFrac; |
1022 | } |
1023 | if (offset < segment.length()) { |
1024 | // throw new SkeletonSyntaxException("Invalid fraction stem", segment); |
1025 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1026 | return; |
1027 | } |
1028 | // Use the public APIs to enforce bounds checking |
1029 | if (maxFrac == -1) { |
1030 | macros.precision = Precision::minFraction(minFrac); |
1031 | } else { |
1032 | macros.precision = Precision::minMaxFraction(minFrac, maxFrac); |
1033 | } |
1034 | } |
1035 | |
1036 | void |
1037 | blueprint_helpers::generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode&) { |
1038 | if (minFrac == 0 && maxFrac == 0) { |
1039 | sb.append(u"precision-integer" , -1); |
1040 | return; |
1041 | } |
1042 | sb.append(u'.'); |
1043 | appendMultiple(sb, u'0', minFrac); |
1044 | if (maxFrac == -1) { |
1045 | sb.append(u'+'); |
1046 | } else { |
1047 | appendMultiple(sb, u'#', maxFrac - minFrac); |
1048 | } |
1049 | } |
1050 | |
1051 | void |
1052 | blueprint_helpers::parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { |
1053 | U_ASSERT(segment.charAt(0) == u'@'); |
1054 | int offset = 0; |
1055 | int minSig = 0; |
1056 | int maxSig; |
1057 | for (; offset < segment.length(); offset++) { |
1058 | if (segment.charAt(offset) == u'@') { |
1059 | minSig++; |
1060 | } else { |
1061 | break; |
1062 | } |
1063 | } |
1064 | if (offset < segment.length()) { |
1065 | if (segment.charAt(offset) == u'+') { |
1066 | maxSig = -1; |
1067 | offset++; |
1068 | } else { |
1069 | maxSig = minSig; |
1070 | for (; offset < segment.length(); offset++) { |
1071 | if (segment.charAt(offset) == u'#') { |
1072 | maxSig++; |
1073 | } else { |
1074 | break; |
1075 | } |
1076 | } |
1077 | } |
1078 | } else { |
1079 | maxSig = minSig; |
1080 | } |
1081 | if (offset < segment.length()) { |
1082 | // throw new SkeletonSyntaxException("Invalid significant digits stem", segment); |
1083 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1084 | return; |
1085 | } |
1086 | // Use the public APIs to enforce bounds checking |
1087 | if (maxSig == -1) { |
1088 | macros.precision = Precision::minSignificantDigits(minSig); |
1089 | } else { |
1090 | macros.precision = Precision::minMaxSignificantDigits(minSig, maxSig); |
1091 | } |
1092 | } |
1093 | |
1094 | void |
1095 | blueprint_helpers::generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode&) { |
1096 | appendMultiple(sb, u'@', minSig); |
1097 | if (maxSig == -1) { |
1098 | sb.append(u'+'); |
1099 | } else { |
1100 | appendMultiple(sb, u'#', maxSig - minSig); |
1101 | } |
1102 | } |
1103 | |
1104 | bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroProps& macros, |
1105 | UErrorCode& status) { |
1106 | if (segment.charAt(0) != u'@') { |
1107 | return false; |
1108 | } |
1109 | int offset = 0; |
1110 | int minSig = 0; |
1111 | int maxSig; |
1112 | for (; offset < segment.length(); offset++) { |
1113 | if (segment.charAt(offset) == u'@') { |
1114 | minSig++; |
1115 | } else { |
1116 | break; |
1117 | } |
1118 | } |
1119 | // For the frac-sig option, there must be minSig or maxSig but not both. |
1120 | // Valid: @+, @@+, @@@+ |
1121 | // Valid: @#, @##, @### |
1122 | // Invalid: @, @@, @@@ |
1123 | // Invalid: @@#, @@##, @@@# |
1124 | if (offset < segment.length()) { |
1125 | if (segment.charAt(offset) == u'+') { |
1126 | maxSig = -1; |
1127 | offset++; |
1128 | } else if (minSig > 1) { |
1129 | // @@#, @@##, @@@# |
1130 | // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); |
1131 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1132 | return false; |
1133 | } else { |
1134 | maxSig = minSig; |
1135 | for (; offset < segment.length(); offset++) { |
1136 | if (segment.charAt(offset) == u'#') { |
1137 | maxSig++; |
1138 | } else { |
1139 | break; |
1140 | } |
1141 | } |
1142 | } |
1143 | } else { |
1144 | // @, @@, @@@ |
1145 | // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); |
1146 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1147 | return false; |
1148 | } |
1149 | if (offset < segment.length()) { |
1150 | // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); |
1151 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1152 | return false; |
1153 | } |
1154 | |
1155 | auto& oldPrecision = static_cast<const FractionPrecision&>(macros.precision); |
1156 | if (maxSig == -1) { |
1157 | macros.precision = oldPrecision.withMinDigits(minSig); |
1158 | } else { |
1159 | macros.precision = oldPrecision.withMaxDigits(maxSig); |
1160 | } |
1161 | return true; |
1162 | } |
1163 | |
1164 | void blueprint_helpers::parseIncrementOption(const StringSegment& segment, MacroProps& macros, |
1165 | UErrorCode& status) { |
1166 | // Need to do char <-> UChar conversion... |
1167 | U_ASSERT(U_SUCCESS(status)); |
1168 | CharString buffer; |
1169 | SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); |
1170 | |
1171 | // Utilize DecimalQuantity/decNumber to parse this for us. |
1172 | DecimalQuantity dq; |
1173 | UErrorCode localStatus = U_ZERO_ERROR; |
1174 | dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus); |
1175 | if (U_FAILURE(localStatus)) { |
1176 | // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e); |
1177 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1178 | return; |
1179 | } |
1180 | double increment = dq.toDouble(); |
1181 | |
1182 | // We also need to figure out how many digits. Do a brute force string operation. |
1183 | int decimalOffset = 0; |
1184 | while (decimalOffset < segment.length() && segment.charAt(decimalOffset) != '.') { |
1185 | decimalOffset++; |
1186 | } |
1187 | if (decimalOffset == segment.length()) { |
1188 | macros.precision = Precision::increment(increment); |
1189 | } else { |
1190 | int32_t fractionLength = segment.length() - decimalOffset - 1; |
1191 | macros.precision = Precision::increment(increment).withMinFraction(fractionLength); |
1192 | } |
1193 | } |
1194 | |
1195 | void blueprint_helpers::generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, |
1196 | UErrorCode&) { |
1197 | // Utilize DecimalQuantity/double_conversion to format this for us. |
1198 | DecimalQuantity dq; |
1199 | dq.setToDouble(increment); |
1200 | dq.roundToInfinity(); |
1201 | sb.append(dq.toPlainString()); |
1202 | |
1203 | // We might need to append extra trailing zeros for min fraction... |
1204 | if (trailingZeros > 0) { |
1205 | appendMultiple(sb, u'0', trailingZeros); |
1206 | } |
1207 | } |
1208 | |
1209 | void blueprint_helpers::parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, |
1210 | UErrorCode& status) { |
1211 | int32_t offset = 0; |
1212 | int32_t minInt = 0; |
1213 | int32_t maxInt; |
1214 | if (segment.charAt(0) == u'+') { |
1215 | maxInt = -1; |
1216 | offset++; |
1217 | } else { |
1218 | maxInt = 0; |
1219 | } |
1220 | for (; offset < segment.length(); offset++) { |
1221 | if (maxInt != -1 && segment.charAt(offset) == u'#') { |
1222 | maxInt++; |
1223 | } else { |
1224 | break; |
1225 | } |
1226 | } |
1227 | if (offset < segment.length()) { |
1228 | for (; offset < segment.length(); offset++) { |
1229 | if (segment.charAt(offset) == u'0') { |
1230 | minInt++; |
1231 | } else { |
1232 | break; |
1233 | } |
1234 | } |
1235 | } |
1236 | if (maxInt != -1) { |
1237 | maxInt += minInt; |
1238 | } |
1239 | if (offset < segment.length()) { |
1240 | // throw new SkeletonSyntaxException("Invalid integer width stem", segment); |
1241 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1242 | return; |
1243 | } |
1244 | // Use the public APIs to enforce bounds checking |
1245 | if (maxInt == -1) { |
1246 | macros.integerWidth = IntegerWidth::zeroFillTo(minInt); |
1247 | } else { |
1248 | macros.integerWidth = IntegerWidth::zeroFillTo(minInt).truncateAt(maxInt); |
1249 | } |
1250 | } |
1251 | |
1252 | void blueprint_helpers::generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, |
1253 | UErrorCode&) { |
1254 | if (maxInt == -1) { |
1255 | sb.append(u'+'); |
1256 | } else { |
1257 | appendMultiple(sb, u'#', maxInt - minInt); |
1258 | } |
1259 | appendMultiple(sb, u'0', minInt); |
1260 | } |
1261 | |
1262 | void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, |
1263 | UErrorCode& status) { |
1264 | // Need to do char <-> UChar conversion... |
1265 | U_ASSERT(U_SUCCESS(status)); |
1266 | CharString buffer; |
1267 | SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); |
1268 | |
1269 | NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status); |
1270 | if (ns == nullptr || U_FAILURE(status)) { |
1271 | // This is a skeleton syntax error; don't bubble up the low-level NumberingSystem error |
1272 | // throw new SkeletonSyntaxException("Unknown numbering system", segment); |
1273 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1274 | return; |
1275 | } |
1276 | macros.symbols.setTo(ns); |
1277 | } |
1278 | |
1279 | void blueprint_helpers::generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, |
1280 | UErrorCode&) { |
1281 | // Need to do char <-> UChar conversion... |
1282 | sb.append(UnicodeString(ns.getName(), -1, US_INV)); |
1283 | } |
1284 | |
1285 | void blueprint_helpers::parseScaleOption(const StringSegment& segment, MacroProps& macros, |
1286 | UErrorCode& status) { |
1287 | // Need to do char <-> UChar conversion... |
1288 | U_ASSERT(U_SUCCESS(status)); |
1289 | CharString buffer; |
1290 | SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); |
1291 | |
1292 | LocalPointer<DecNum> decnum(new DecNum(), status); |
1293 | if (U_FAILURE(status)) { return; } |
1294 | decnum->setTo({buffer.data(), buffer.length()}, status); |
1295 | if (U_FAILURE(status)) { |
1296 | // This is a skeleton syntax error; don't let the low-level decnum error bubble up |
1297 | status = U_NUMBER_SKELETON_SYNTAX_ERROR; |
1298 | return; |
1299 | } |
1300 | |
1301 | // NOTE: The constructor will optimize the decnum for us if possible. |
1302 | macros.scale = {0, decnum.orphan()}; |
1303 | } |
1304 | |
1305 | void blueprint_helpers::generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, |
1306 | UErrorCode& status) { |
1307 | // Utilize DecimalQuantity/double_conversion to format this for us. |
1308 | DecimalQuantity dq; |
1309 | if (arbitrary != nullptr) { |
1310 | dq.setToDecNum(*arbitrary, status); |
1311 | if (U_FAILURE(status)) { return; } |
1312 | } else { |
1313 | dq.setToInt(1); |
1314 | } |
1315 | dq.adjustMagnitude(magnitude); |
1316 | dq.roundToInfinity(); |
1317 | sb.append(dq.toPlainString()); |
1318 | } |
1319 | |
1320 | |
1321 | bool GeneratorHelpers::notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1322 | if (macros.notation.fType == Notation::NTN_COMPACT) { |
1323 | UNumberCompactStyle style = macros.notation.fUnion.compactStyle; |
1324 | if (style == UNumberCompactStyle::UNUM_LONG) { |
1325 | sb.append(u"compact-long" , -1); |
1326 | return true; |
1327 | } else if (style == UNumberCompactStyle::UNUM_SHORT) { |
1328 | sb.append(u"compact-short" , -1); |
1329 | return true; |
1330 | } else { |
1331 | // Compact notation generated from custom data (not supported in skeleton) |
1332 | // The other compact notations are literals |
1333 | status = U_UNSUPPORTED_ERROR; |
1334 | return false; |
1335 | } |
1336 | } else if (macros.notation.fType == Notation::NTN_SCIENTIFIC) { |
1337 | const Notation::ScientificSettings& impl = macros.notation.fUnion.scientific; |
1338 | if (impl.fEngineeringInterval == 3) { |
1339 | sb.append(u"engineering" , -1); |
1340 | } else { |
1341 | sb.append(u"scientific" , -1); |
1342 | } |
1343 | if (impl.fMinExponentDigits > 1) { |
1344 | sb.append(u'/'); |
1345 | blueprint_helpers::generateExponentWidthOption(impl.fMinExponentDigits, sb, status); |
1346 | if (U_FAILURE(status)) { |
1347 | return false; |
1348 | } |
1349 | } |
1350 | if (impl.fExponentSignDisplay != UNUM_SIGN_AUTO) { |
1351 | sb.append(u'/'); |
1352 | enum_to_stem_string::signDisplay(impl.fExponentSignDisplay, sb); |
1353 | } |
1354 | return true; |
1355 | } else { |
1356 | // Default value is not shown in normalized form |
1357 | return false; |
1358 | } |
1359 | } |
1360 | |
1361 | bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1362 | if (utils::unitIsCurrency(macros.unit)) { |
1363 | sb.append(u"currency/" , -1); |
1364 | CurrencyUnit currency(macros.unit, status); |
1365 | if (U_FAILURE(status)) { |
1366 | return false; |
1367 | } |
1368 | blueprint_helpers::generateCurrencyOption(currency, sb, status); |
1369 | return true; |
1370 | } else if (utils::unitIsNoUnit(macros.unit)) { |
1371 | if (utils::unitIsPercent(macros.unit)) { |
1372 | sb.append(u"percent" , -1); |
1373 | return true; |
1374 | } else if (utils::unitIsPermille(macros.unit)) { |
1375 | sb.append(u"permille" , -1); |
1376 | return true; |
1377 | } else { |
1378 | // Default value is not shown in normalized form |
1379 | return false; |
1380 | } |
1381 | } else { |
1382 | sb.append(u"measure-unit/" , -1); |
1383 | blueprint_helpers::generateMeasureUnitOption(macros.unit, sb, status); |
1384 | return true; |
1385 | } |
1386 | } |
1387 | |
1388 | bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1389 | // Per-units are currently expected to be only MeasureUnits. |
1390 | if (utils::unitIsNoUnit(macros.perUnit)) { |
1391 | if (utils::unitIsPercent(macros.perUnit) || utils::unitIsPermille(macros.perUnit)) { |
1392 | status = U_UNSUPPORTED_ERROR; |
1393 | return false; |
1394 | } else { |
1395 | // Default value: ok to ignore |
1396 | return false; |
1397 | } |
1398 | } else if (utils::unitIsCurrency(macros.perUnit)) { |
1399 | status = U_UNSUPPORTED_ERROR; |
1400 | return false; |
1401 | } else { |
1402 | sb.append(u"per-measure-unit/" , -1); |
1403 | blueprint_helpers::generateMeasureUnitOption(macros.perUnit, sb, status); |
1404 | return true; |
1405 | } |
1406 | } |
1407 | |
1408 | bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1409 | if (macros.precision.fType == Precision::RND_NONE) { |
1410 | sb.append(u"precision-unlimited" , -1); |
1411 | } else if (macros.precision.fType == Precision::RND_FRACTION) { |
1412 | const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig; |
1413 | blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status); |
1414 | } else if (macros.precision.fType == Precision::RND_SIGNIFICANT) { |
1415 | const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig; |
1416 | blueprint_helpers::generateDigitsStem(impl.fMinSig, impl.fMaxSig, sb, status); |
1417 | } else if (macros.precision.fType == Precision::RND_FRACTION_SIGNIFICANT) { |
1418 | const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig; |
1419 | blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status); |
1420 | sb.append(u'/'); |
1421 | if (impl.fMinSig == -1) { |
1422 | blueprint_helpers::generateDigitsStem(1, impl.fMaxSig, sb, status); |
1423 | } else { |
1424 | blueprint_helpers::generateDigitsStem(impl.fMinSig, -1, sb, status); |
1425 | } |
1426 | } else if (macros.precision.fType == Precision::RND_INCREMENT |
1427 | || macros.precision.fType == Precision::RND_INCREMENT_ONE |
1428 | || macros.precision.fType == Precision::RND_INCREMENT_FIVE) { |
1429 | const Precision::IncrementSettings& impl = macros.precision.fUnion.increment; |
1430 | sb.append(u"precision-increment/" , -1); |
1431 | blueprint_helpers::generateIncrementOption( |
1432 | impl.fIncrement, |
1433 | impl.fMinFrac - impl.fMaxFrac, |
1434 | sb, |
1435 | status); |
1436 | } else if (macros.precision.fType == Precision::RND_CURRENCY) { |
1437 | UCurrencyUsage usage = macros.precision.fUnion.currencyUsage; |
1438 | if (usage == UCURR_USAGE_STANDARD) { |
1439 | sb.append(u"precision-currency-standard" , -1); |
1440 | } else { |
1441 | sb.append(u"precision-currency-cash" , -1); |
1442 | } |
1443 | } else { |
1444 | // Bogus or Error |
1445 | return false; |
1446 | } |
1447 | |
1448 | // NOTE: Always return true for rounding because the default value depends on other options. |
1449 | return true; |
1450 | } |
1451 | |
1452 | bool GeneratorHelpers::roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode&) { |
1453 | if (macros.roundingMode == kDefaultMode) { |
1454 | return false; // Default |
1455 | } |
1456 | enum_to_stem_string::roundingMode(macros.roundingMode, sb); |
1457 | return true; |
1458 | } |
1459 | |
1460 | bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1461 | if (macros.grouper.isBogus()) { |
1462 | return false; // No value |
1463 | } else if (macros.grouper.fStrategy == UNUM_GROUPING_COUNT) { |
1464 | status = U_UNSUPPORTED_ERROR; |
1465 | return false; |
1466 | } else if (macros.grouper.fStrategy == UNUM_GROUPING_AUTO) { |
1467 | return false; // Default value |
1468 | } else { |
1469 | enum_to_stem_string::groupingStrategy(macros.grouper.fStrategy, sb); |
1470 | return true; |
1471 | } |
1472 | } |
1473 | |
1474 | bool GeneratorHelpers::integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1475 | if (macros.integerWidth.fHasError || macros.integerWidth.isBogus() || |
1476 | macros.integerWidth == IntegerWidth::standard()) { |
1477 | // Error or Default |
1478 | return false; |
1479 | } |
1480 | sb.append(u"integer-width/" , -1); |
1481 | blueprint_helpers::generateIntegerWidthOption( |
1482 | macros.integerWidth.fUnion.minMaxInt.fMinInt, |
1483 | macros.integerWidth.fUnion.minMaxInt.fMaxInt, |
1484 | sb, |
1485 | status); |
1486 | return true; |
1487 | } |
1488 | |
1489 | bool GeneratorHelpers::symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1490 | if (macros.symbols.isNumberingSystem()) { |
1491 | const NumberingSystem& ns = *macros.symbols.getNumberingSystem(); |
1492 | if (uprv_strcmp(ns.getName(), "latn" ) == 0) { |
1493 | sb.append(u"latin" , -1); |
1494 | } else { |
1495 | sb.append(u"numbering-system/" , -1); |
1496 | blueprint_helpers::generateNumberingSystemOption(ns, sb, status); |
1497 | } |
1498 | return true; |
1499 | } else if (macros.symbols.isDecimalFormatSymbols()) { |
1500 | status = U_UNSUPPORTED_ERROR; |
1501 | return false; |
1502 | } else { |
1503 | // No custom symbols |
1504 | return false; |
1505 | } |
1506 | } |
1507 | |
1508 | bool GeneratorHelpers::unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode&) { |
1509 | if (macros.unitWidth == UNUM_UNIT_WIDTH_SHORT || macros.unitWidth == UNUM_UNIT_WIDTH_COUNT) { |
1510 | return false; // Default or Bogus |
1511 | } |
1512 | enum_to_stem_string::unitWidth(macros.unitWidth, sb); |
1513 | return true; |
1514 | } |
1515 | |
1516 | bool GeneratorHelpers::sign(const MacroProps& macros, UnicodeString& sb, UErrorCode&) { |
1517 | if (macros.sign == UNUM_SIGN_AUTO || macros.sign == UNUM_SIGN_COUNT) { |
1518 | return false; // Default or Bogus |
1519 | } |
1520 | enum_to_stem_string::signDisplay(macros.sign, sb); |
1521 | return true; |
1522 | } |
1523 | |
1524 | bool GeneratorHelpers::decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode&) { |
1525 | if (macros.decimal == UNUM_DECIMAL_SEPARATOR_AUTO || macros.decimal == UNUM_DECIMAL_SEPARATOR_COUNT) { |
1526 | return false; // Default or Bogus |
1527 | } |
1528 | enum_to_stem_string::decimalSeparatorDisplay(macros.decimal, sb); |
1529 | return true; |
1530 | } |
1531 | |
1532 | bool GeneratorHelpers::scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { |
1533 | if (!macros.scale.isValid()) { |
1534 | return false; // Default or Bogus |
1535 | } |
1536 | sb.append(u"scale/" , -1); |
1537 | blueprint_helpers::generateScaleOption( |
1538 | macros.scale.fMagnitude, |
1539 | macros.scale.fArbitrary, |
1540 | sb, |
1541 | status); |
1542 | return true; |
1543 | } |
1544 | |
1545 | |
1546 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
1547 | |