1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | |
8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: |
9 | // Helpful in toString methods and elsewhere. |
10 | #define UNISTR_FROM_STRING_EXPLICIT |
11 | #define UNISTR_FROM_CHAR_EXPLICIT |
12 | |
13 | #include "uassert.h" |
14 | #include "number_patternstring.h" |
15 | #include "unicode/utf16.h" |
16 | #include "number_utils.h" |
17 | #include "number_roundingutils.h" |
18 | #include "number_mapper.h" |
19 | |
20 | using namespace icu; |
21 | using namespace icu::number; |
22 | using namespace icu::number::impl; |
23 | |
24 | |
25 | void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, |
26 | UErrorCode& status) { |
27 | patternInfo.consumePattern(patternString, status); |
28 | } |
29 | |
30 | DecimalFormatProperties |
31 | PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, |
32 | UErrorCode& status) { |
33 | DecimalFormatProperties properties; |
34 | parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); |
35 | return properties; |
36 | } |
37 | |
38 | DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, |
39 | UErrorCode& status) { |
40 | return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status); |
41 | } |
42 | |
43 | void |
44 | PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, |
45 | IgnoreRounding ignoreRounding, UErrorCode& status) { |
46 | parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); |
47 | } |
48 | |
49 | |
50 | char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { |
51 | const Endpoints& endpoints = getEndpoints(flags); |
52 | if (index < 0 || index >= endpoints.end - endpoints.start) { |
53 | UPRV_UNREACHABLE; |
54 | } |
55 | return pattern.charAt(endpoints.start + index); |
56 | } |
57 | |
58 | int32_t ParsedPatternInfo::length(int32_t flags) const { |
59 | return getLengthFromEndpoints(getEndpoints(flags)); |
60 | } |
61 | |
62 | int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { |
63 | return endpoints.end - endpoints.start; |
64 | } |
65 | |
66 | UnicodeString ParsedPatternInfo::getString(int32_t flags) const { |
67 | const Endpoints& endpoints = getEndpoints(flags); |
68 | if (endpoints.start == endpoints.end) { |
69 | return UnicodeString(); |
70 | } |
71 | // Create a new UnicodeString |
72 | return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); |
73 | } |
74 | |
75 | const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { |
76 | bool prefix = (flags & AFFIX_PREFIX) != 0; |
77 | bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; |
78 | bool padding = (flags & AFFIX_PADDING) != 0; |
79 | if (isNegative && padding) { |
80 | return negative.paddingEndpoints; |
81 | } else if (padding) { |
82 | return positive.paddingEndpoints; |
83 | } else if (prefix && isNegative) { |
84 | return negative.prefixEndpoints; |
85 | } else if (prefix) { |
86 | return positive.prefixEndpoints; |
87 | } else if (isNegative) { |
88 | return negative.suffixEndpoints; |
89 | } else { |
90 | return positive.suffixEndpoints; |
91 | } |
92 | } |
93 | |
94 | bool ParsedPatternInfo::positiveHasPlusSign() const { |
95 | return positive.hasPlusSign; |
96 | } |
97 | |
98 | bool ParsedPatternInfo::hasNegativeSubpattern() const { |
99 | return fHasNegativeSubpattern; |
100 | } |
101 | |
102 | bool ParsedPatternInfo::negativeHasMinusSign() const { |
103 | return negative.hasMinusSign; |
104 | } |
105 | |
106 | bool ParsedPatternInfo::hasCurrencySign() const { |
107 | return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); |
108 | } |
109 | |
110 | bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { |
111 | return AffixUtils::containsType(pattern, type, status); |
112 | } |
113 | |
114 | bool ParsedPatternInfo::hasBody() const { |
115 | return positive.integerTotal > 0; |
116 | } |
117 | |
118 | ///////////////////////////////////////////////////// |
119 | /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// |
120 | ///////////////////////////////////////////////////// |
121 | |
122 | UChar32 ParsedPatternInfo::ParserState::peek() { |
123 | if (offset == pattern.length()) { |
124 | return -1; |
125 | } else { |
126 | return pattern.char32At(offset); |
127 | } |
128 | } |
129 | |
130 | UChar32 ParsedPatternInfo::ParserState::next() { |
131 | int codePoint = peek(); |
132 | offset += U16_LENGTH(codePoint); |
133 | return codePoint; |
134 | } |
135 | |
136 | void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) { |
137 | if (U_FAILURE(status)) { return; } |
138 | this->pattern = patternString; |
139 | |
140 | // This class is not intended for writing twice! |
141 | // Use move assignment to overwrite instead. |
142 | U_ASSERT(state.offset == 0); |
143 | |
144 | // pattern := subpattern (';' subpattern)? |
145 | currentSubpattern = &positive; |
146 | consumeSubpattern(status); |
147 | if (U_FAILURE(status)) { return; } |
148 | if (state.peek() == u';') { |
149 | state.next(); // consume the ';' |
150 | // Don't consume the negative subpattern if it is empty (trailing ';') |
151 | if (state.peek() != -1) { |
152 | fHasNegativeSubpattern = true; |
153 | currentSubpattern = &negative; |
154 | consumeSubpattern(status); |
155 | if (U_FAILURE(status)) { return; } |
156 | } |
157 | } |
158 | if (state.peek() != -1) { |
159 | state.toParseException(u"Found unquoted special character" ); |
160 | status = U_UNQUOTED_SPECIAL; |
161 | } |
162 | } |
163 | |
164 | void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) { |
165 | // subpattern := literals? number exponent? literals? |
166 | consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); |
167 | if (U_FAILURE(status)) { return; } |
168 | consumeAffix(currentSubpattern->prefixEndpoints, status); |
169 | if (U_FAILURE(status)) { return; } |
170 | consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status); |
171 | if (U_FAILURE(status)) { return; } |
172 | consumeFormat(status); |
173 | if (U_FAILURE(status)) { return; } |
174 | consumeExponent(status); |
175 | if (U_FAILURE(status)) { return; } |
176 | consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status); |
177 | if (U_FAILURE(status)) { return; } |
178 | consumeAffix(currentSubpattern->suffixEndpoints, status); |
179 | if (U_FAILURE(status)) { return; } |
180 | consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status); |
181 | if (U_FAILURE(status)) { return; } |
182 | } |
183 | |
184 | void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) { |
185 | if (state.peek() != u'*') { |
186 | return; |
187 | } |
188 | if (currentSubpattern->hasPadding) { |
189 | state.toParseException(u"Cannot have multiple pad specifiers" ); |
190 | status = U_MULTIPLE_PAD_SPECIFIERS; |
191 | return; |
192 | } |
193 | currentSubpattern->paddingLocation = paddingLocation; |
194 | currentSubpattern->hasPadding = true; |
195 | state.next(); // consume the '*' |
196 | currentSubpattern->paddingEndpoints.start = state.offset; |
197 | consumeLiteral(status); |
198 | currentSubpattern->paddingEndpoints.end = state.offset; |
199 | } |
200 | |
201 | void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) { |
202 | // literals := { literal } |
203 | endpoints.start = state.offset; |
204 | while (true) { |
205 | switch (state.peek()) { |
206 | case u'#': |
207 | case u'@': |
208 | case u';': |
209 | case u'*': |
210 | case u'.': |
211 | case u',': |
212 | case u'0': |
213 | case u'1': |
214 | case u'2': |
215 | case u'3': |
216 | case u'4': |
217 | case u'5': |
218 | case u'6': |
219 | case u'7': |
220 | case u'8': |
221 | case u'9': |
222 | case -1: |
223 | // Characters that cannot appear unquoted in a literal |
224 | // break outer; |
225 | goto after_outer; |
226 | |
227 | case u'%': |
228 | currentSubpattern->hasPercentSign = true; |
229 | break; |
230 | |
231 | case u'‰': |
232 | currentSubpattern->hasPerMilleSign = true; |
233 | break; |
234 | |
235 | case u'¤': |
236 | currentSubpattern->hasCurrencySign = true; |
237 | break; |
238 | |
239 | case u'-': |
240 | currentSubpattern->hasMinusSign = true; |
241 | break; |
242 | |
243 | case u'+': |
244 | currentSubpattern->hasPlusSign = true; |
245 | break; |
246 | |
247 | default: |
248 | break; |
249 | } |
250 | consumeLiteral(status); |
251 | if (U_FAILURE(status)) { return; } |
252 | } |
253 | after_outer: |
254 | endpoints.end = state.offset; |
255 | } |
256 | |
257 | void ParsedPatternInfo::consumeLiteral(UErrorCode& status) { |
258 | if (state.peek() == -1) { |
259 | state.toParseException(u"Expected unquoted literal but found EOL" ); |
260 | status = U_PATTERN_SYNTAX_ERROR; |
261 | return; |
262 | } else if (state.peek() == u'\'') { |
263 | state.next(); // consume the starting quote |
264 | while (state.peek() != u'\'') { |
265 | if (state.peek() == -1) { |
266 | state.toParseException(u"Expected quoted literal but found EOL" ); |
267 | status = U_PATTERN_SYNTAX_ERROR; |
268 | return; |
269 | } else { |
270 | state.next(); // consume a quoted character |
271 | } |
272 | } |
273 | state.next(); // consume the ending quote |
274 | } else { |
275 | // consume a non-quoted literal character |
276 | state.next(); |
277 | } |
278 | } |
279 | |
280 | void ParsedPatternInfo::consumeFormat(UErrorCode& status) { |
281 | consumeIntegerFormat(status); |
282 | if (U_FAILURE(status)) { return; } |
283 | if (state.peek() == u'.') { |
284 | state.next(); // consume the decimal point |
285 | currentSubpattern->hasDecimal = true; |
286 | currentSubpattern->widthExceptAffixes += 1; |
287 | consumeFractionFormat(status); |
288 | if (U_FAILURE(status)) { return; } |
289 | } |
290 | } |
291 | |
292 | void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) { |
293 | // Convenience reference: |
294 | ParsedSubpatternInfo& result = *currentSubpattern; |
295 | |
296 | while (true) { |
297 | switch (state.peek()) { |
298 | case u',': |
299 | result.widthExceptAffixes += 1; |
300 | result.groupingSizes <<= 16; |
301 | break; |
302 | |
303 | case u'#': |
304 | if (result.integerNumerals > 0) { |
305 | state.toParseException(u"# cannot follow 0 before decimal point" ); |
306 | status = U_UNEXPECTED_TOKEN; |
307 | return; |
308 | } |
309 | result.widthExceptAffixes += 1; |
310 | result.groupingSizes += 1; |
311 | if (result.integerAtSigns > 0) { |
312 | result.integerTrailingHashSigns += 1; |
313 | } else { |
314 | result.integerLeadingHashSigns += 1; |
315 | } |
316 | result.integerTotal += 1; |
317 | break; |
318 | |
319 | case u'@': |
320 | if (result.integerNumerals > 0) { |
321 | state.toParseException(u"Cannot mix 0 and @" ); |
322 | status = U_UNEXPECTED_TOKEN; |
323 | return; |
324 | } |
325 | if (result.integerTrailingHashSigns > 0) { |
326 | state.toParseException(u"Cannot nest # inside of a run of @" ); |
327 | status = U_UNEXPECTED_TOKEN; |
328 | return; |
329 | } |
330 | result.widthExceptAffixes += 1; |
331 | result.groupingSizes += 1; |
332 | result.integerAtSigns += 1; |
333 | result.integerTotal += 1; |
334 | break; |
335 | |
336 | case u'0': |
337 | case u'1': |
338 | case u'2': |
339 | case u'3': |
340 | case u'4': |
341 | case u'5': |
342 | case u'6': |
343 | case u'7': |
344 | case u'8': |
345 | case u'9': |
346 | if (result.integerAtSigns > 0) { |
347 | state.toParseException(u"Cannot mix @ and 0" ); |
348 | status = U_UNEXPECTED_TOKEN; |
349 | return; |
350 | } |
351 | result.widthExceptAffixes += 1; |
352 | result.groupingSizes += 1; |
353 | result.integerNumerals += 1; |
354 | result.integerTotal += 1; |
355 | if (!result.rounding.isZeroish() || state.peek() != u'0') { |
356 | result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true); |
357 | } |
358 | break; |
359 | |
360 | default: |
361 | goto after_outer; |
362 | } |
363 | state.next(); // consume the symbol |
364 | } |
365 | |
366 | after_outer: |
367 | // Disallow patterns with a trailing ',' or with two ',' next to each other |
368 | auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff); |
369 | auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff); |
370 | auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff); |
371 | if (grouping1 == 0 && grouping2 != -1) { |
372 | state.toParseException(u"Trailing grouping separator is invalid" ); |
373 | status = U_UNEXPECTED_TOKEN; |
374 | return; |
375 | } |
376 | if (grouping2 == 0 && grouping3 != -1) { |
377 | state.toParseException(u"Grouping width of zero is invalid" ); |
378 | status = U_PATTERN_SYNTAX_ERROR; |
379 | return; |
380 | } |
381 | } |
382 | |
383 | void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) { |
384 | // Convenience reference: |
385 | ParsedSubpatternInfo& result = *currentSubpattern; |
386 | |
387 | int32_t zeroCounter = 0; |
388 | while (true) { |
389 | switch (state.peek()) { |
390 | case u'#': |
391 | result.widthExceptAffixes += 1; |
392 | result.fractionHashSigns += 1; |
393 | result.fractionTotal += 1; |
394 | zeroCounter++; |
395 | break; |
396 | |
397 | case u'0': |
398 | case u'1': |
399 | case u'2': |
400 | case u'3': |
401 | case u'4': |
402 | case u'5': |
403 | case u'6': |
404 | case u'7': |
405 | case u'8': |
406 | case u'9': |
407 | if (result.fractionHashSigns > 0) { |
408 | state.toParseException(u"0 cannot follow # after decimal point" ); |
409 | status = U_UNEXPECTED_TOKEN; |
410 | return; |
411 | } |
412 | result.widthExceptAffixes += 1; |
413 | result.fractionNumerals += 1; |
414 | result.fractionTotal += 1; |
415 | if (state.peek() == u'0') { |
416 | zeroCounter++; |
417 | } else { |
418 | result.rounding |
419 | .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false); |
420 | zeroCounter = 0; |
421 | } |
422 | break; |
423 | |
424 | default: |
425 | return; |
426 | } |
427 | state.next(); // consume the symbol |
428 | } |
429 | } |
430 | |
431 | void ParsedPatternInfo::consumeExponent(UErrorCode& status) { |
432 | // Convenience reference: |
433 | ParsedSubpatternInfo& result = *currentSubpattern; |
434 | |
435 | if (state.peek() != u'E') { |
436 | return; |
437 | } |
438 | if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) { |
439 | state.toParseException(u"Cannot have grouping separator in scientific notation" ); |
440 | status = U_MALFORMED_EXPONENTIAL_PATTERN; |
441 | return; |
442 | } |
443 | state.next(); // consume the E |
444 | result.widthExceptAffixes++; |
445 | if (state.peek() == u'+') { |
446 | state.next(); // consume the + |
447 | result.exponentHasPlusSign = true; |
448 | result.widthExceptAffixes++; |
449 | } |
450 | while (state.peek() == u'0') { |
451 | state.next(); // consume the 0 |
452 | result.exponentZeros += 1; |
453 | result.widthExceptAffixes++; |
454 | } |
455 | } |
456 | |
457 | /////////////////////////////////////////////////// |
458 | /// END RECURSIVE DESCENT PARSER IMPLEMENTATION /// |
459 | /////////////////////////////////////////////////// |
460 | |
461 | void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, |
462 | DecimalFormatProperties& properties, |
463 | IgnoreRounding ignoreRounding, UErrorCode& status) { |
464 | if (pattern.length() == 0) { |
465 | // Backwards compatibility requires that we reset to the default values. |
466 | // TODO: Only overwrite the properties that "saveToProperties" normally touches? |
467 | properties.clear(); |
468 | return; |
469 | } |
470 | |
471 | ParsedPatternInfo patternInfo; |
472 | parseToPatternInfo(pattern, patternInfo, status); |
473 | if (U_FAILURE(status)) { return; } |
474 | patternInfoToProperties(properties, patternInfo, ignoreRounding, status); |
475 | } |
476 | |
477 | void |
478 | PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo, |
479 | IgnoreRounding _ignoreRounding, UErrorCode& status) { |
480 | // Translate from PatternParseResult to Properties. |
481 | // Note that most data from "negative" is ignored per the specification of DecimalFormat. |
482 | |
483 | const ParsedSubpatternInfo& positive = patternInfo.positive; |
484 | |
485 | bool ignoreRounding; |
486 | if (_ignoreRounding == IGNORE_ROUNDING_NEVER) { |
487 | ignoreRounding = false; |
488 | } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) { |
489 | ignoreRounding = positive.hasCurrencySign; |
490 | } else { |
491 | U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS); |
492 | ignoreRounding = true; |
493 | } |
494 | |
495 | // Grouping settings |
496 | auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff); |
497 | auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff); |
498 | auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff); |
499 | if (grouping2 != -1) { |
500 | properties.groupingSize = grouping1; |
501 | properties.groupingUsed = true; |
502 | } else { |
503 | properties.groupingSize = -1; |
504 | properties.groupingUsed = false; |
505 | } |
506 | if (grouping3 != -1) { |
507 | properties.secondaryGroupingSize = grouping2; |
508 | } else { |
509 | properties.secondaryGroupingSize = -1; |
510 | } |
511 | |
512 | // For backwards compatibility, require that the pattern emit at least one min digit. |
513 | int minInt, minFrac; |
514 | if (positive.integerTotal == 0 && positive.fractionTotal > 0) { |
515 | // patterns like ".##" |
516 | minInt = 0; |
517 | minFrac = uprv_max(1, positive.fractionNumerals); |
518 | } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) { |
519 | // patterns like "#.##" |
520 | minInt = 1; |
521 | minFrac = 0; |
522 | } else { |
523 | minInt = positive.integerNumerals; |
524 | minFrac = positive.fractionNumerals; |
525 | } |
526 | |
527 | // Rounding settings |
528 | // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage |
529 | if (positive.integerAtSigns > 0) { |
530 | properties.minimumFractionDigits = -1; |
531 | properties.maximumFractionDigits = -1; |
532 | properties.roundingIncrement = 0.0; |
533 | properties.minimumSignificantDigits = positive.integerAtSigns; |
534 | properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns; |
535 | } else if (!positive.rounding.isZeroish()) { |
536 | if (!ignoreRounding) { |
537 | properties.minimumFractionDigits = minFrac; |
538 | properties.maximumFractionDigits = positive.fractionTotal; |
539 | properties.roundingIncrement = positive.rounding.toDouble(); |
540 | } else { |
541 | properties.minimumFractionDigits = -1; |
542 | properties.maximumFractionDigits = -1; |
543 | properties.roundingIncrement = 0.0; |
544 | } |
545 | properties.minimumSignificantDigits = -1; |
546 | properties.maximumSignificantDigits = -1; |
547 | } else { |
548 | if (!ignoreRounding) { |
549 | properties.minimumFractionDigits = minFrac; |
550 | properties.maximumFractionDigits = positive.fractionTotal; |
551 | properties.roundingIncrement = 0.0; |
552 | } else { |
553 | properties.minimumFractionDigits = -1; |
554 | properties.maximumFractionDigits = -1; |
555 | properties.roundingIncrement = 0.0; |
556 | } |
557 | properties.minimumSignificantDigits = -1; |
558 | properties.maximumSignificantDigits = -1; |
559 | } |
560 | |
561 | // If the pattern ends with a '.' then force the decimal point. |
562 | if (positive.hasDecimal && positive.fractionTotal == 0) { |
563 | properties.decimalSeparatorAlwaysShown = true; |
564 | } else { |
565 | properties.decimalSeparatorAlwaysShown = false; |
566 | } |
567 | |
568 | // Scientific notation settings |
569 | if (positive.exponentZeros > 0) { |
570 | properties.exponentSignAlwaysShown = positive.exponentHasPlusSign; |
571 | properties.minimumExponentDigits = positive.exponentZeros; |
572 | if (positive.integerAtSigns == 0) { |
573 | // patterns without '@' can define max integer digits, used for engineering notation |
574 | properties.minimumIntegerDigits = positive.integerNumerals; |
575 | properties.maximumIntegerDigits = positive.integerTotal; |
576 | } else { |
577 | // patterns with '@' cannot define max integer digits |
578 | properties.minimumIntegerDigits = 1; |
579 | properties.maximumIntegerDigits = -1; |
580 | } |
581 | } else { |
582 | properties.exponentSignAlwaysShown = false; |
583 | properties.minimumExponentDigits = -1; |
584 | properties.minimumIntegerDigits = minInt; |
585 | properties.maximumIntegerDigits = -1; |
586 | } |
587 | |
588 | // Compute the affix patterns (required for both padding and affixes) |
589 | UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX); |
590 | UnicodeString posSuffix = patternInfo.getString(0); |
591 | |
592 | // Padding settings |
593 | if (positive.hasPadding) { |
594 | // The width of the positive prefix and suffix templates are included in the padding |
595 | int paddingWidth = positive.widthExceptAffixes + |
596 | AffixUtils::estimateLength(posPrefix, status) + |
597 | AffixUtils::estimateLength(posSuffix, status); |
598 | properties.formatWidth = paddingWidth; |
599 | UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING); |
600 | if (rawPaddingString.length() == 1) { |
601 | properties.padString = rawPaddingString; |
602 | } else if (rawPaddingString.length() == 2) { |
603 | if (rawPaddingString.charAt(0) == u'\'') { |
604 | properties.padString.setTo(u"'" , -1); |
605 | } else { |
606 | properties.padString = rawPaddingString; |
607 | } |
608 | } else { |
609 | properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2); |
610 | } |
611 | properties.padPosition = positive.paddingLocation; |
612 | } else { |
613 | properties.formatWidth = -1; |
614 | properties.padString.setToBogus(); |
615 | properties.padPosition.nullify(); |
616 | } |
617 | |
618 | // Set the affixes |
619 | // Always call the setter, even if the prefixes are empty, especially in the case of the |
620 | // negative prefix pattern, to prevent default values from overriding the pattern. |
621 | properties.positivePrefixPattern = posPrefix; |
622 | properties.positiveSuffixPattern = posSuffix; |
623 | if (patternInfo.fHasNegativeSubpattern) { |
624 | properties.negativePrefixPattern = patternInfo.getString( |
625 | AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX); |
626 | properties.negativeSuffixPattern = patternInfo.getString( |
627 | AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN); |
628 | } else { |
629 | properties.negativePrefixPattern.setToBogus(); |
630 | properties.negativeSuffixPattern.setToBogus(); |
631 | } |
632 | |
633 | // Set the magnitude multiplier |
634 | if (positive.hasPercentSign) { |
635 | properties.magnitudeMultiplier = 2; |
636 | } else if (positive.hasPerMilleSign) { |
637 | properties.magnitudeMultiplier = 3; |
638 | } else { |
639 | properties.magnitudeMultiplier = 0; |
640 | } |
641 | } |
642 | |
643 | /////////////////////////////////////////////////////////////////// |
644 | /// End PatternStringParser.java; begin PatternStringUtils.java /// |
645 | /////////////////////////////////////////////////////////////////// |
646 | |
// Decides whether a rounding increment is too fine to matter for a given maximum number
// of fraction digits (maxFrac) and should therefore be ignored when formatting.
//
// Example: an increment of 0.01 is ignored when maxFrac is 1, but not when maxFrac is 2
// or more. Increments are judged by significance after rounding: 0.006 behaves like 0.01
// (not ignored when maxFrac >= 2), whereas 0.005 behaves like 0.001. Doubling the
// increment up front implements that rounding.
//
// roundIncr: the rounding increment; must be non-zero.
// maxFrac:   maximum fraction digits; a negative value never ignores the increment.
//
// Returns true if the increment should be ignored.
bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) {
    if (maxFrac < 0) {
        return false;
    }
    double scaled = roundIncr * 2.0;
    int32_t frac = 0;
    // Count the decimal places needed before the doubled increment exceeds 1,
    // stopping as soon as that count goes beyond maxFrac.
    while (frac <= maxFrac && scaled <= 1.0) {
        ++frac;
        scaled *= 10.0;
    }
    return frac > maxFrac;
}
665 | |
666 | UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, |
667 | UErrorCode& status) { |
668 | UnicodeString sb; |
669 | |
670 | // Convenience references |
671 | // The uprv_min() calls prevent DoS |
672 | int32_t dosMax = 100; |
673 | int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax)); |
674 | int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax)); |
675 | bool useGrouping = properties.groupingUsed; |
676 | int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax); |
677 | NullableValue<PadPosition> paddingLocation = properties.padPosition; |
678 | UnicodeString paddingString = properties.padString; |
679 | int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax)); |
680 | int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); |
681 | int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax)); |
682 | int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax); |
683 | int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax); |
684 | int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax); |
685 | bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown; |
686 | int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax); |
687 | bool exponentShowPlusSign = properties.exponentSignAlwaysShown; |
688 | |
689 | PropertiesAffixPatternProvider affixes(properties, status); |
690 | |
691 | // Prefixes |
692 | sb.append(affixes.getString(AffixPatternProvider::AFFIX_POS_PREFIX)); |
693 | int32_t afterPrefixPos = sb.length(); |
694 | |
695 | // Figure out the grouping sizes. |
696 | if (!useGrouping) { |
697 | grouping1 = 0; |
698 | grouping2 = 0; |
699 | } else if (grouping1 == grouping2) { |
700 | grouping1 = 0; |
701 | } |
702 | int32_t groupingLength = grouping1 + grouping2 + 1; |
703 | |
704 | // Figure out the digits we need to put in the pattern. |
705 | double roundingInterval = properties.roundingIncrement; |
706 | UnicodeString digitsString; |
707 | int32_t digitsStringScale = 0; |
708 | if (maxSig != uprv_min(dosMax, -1)) { |
709 | // Significant Digits. |
710 | while (digitsString.length() < minSig) { |
711 | digitsString.append(u'@'); |
712 | } |
713 | while (digitsString.length() < maxSig) { |
714 | digitsString.append(u'#'); |
715 | } |
716 | } else if (roundingInterval != 0.0 && !ignoreRoundingIncrement(roundingInterval,maxFrac)) { |
717 | // Rounding Interval. |
718 | digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr); |
719 | // TODO: Check for DoS here? |
720 | DecimalQuantity incrementQuantity; |
721 | incrementQuantity.setToDouble(roundingInterval); |
722 | incrementQuantity.adjustMagnitude(-digitsStringScale); |
723 | incrementQuantity.roundToMagnitude(0, kDefaultMode, status); |
724 | UnicodeString str = incrementQuantity.toPlainString(); |
725 | if (str.charAt(0) == u'-') { |
726 | // TODO: Unsupported operation exception or fail silently? |
727 | digitsString.append(str, 1, str.length() - 1); |
728 | } else { |
729 | digitsString.append(str); |
730 | } |
731 | } |
732 | while (digitsString.length() + digitsStringScale < minInt) { |
733 | digitsString.insert(0, u'0'); |
734 | } |
735 | while (-digitsStringScale < minFrac) { |
736 | digitsString.append(u'0'); |
737 | digitsStringScale--; |
738 | } |
739 | |
740 | // Write the digits to the string builder |
741 | int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale); |
742 | m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1; |
743 | int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale; |
744 | for (int32_t magnitude = m0; magnitude >= mN; magnitude--) { |
745 | int32_t di = digitsString.length() + digitsStringScale - magnitude - 1; |
746 | if (di < 0 || di >= digitsString.length()) { |
747 | sb.append(u'#'); |
748 | } else { |
749 | sb.append(digitsString.charAt(di)); |
750 | } |
751 | // Decimal separator |
752 | if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { |
753 | sb.append(u'.'); |
754 | } |
755 | if (!useGrouping) { |
756 | continue; |
757 | } |
758 | // Least-significant grouping separator |
759 | if (magnitude > 0 && magnitude == grouping1) { |
760 | sb.append(u','); |
761 | } |
762 | // All other grouping separators |
763 | if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) { |
764 | sb.append(u','); |
765 | } |
766 | } |
767 | |
768 | // Exponential notation |
769 | if (exponentDigits != uprv_min(dosMax, -1)) { |
770 | sb.append(u'E'); |
771 | if (exponentShowPlusSign) { |
772 | sb.append(u'+'); |
773 | } |
774 | for (int32_t i = 0; i < exponentDigits; i++) { |
775 | sb.append(u'0'); |
776 | } |
777 | } |
778 | |
779 | // Suffixes |
780 | int32_t beforeSuffixPos = sb.length(); |
781 | sb.append(affixes.getString(AffixPatternProvider::AFFIX_POS_SUFFIX)); |
782 | |
783 | // Resolve Padding |
784 | if (paddingWidth > 0 && !paddingLocation.isNull()) { |
785 | while (paddingWidth - sb.length() > 0) { |
786 | sb.insert(afterPrefixPos, u'#'); |
787 | beforeSuffixPos++; |
788 | } |
789 | int32_t addedLength; |
790 | switch (paddingLocation.get(status)) { |
791 | case PadPosition::UNUM_PAD_BEFORE_PREFIX: |
792 | addedLength = escapePaddingString(paddingString, sb, 0, status); |
793 | sb.insert(0, u'*'); |
794 | afterPrefixPos += addedLength + 1; |
795 | beforeSuffixPos += addedLength + 1; |
796 | break; |
797 | case PadPosition::UNUM_PAD_AFTER_PREFIX: |
798 | addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status); |
799 | sb.insert(afterPrefixPos, u'*'); |
800 | afterPrefixPos += addedLength + 1; |
801 | beforeSuffixPos += addedLength + 1; |
802 | break; |
803 | case PadPosition::UNUM_PAD_BEFORE_SUFFIX: |
804 | escapePaddingString(paddingString, sb, beforeSuffixPos, status); |
805 | sb.insert(beforeSuffixPos, u'*'); |
806 | break; |
807 | case PadPosition::UNUM_PAD_AFTER_SUFFIX: |
808 | sb.append(u'*'); |
809 | escapePaddingString(paddingString, sb, sb.length(), status); |
810 | break; |
811 | } |
812 | if (U_FAILURE(status)) { return sb; } |
813 | } |
814 | |
815 | // Negative affixes |
816 | // Ignore if the negative prefix pattern is "-" and the negative suffix is empty |
817 | if (affixes.hasNegativeSubpattern()) { |
818 | sb.append(u';'); |
819 | sb.append(affixes.getString(AffixPatternProvider::AFFIX_NEG_PREFIX)); |
820 | // Copy the positive digit format into the negative. |
821 | // This is optional; the pattern is the same as if '#' were appended here instead. |
822 | // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. |
823 | // See http://bugs.icu-project.org/trac/ticket/13707 |
824 | UnicodeString copy(sb); |
825 | sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); |
826 | sb.append(affixes.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); |
827 | } |
828 | |
829 | return sb; |
830 | } |
831 | |
832 | int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, |
833 | UErrorCode& status) { |
834 | (void) status; |
835 | if (input.length() == 0) { |
836 | input.setTo(kFallbackPaddingString, -1); |
837 | } |
838 | int startLength = output.length(); |
839 | if (input.length() == 1) { |
840 | if (input.compare(u"'" , -1) == 0) { |
841 | output.insert(startIndex, u"''" , -1); |
842 | } else { |
843 | output.insert(startIndex, input); |
844 | } |
845 | } else { |
846 | output.insert(startIndex, u'\''); |
847 | int offset = 1; |
848 | for (int i = 0; i < input.length(); i++) { |
849 | // it's okay to deal in chars here because the quote mark is the only interesting thing. |
850 | char16_t ch = input.charAt(i); |
851 | if (ch == u'\'') { |
852 | output.insert(startIndex + offset, u"''" , -1); |
853 | offset += 2; |
854 | } else { |
855 | output.insert(startIndex + offset, ch); |
856 | offset += 1; |
857 | } |
858 | } |
859 | output.insert(startIndex + offset, u'\''); |
860 | } |
861 | return output.length() - startLength; |
862 | } |
863 | |
864 | UnicodeString |
865 | PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols, |
866 | bool toLocalized, UErrorCode& status) { |
867 | // Construct a table of strings to be converted between localized and standard. |
868 | static constexpr int32_t LEN = 21; |
869 | UnicodeString table[LEN][2]; |
870 | int standIdx = toLocalized ? 0 : 1; |
871 | int localIdx = toLocalized ? 1 : 0; |
872 | table[0][standIdx] = u"%" ; |
873 | table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); |
874 | table[1][standIdx] = u"‰" ; |
875 | table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); |
876 | table[2][standIdx] = u"." ; |
877 | table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); |
878 | table[3][standIdx] = u"," ; |
879 | table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); |
880 | table[4][standIdx] = u"-" ; |
881 | table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); |
882 | table[5][standIdx] = u"+" ; |
883 | table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); |
884 | table[6][standIdx] = u";" ; |
885 | table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol); |
886 | table[7][standIdx] = u"@" ; |
887 | table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol); |
888 | table[8][standIdx] = u"E" ; |
889 | table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol); |
890 | table[9][standIdx] = u"*" ; |
891 | table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol); |
892 | table[10][standIdx] = u"#" ; |
893 | table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol); |
894 | for (int i = 0; i < 10; i++) { |
895 | table[11 + i][standIdx] = u'0' + i; |
896 | table[11 + i][localIdx] = symbols.getConstDigitSymbol(i); |
897 | } |
898 | |
899 | // Special case: quotes are NOT allowed to be in any localIdx strings. |
900 | // Substitute them with '’' instead. |
901 | for (int32_t i = 0; i < LEN; i++) { |
902 | table[i][localIdx].findAndReplace(u'\'', u'’'); |
903 | } |
904 | |
905 | // Iterate through the string and convert. |
906 | // State table: |
907 | // 0 => base state |
908 | // 1 => first char inside a quoted sequence in input and output string |
909 | // 2 => inside a quoted sequence in input and output string |
910 | // 3 => first char after a close quote in input string; |
911 | // close quote still needs to be written to output string |
912 | // 4 => base state in input string; inside quoted sequence in output string |
913 | // 5 => first char inside a quoted sequence in input string; |
914 | // inside quoted sequence in output string |
915 | UnicodeString result; |
916 | int state = 0; |
917 | for (int offset = 0; offset < input.length(); offset++) { |
918 | UChar ch = input.charAt(offset); |
919 | |
920 | // Handle a quote character (state shift) |
921 | if (ch == u'\'') { |
922 | if (state == 0) { |
923 | result.append(u'\''); |
924 | state = 1; |
925 | continue; |
926 | } else if (state == 1) { |
927 | result.append(u'\''); |
928 | state = 0; |
929 | continue; |
930 | } else if (state == 2) { |
931 | state = 3; |
932 | continue; |
933 | } else if (state == 3) { |
934 | result.append(u'\''); |
935 | result.append(u'\''); |
936 | state = 1; |
937 | continue; |
938 | } else if (state == 4) { |
939 | state = 5; |
940 | continue; |
941 | } else { |
942 | U_ASSERT(state == 5); |
943 | result.append(u'\''); |
944 | result.append(u'\''); |
945 | state = 4; |
946 | continue; |
947 | } |
948 | } |
949 | |
950 | if (state == 0 || state == 3 || state == 4) { |
951 | for (auto& pair : table) { |
952 | // Perform a greedy match on this symbol string |
953 | UnicodeString temp = input.tempSubString(offset, pair[0].length()); |
954 | if (temp == pair[0]) { |
955 | // Skip ahead past this region for the next iteration |
956 | offset += pair[0].length() - 1; |
957 | if (state == 3 || state == 4) { |
958 | result.append(u'\''); |
959 | state = 0; |
960 | } |
961 | result.append(pair[1]); |
962 | goto continue_outer; |
963 | } |
964 | } |
965 | // No replacement found. Check if a special quote is necessary |
966 | for (auto& pair : table) { |
967 | UnicodeString temp = input.tempSubString(offset, pair[1].length()); |
968 | if (temp == pair[1]) { |
969 | if (state == 0) { |
970 | result.append(u'\''); |
971 | state = 4; |
972 | } |
973 | result.append(ch); |
974 | goto continue_outer; |
975 | } |
976 | } |
977 | // Still nothing. Copy the char verbatim. (Add a close quote if necessary) |
978 | if (state == 3 || state == 4) { |
979 | result.append(u'\''); |
980 | state = 0; |
981 | } |
982 | result.append(ch); |
983 | } else { |
984 | U_ASSERT(state == 1 || state == 2 || state == 5); |
985 | result.append(ch); |
986 | state = 2; |
987 | } |
988 | continue_outer:; |
989 | } |
990 | // Resolve final quotes |
991 | if (state == 3 || state == 4) { |
992 | result.append(u'\''); |
993 | state = 0; |
994 | } |
995 | if (state != 0) { |
996 | // Malformed localized pattern: unterminated quote |
997 | status = U_PATTERN_SYNTAX_ERROR; |
998 | } |
999 | return result; |
1000 | } |
1001 | |
1002 | void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, |
1003 | PatternSignType patternSignType, |
1004 | StandardPlural::Form plural, |
1005 | bool perMilleReplacesPercent, UnicodeString& output) { |
1006 | |
1007 | // Should the output render '+' where '-' would normally appear in the pattern? |
1008 | bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN) |
1009 | && !patternInfo.positiveHasPlusSign(); |
1010 | |
1011 | // Should we use the affix from the negative subpattern? |
1012 | // (If not, we will use the positive subpattern.) |
1013 | bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() |
1014 | && (patternSignType == PATTERN_SIGN_TYPE_NEG |
1015 | || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign)); |
1016 | |
1017 | // Resolve the flags for the affix pattern. |
1018 | int flags = 0; |
1019 | if (useNegativeAffixPattern) { |
1020 | flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; |
1021 | } |
1022 | if (isPrefix) { |
1023 | flags |= AffixPatternProvider::AFFIX_PREFIX; |
1024 | } |
1025 | if (plural != StandardPlural::Form::COUNT) { |
1026 | U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); |
1027 | flags |= plural; |
1028 | } |
1029 | |
1030 | // Should we prepend a sign to the pattern? |
1031 | bool prependSign; |
1032 | if (!isPrefix || useNegativeAffixPattern) { |
1033 | prependSign = false; |
1034 | } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { |
1035 | prependSign = true; |
1036 | } else { |
1037 | prependSign = plusReplacesMinusSign; |
1038 | } |
1039 | |
1040 | // Compute the length of the affix pattern. |
1041 | int length = patternInfo.length(flags) + (prependSign ? 1 : 0); |
1042 | |
1043 | // Finally, set the result into the StringBuilder. |
1044 | output.remove(); |
1045 | for (int index = 0; index < length; index++) { |
1046 | char16_t candidate; |
1047 | if (prependSign && index == 0) { |
1048 | candidate = u'-'; |
1049 | } else if (prependSign) { |
1050 | candidate = patternInfo.charAt(flags, index - 1); |
1051 | } else { |
1052 | candidate = patternInfo.charAt(flags, index); |
1053 | } |
1054 | if (plusReplacesMinusSign && candidate == u'-') { |
1055 | candidate = u'+'; |
1056 | } |
1057 | if (perMilleReplacesPercent && candidate == u'%') { |
1058 | candidate = u'‰'; |
1059 | } |
1060 | output.append(candidate); |
1061 | } |
1062 | } |
1063 | |
1064 | PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) { |
1065 | switch (signDisplay) { |
1066 | case UNUM_SIGN_AUTO: |
1067 | case UNUM_SIGN_ACCOUNTING: |
1068 | switch (signum) { |
1069 | case SIGNUM_NEG: |
1070 | case SIGNUM_NEG_ZERO: |
1071 | return PATTERN_SIGN_TYPE_NEG; |
1072 | case SIGNUM_POS_ZERO: |
1073 | case SIGNUM_POS: |
1074 | return PATTERN_SIGN_TYPE_POS; |
1075 | default: |
1076 | break; |
1077 | } |
1078 | break; |
1079 | |
1080 | case UNUM_SIGN_ALWAYS: |
1081 | case UNUM_SIGN_ACCOUNTING_ALWAYS: |
1082 | switch (signum) { |
1083 | case SIGNUM_NEG: |
1084 | case SIGNUM_NEG_ZERO: |
1085 | return PATTERN_SIGN_TYPE_NEG; |
1086 | case SIGNUM_POS_ZERO: |
1087 | case SIGNUM_POS: |
1088 | return PATTERN_SIGN_TYPE_POS_SIGN; |
1089 | default: |
1090 | break; |
1091 | } |
1092 | break; |
1093 | |
1094 | case UNUM_SIGN_EXCEPT_ZERO: |
1095 | case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: |
1096 | switch (signum) { |
1097 | case SIGNUM_NEG: |
1098 | return PATTERN_SIGN_TYPE_NEG; |
1099 | case SIGNUM_NEG_ZERO: |
1100 | case SIGNUM_POS_ZERO: |
1101 | return PATTERN_SIGN_TYPE_POS; |
1102 | case SIGNUM_POS: |
1103 | return PATTERN_SIGN_TYPE_POS_SIGN; |
1104 | default: |
1105 | break; |
1106 | } |
1107 | break; |
1108 | |
1109 | case UNUM_SIGN_NEVER: |
1110 | return PATTERN_SIGN_TYPE_POS; |
1111 | |
1112 | default: |
1113 | break; |
1114 | } |
1115 | |
1116 | UPRV_UNREACHABLE; |
1117 | return PATTERN_SIGN_TYPE_POS; |
1118 | } |
1119 | |
1120 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
1121 | |