number_patternstring.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/number_patternstring.cpp]

1	// © 2017 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3
4	#include "unicode/utypes.h"
5
6	#if !UCONFIG_NO_FORMATTING
7
8	// Allow implicit conversion from char16_t* to UnicodeString for this file:
9	// Helpful in toString methods and elsewhere.
10	#define UNISTR_FROM_STRING_EXPLICIT
11	#define UNISTR_FROM_CHAR_EXPLICIT
12
13	#include "uassert.h"
14	#include "number_patternstring.h"
15	#include "unicode/utf16.h"
16	#include "number_utils.h"
17	#include "number_roundingutils.h"
18	#include "number_mapper.h"
19
20	using namespace icu;
21	using namespace icu::number;
22	using namespace icu::number::impl;
23
24
25	void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
26	UErrorCode& status) {
27	patternInfo.consumePattern(patternString, status);
28	}
29
30	DecimalFormatProperties
31	PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
32	UErrorCode& status) {
33	DecimalFormatProperties properties;
34	parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
35	return properties;
36	}
37
38	DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern,
39	UErrorCode& status) {
40	return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status);
41	}
42
43	void
44	PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
45	IgnoreRounding ignoreRounding, UErrorCode& status) {
46	parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
47	}
48
49
50	char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
51	const Endpoints& endpoints = getEndpoints(flags);
52	if (index < `0` \|\| index >= endpoints.end - endpoints.start) {
53	UPRV_UNREACHABLE;
54	}
55	return pattern.charAt(endpoints.start + index);
56	}
57
58	int32_t ParsedPatternInfo::length(int32_t flags) const {
59	return getLengthFromEndpoints(getEndpoints(flags));
60	}
61
62	int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
63	return endpoints.end - endpoints.start;
64	}
65
66	UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
67	const Endpoints& endpoints = getEndpoints(flags);
68	if (endpoints.start == endpoints.end) {
69	return UnicodeString ();
70	}
71	// Create a new UnicodeString
72	return UnicodeString (pattern, endpoints.start, endpoints.end - endpoints.start);
73	}
74
75	const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
76	bool prefix = (flags & AFFIX_PREFIX) != `0`;
77	bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != `0`;
78	bool padding = (flags & AFFIX_PADDING) != `0`;
79	if (isNegative && padding) {
80	return negative.paddingEndpoints;
81	} else if (padding) {
82	return positive.paddingEndpoints;
83	} else if (prefix && isNegative) {
84	return negative.prefixEndpoints;
85	} else if (prefix) {
86	return positive.prefixEndpoints;
87	} else if (isNegative) {
88	return negative.suffixEndpoints;
89	} else {
90	return positive.suffixEndpoints;
91	}
92	}
93
94	bool ParsedPatternInfo::positiveHasPlusSign() const {
95	return positive.hasPlusSign;
96	}
97
98	bool ParsedPatternInfo::hasNegativeSubpattern() const {
99	return fHasNegativeSubpattern;
100	}
101
102	bool ParsedPatternInfo::negativeHasMinusSign() const {
103	return negative.hasMinusSign;
104	}
105
106	bool ParsedPatternInfo::hasCurrencySign() const {
107	return positive.hasCurrencySign \|\| (fHasNegativeSubpattern && negative.hasCurrencySign);
108	}
109
110	bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
111	return AffixUtils::containsType(pattern, type, status);
112	}
113
114	bool ParsedPatternInfo::hasBody() const {
115	return positive.integerTotal > `0`;
116	}
117
118	/////////////////////////////////////////////////////
119	/// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
120	/////////////////////////////////////////////////////
121
122	UChar32 ParsedPatternInfo::ParserState::peek() {
123	if (offset == pattern.length()) {
124	return -`1`;
125	} else {
126	return pattern.char32At(offset);
127	}
128	}
129
130	UChar32 ParsedPatternInfo::ParserState::next() {
131	int codePoint = peek();
132	offset += U16_LENGTH(codePoint);
133	return codePoint;
134	}
135
136	void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
137	if (U_FAILURE(status)) { return; }
138	this->pattern = patternString;
139
140	// This class is not intended for writing twice!
141	// Use move assignment to overwrite instead.
142	U_ASSERT(state.offset == `0`);
143
144	// pattern := subpattern (';' subpattern)?
145	currentSubpattern = &positive;
146	consumeSubpattern(status);
147	if (U_FAILURE(status)) { return; }
148	if (state.peek() == u`';'`) {
149	state.next(); // consume the ';'
150	// Don't consume the negative subpattern if it is empty (trailing ';')
151	if (state.peek() != -`1`) {
152	fHasNegativeSubpattern = true;
153	currentSubpattern = &negative;
154	consumeSubpattern(status);
155	if (U_FAILURE(status)) { return; }
156	}
157	}
158	if (state.peek() != -`1`) {
159	state.toParseException(u"Found unquoted special character");
160	status = U_UNQUOTED_SPECIAL;
161	}
162	}
163
164	void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
165	// subpattern := literals? number exponent? literals?
166	consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
167	if (U_FAILURE(status)) { return; }
168	consumeAffix(currentSubpattern->prefixEndpoints, status);
169	if (U_FAILURE(status)) { return; }
170	consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
171	if (U_FAILURE(status)) { return; }
172	consumeFormat(status);
173	if (U_FAILURE(status)) { return; }
174	consumeExponent(status);
175	if (U_FAILURE(status)) { return; }
176	consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
177	if (U_FAILURE(status)) { return; }
178	consumeAffix(currentSubpattern->suffixEndpoints, status);
179	if (U_FAILURE(status)) { return; }
180	consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
181	if (U_FAILURE(status)) { return; }
182	}
183
184	void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
185	if (state.peek() != u`'*'`) {
186	return;
187	}
188	if (currentSubpattern->hasPadding) {
189	state.toParseException(u"Cannot have multiple pad specifiers");
190	status = U_MULTIPLE_PAD_SPECIFIERS;
191	return;
192	}
193	currentSubpattern->paddingLocation = paddingLocation;
194	currentSubpattern->hasPadding = true;
195	state.next(); // consume the ''*
196	currentSubpattern->paddingEndpoints.start = state.offset;
197	consumeLiteral(status);
198	currentSubpattern->paddingEndpoints.end = state.offset;
199	}
200
201	void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
202	// literals := { literal }
203	endpoints.start = state.offset;
204	while (true) {
205	switch (state.peek()) {
206	case u`'#'`:
207	case u`'@'`:
208	case u`';'`:
209	case u`'*'`:
210	case u`'.'`:
211	case u`','`:
212	case u`'0'`:
213	case u`'1'`:
214	case u`'2'`:
215	case u`'3'`:
216	case u`'4'`:
217	case u`'5'`:
218	case u`'6'`:
219	case u`'7'`:
220	case u`'8'`:
221	case u`'9'`:
222	case -`1`:
223	// Characters that cannot appear unquoted in a literal
224	// break outer;
225	goto after_outer;
226
227	case u`'%'`:
228	currentSubpattern->hasPercentSign = true;
229	break;
230
231	case u`'‰'`:
232	currentSubpattern->hasPerMilleSign = true;
233	break;
234
235	case u`'¤'`:
236	currentSubpattern->hasCurrencySign = true;
237	break;
238
239	case u`'-'`:
240	currentSubpattern->hasMinusSign = true;
241	break;
242
243	case u`'+'`:
244	currentSubpattern->hasPlusSign = true;
245	break;
246
247	default:
248	break;
249	}
250	consumeLiteral(status);
251	if (U_FAILURE(status)) { return; }
252	}
253	after_outer:
254	endpoints.end = state.offset;
255	}
256
257	void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
258	if (state.peek() == -`1`) {
259	state.toParseException(u"Expected unquoted literal but found EOL");
260	status = U_PATTERN_SYNTAX_ERROR;
261	return;
262	} else if (state.peek() == u`'\''`) {
263	state.next(); // consume the starting quote
264	while (state.peek() != u`'\''`) {
265	if (state.peek() == -`1`) {
266	state.toParseException(u"Expected quoted literal but found EOL");
267	status = U_PATTERN_SYNTAX_ERROR;
268	return;
269	} else {
270	state.next(); // consume a quoted character
271	}
272	}
273	state.next(); // consume the ending quote
274	} else {
275	// consume a non-quoted literal character
276	state.next();
277	}
278	}
279
280	void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
281	consumeIntegerFormat(status);
282	if (U_FAILURE(status)) { return; }
283	if (state.peek() == u`'.'`) {
284	state.next(); // consume the decimal point
285	currentSubpattern->hasDecimal = true;
286	currentSubpattern->widthExceptAffixes += `1`;
287	consumeFractionFormat(status);
288	if (U_FAILURE(status)) { return; }
289	}
290	}
291
292	void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
293	// Convenience reference:
294	ParsedSubpatternInfo& result = *currentSubpattern;
295
296	while (true) {
297	switch (state.peek()) {
298	case u`','`:
299	result.widthExceptAffixes += `1`;
300	result.groupingSizes <<= `16`;
301	break;
302
303	case u`'#'`:
304	if (result.integerNumerals > `0`) {
305	state.toParseException(u"# cannot follow 0 before decimal point");
306	status = U_UNEXPECTED_TOKEN;
307	return;
308	}
309	result.widthExceptAffixes += `1`;
310	result.groupingSizes += `1`;
311	if (result.integerAtSigns > `0`) {
312	result.integerTrailingHashSigns += `1`;
313	} else {
314	result.integerLeadingHashSigns += `1`;
315	}
316	result.integerTotal += `1`;
317	break;
318
319	case u`'@'`:
320	if (result.integerNumerals > `0`) {
321	state.toParseException(u"Cannot mix 0 and @");
322	status = U_UNEXPECTED_TOKEN;
323	return;
324	}
325	if (result.integerTrailingHashSigns > `0`) {
326	state.toParseException(u"Cannot nest # inside of a run of @");
327	status = U_UNEXPECTED_TOKEN;
328	return;
329	}
330	result.widthExceptAffixes += `1`;
331	result.groupingSizes += `1`;
332	result.integerAtSigns += `1`;
333	result.integerTotal += `1`;
334	break;
335
336	case u`'0'`:
337	case u`'1'`:
338	case u`'2'`:
339	case u`'3'`:
340	case u`'4'`:
341	case u`'5'`:
342	case u`'6'`:
343	case u`'7'`:
344	case u`'8'`:
345	case u`'9'`:
346	if (result.integerAtSigns > `0`) {
347	state.toParseException(u"Cannot mix @ and 0");
348	status = U_UNEXPECTED_TOKEN;
349	return;
350	}
351	result.widthExceptAffixes += `1`;
352	result.groupingSizes += `1`;
353	result.integerNumerals += `1`;
354	result.integerTotal += `1`;
355	if (!result.rounding.isZeroish() \|\| state.peek() != u`'0'`) {
356	result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u`'0'`), `0`, true);
357	}
358	break;
359
360	default:
361	goto after_outer;
362	}
363	state.next(); // consume the symbol
364	}
365
366	after_outer:
367	// Disallow patterns with a trailing ',' or with two ',' next to each other
368	auto grouping1 = static_cast<int16_t> (result.groupingSizes & `0xffff`);
369	auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> `16`) & `0xffff`);
370	auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> `32`) & `0xffff`);
371	if (grouping1 == `0` && grouping2 != -`1`) {
372	state.toParseException(u"Trailing grouping separator is invalid");
373	status = U_UNEXPECTED_TOKEN;
374	return;
375	}
376	if (grouping2 == `0` && grouping3 != -`1`) {
377	state.toParseException(u"Grouping width of zero is invalid");
378	status = U_PATTERN_SYNTAX_ERROR;
379	return;
380	}
381	}
382
383	void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
384	// Convenience reference:
385	ParsedSubpatternInfo& result = *currentSubpattern;
386
387	int32_t zeroCounter = `0`;
388	while (true) {
389	switch (state.peek()) {
390	case u`'#'`:
391	result.widthExceptAffixes += `1`;
392	result.fractionHashSigns += `1`;
393	result.fractionTotal += `1`;
394	zeroCounter++;
395	break;
396
397	case u`'0'`:
398	case u`'1'`:
399	case u`'2'`:
400	case u`'3'`:
401	case u`'4'`:
402	case u`'5'`:
403	case u`'6'`:
404	case u`'7'`:
405	case u`'8'`:
406	case u`'9'`:
407	if (result.fractionHashSigns > `0`) {
408	state.toParseException(u"0 cannot follow # after decimal point");
409	status = U_UNEXPECTED_TOKEN;
410	return;
411	}
412	result.widthExceptAffixes += `1`;
413	result.fractionNumerals += `1`;
414	result.fractionTotal += `1`;
415	if (state.peek() == u`'0'`) {
416	zeroCounter++;
417	} else {
418	result.rounding
419	.appendDigit(static_cast<int8_t>(state.peek() - u`'0'`), zeroCounter, false);
420	zeroCounter = `0`;
421	}
422	break;
423
424	default:
425	return;
426	}
427	state.next(); // consume the symbol
428	}
429	}
430
431	void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
432	// Convenience reference:
433	ParsedSubpatternInfo& result = *currentSubpattern;
434
435	if (state.peek() != u`'E'`) {
436	return;
437	}
438	if ((result.groupingSizes & `0xffff0000L`) != `0xffff0000L`) {
439	state.toParseException(u"Cannot have grouping separator in scientific notation");
440	status = U_MALFORMED_EXPONENTIAL_PATTERN;
441	return;
442	}
443	state.next(); // consume the E
444	result.widthExceptAffixes++;
445	if (state.peek() == u`'+'`) {
446	state.next(); // consume the +
447	result.exponentHasPlusSign = true;
448	result.widthExceptAffixes++;
449	}
450	while (state.peek() == u`'0'`) {
451	state.next(); // consume the 0
452	result.exponentZeros += `1`;
453	result.widthExceptAffixes++;
454	}
455	}
456
457	///////////////////////////////////////////////////
458	/// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
459	///////////////////////////////////////////////////
460
461	void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
462	DecimalFormatProperties& properties,
463	IgnoreRounding ignoreRounding, UErrorCode& status) {
464	if (pattern.length() == `0`) {
465	// Backwards compatibility requires that we reset to the default values.
466	// TODO: Only overwrite the properties that "saveToProperties" normally touches?
467	properties.clear();
468	return;
469	}
470
471	ParsedPatternInfo patternInfo;
472	parseToPatternInfo(pattern, patternInfo, status);
473	if (U_FAILURE(status)) { return; }
474	patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
475	}
476
477	void
478	PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
479	IgnoreRounding _ignoreRounding, UErrorCode& status) {
480	// Translate from PatternParseResult to Properties.
481	// Note that most data from "negative" is ignored per the specification of DecimalFormat.
482
483	const ParsedSubpatternInfo& positive = patternInfo.positive;
484
485	bool ignoreRounding;
486	if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
487	ignoreRounding = false;
488	} else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
489	ignoreRounding = positive.hasCurrencySign;
490	} else {
491	U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
492	ignoreRounding = true;
493	}
494
495	// Grouping settings
496	auto grouping1 = static_cast<int16_t> (positive.groupingSizes & `0xffff`);
497	auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> `16`) & `0xffff`);
498	auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> `32`) & `0xffff`);
499	if (grouping2 != -`1`) {
500	properties.groupingSize = grouping1;
501	properties.groupingUsed = true;
502	} else {
503	properties.groupingSize = -`1`;
504	properties.groupingUsed = false;
505	}
506	if (grouping3 != -`1`) {
507	properties.secondaryGroupingSize = grouping2;
508	} else {
509	properties.secondaryGroupingSize = -`1`;
510	}
511
512	// For backwards compatibility, require that the pattern emit at least one min digit.
513	int minInt, minFrac;
514	if (positive.integerTotal == `0` && positive.fractionTotal > `0`) {
515	// patterns like ".##"
516	minInt = `0`;
517	minFrac = uprv_max(`1`, positive.fractionNumerals);
518	} else if (positive.integerNumerals == `0` && positive.fractionNumerals == `0`) {
519	// patterns like "#.##"
520	minInt = `1`;
521	minFrac = `0`;
522	} else {
523	minInt = positive.integerNumerals;
524	minFrac = positive.fractionNumerals;
525	}
526
527	// Rounding settings
528	// Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
529	if (positive.integerAtSigns > `0`) {
530	properties.minimumFractionDigits = -`1`;
531	properties.maximumFractionDigits = -`1`;
532	properties.roundingIncrement = `0.0`;
533	properties.minimumSignificantDigits = positive.integerAtSigns;
534	properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
535	} else if (!positive.rounding.isZeroish()) {
536	if (!ignoreRounding) {
537	properties.minimumFractionDigits = minFrac;
538	properties.maximumFractionDigits = positive.fractionTotal;
539	properties.roundingIncrement = positive.rounding.toDouble();
540	} else {
541	properties.minimumFractionDigits = -`1`;
542	properties.maximumFractionDigits = -`1`;
543	properties.roundingIncrement = `0.0`;
544	}
545	properties.minimumSignificantDigits = -`1`;
546	properties.maximumSignificantDigits = -`1`;
547	} else {
548	if (!ignoreRounding) {
549	properties.minimumFractionDigits = minFrac;
550	properties.maximumFractionDigits = positive.fractionTotal;
551	properties.roundingIncrement = `0.0`;
552	} else {
553	properties.minimumFractionDigits = -`1`;
554	properties.maximumFractionDigits = -`1`;
555	properties.roundingIncrement = `0.0`;
556	}
557	properties.minimumSignificantDigits = -`1`;
558	properties.maximumSignificantDigits = -`1`;
559	}
560
561	// If the pattern ends with a '.' then force the decimal point.
562	if (positive.hasDecimal && positive.fractionTotal == `0`) {
563	properties.decimalSeparatorAlwaysShown = true;
564	} else {
565	properties.decimalSeparatorAlwaysShown = false;
566	}
567
568	// Scientific notation settings
569	if (positive.exponentZeros > `0`) {
570	properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
571	properties.minimumExponentDigits = positive.exponentZeros;
572	if (positive.integerAtSigns == `0`) {
573	// patterns without '@' can define max integer digits, used for engineering notation
574	properties.minimumIntegerDigits = positive.integerNumerals;
575	properties.maximumIntegerDigits = positive.integerTotal;
576	} else {
577	// patterns with '@' cannot define max integer digits
578	properties.minimumIntegerDigits = `1`;
579	properties.maximumIntegerDigits = -`1`;
580	}
581	} else {
582	properties.exponentSignAlwaysShown = false;
583	properties.minimumExponentDigits = -`1`;
584	properties.minimumIntegerDigits = minInt;
585	properties.maximumIntegerDigits = -`1`;
586	}
587
588	// Compute the affix patterns (required for both padding and affixes)
589	UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
590	UnicodeString posSuffix = patternInfo.getString(`0`);
591
592	// Padding settings
593	if (positive.hasPadding) {
594	// The width of the positive prefix and suffix templates are included in the padding
595	int paddingWidth = positive.widthExceptAffixes +
596	AffixUtils::estimateLength(posPrefix, status) +
597	AffixUtils::estimateLength(posSuffix, status);
598	properties.formatWidth = paddingWidth;
599	UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
600	if (rawPaddingString.length() == `1`) {
601	properties.padString = rawPaddingString;
602	} else if (rawPaddingString.length() == `2`) {
603	if (rawPaddingString.charAt(`0`) == u`'\''`) {
604	properties.padString.setTo(u"'", -`1`);
605	} else {
606	properties.padString = rawPaddingString;
607	}
608	} else {
609	properties.padString = UnicodeString (rawPaddingString, `1`, rawPaddingString.length() - `2`);
610	}
611	properties.padPosition = positive.paddingLocation;
612	} else {
613	properties.formatWidth = -`1`;
614	properties.padString.setToBogus();
615	properties.padPosition.nullify();
616	}
617
618	// Set the affixes
619	// Always call the setter, even if the prefixes are empty, especially in the case of the
620	// negative prefix pattern, to prevent default values from overriding the pattern.
621	properties.positivePrefixPattern = posPrefix;
622	properties.positiveSuffixPattern = posSuffix;
623	if (patternInfo.fHasNegativeSubpattern) {
624	properties.negativePrefixPattern = patternInfo.getString(
625	AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN \| AffixPatternProvider::AFFIX_PREFIX);
626	properties.negativeSuffixPattern = patternInfo.getString(
627	AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
628	} else {
629	properties.negativePrefixPattern.setToBogus();
630	properties.negativeSuffixPattern.setToBogus();
631	}
632
633	// Set the magnitude multiplier
634	if (positive.hasPercentSign) {
635	properties.magnitudeMultiplier = `2`;
636	} else if (positive.hasPerMilleSign) {
637	properties.magnitudeMultiplier = `3`;
638	} else {
639	properties.magnitudeMultiplier = `0`;
640	}
641	}
642
643	///////////////////////////////////////////////////////////////////
644	/// End PatternStringParser.java; begin PatternStringUtils.java ///
645	///////////////////////////////////////////////////////////////////
646
647	// Determine whether a given roundingIncrement should be ignored for formatting
648	// based on the current maxFrac value (maximum fraction digits). For example a
649	// roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
650	// is 2 or more. Note that roundingIncrements are rounded in significance, so
651	// a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e.
652	// it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of
653	// 0.005 is treated like 0.001 for significance). This is the reason for the
654	// initial doubling below.
655	// roundIncr must be non-zero.
656	bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) {
657	if (maxFrac < `0`) {
658	return false;
659	}
660	int32_t frac = `0`;
661	roundIncr *= `2.0`;
662	for (frac = `0`; frac <= maxFrac && roundIncr <= `1.0`; frac++, roundIncr *= `10.0`);
663	return (frac > maxFrac);
664	}
665
666	UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
667	UErrorCode& status) {
668	UnicodeString sb;
669
670	// Convenience references
671	// The uprv_min() calls prevent DoS
672	int32_t dosMax = `100`;
673	int32_t grouping1 = uprv_max(`0`, uprv_min(properties.groupingSize, dosMax));
674	int32_t grouping2 = uprv_max(`0`, uprv_min(properties.secondaryGroupingSize, dosMax));
675	bool useGrouping = properties.groupingUsed;
676	int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax);
677	NullableValue<PadPosition> paddingLocation = properties.padPosition;
678	UnicodeString paddingString = properties.padString;
679	int32_t minInt = uprv_max(`0`, uprv_min(properties.minimumIntegerDigits, dosMax));
680	int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
681	int32_t minFrac = uprv_max(`0`, uprv_min(properties.minimumFractionDigits, dosMax));
682	int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
683	int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
684	int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
685	bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
686	int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
687	bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
688
689	PropertiesAffixPatternProvider affixes(properties, status);
690
691	// Prefixes
692	sb.append(affixes.getString(AffixPatternProvider::AFFIX_POS_PREFIX));
693	int32_t afterPrefixPos = sb.length();
694
695	// Figure out the grouping sizes.
696	if (!useGrouping) {
697	grouping1 = `0`;
698	grouping2 = `0`;
699	} else if (grouping1 == grouping2) {
700	grouping1 = `0`;
701	}
702	int32_t groupingLength = grouping1 + grouping2 + `1`;
703
704	// Figure out the digits we need to put in the pattern.
705	double roundingInterval = properties.roundingIncrement;
706	UnicodeString digitsString;
707	int32_t digitsStringScale = `0`;
708	if (maxSig != uprv_min(dosMax, -`1`)) {
709	// Significant Digits.
710	while (digitsString.length() < minSig) {
711	digitsString.append(u`'@'`);
712	}
713	while (digitsString.length() < maxSig) {
714	digitsString.append(u`'#'`);
715	}
716	} else if (roundingInterval != `0.0` && !ignoreRoundingIncrement(roundingInterval,maxFrac)) {
717	// Rounding Interval.
718	digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr);
719	// TODO: Check for DoS here?
720	DecimalQuantity incrementQuantity;
721	incrementQuantity.setToDouble(roundingInterval);
722	incrementQuantity.adjustMagnitude(-digitsStringScale);
723	incrementQuantity.roundToMagnitude(`0`, kDefaultMode, status);
724	UnicodeString str = incrementQuantity.toPlainString();
725	if (str.charAt(`0`) == u`'-'`) {
726	// TODO: Unsupported operation exception or fail silently?
727	digitsString.append(str, `1`, str.length() - `1`);
728	} else {
729	digitsString.append(str);
730	}
731	}
732	while (digitsString.length() + digitsStringScale < minInt) {
733	digitsString.insert(`0`, u`'0'`);
734	}
735	while (-digitsStringScale < minFrac) {
736	digitsString.append(u`'0'`);
737	digitsStringScale--;
738	}
739
740	// Write the digits to the string builder
741	int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
742	m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - `1` : m0 - `1`;
743	int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
744	for (int32_t magnitude = m0; magnitude >= mN; magnitude--) {
745	int32_t di = digitsString.length() + digitsStringScale - magnitude - `1`;
746	if (di < `0` \|\| di >= digitsString.length()) {
747	sb.append(u`'#'`);
748	} else {
749	sb.append(digitsString.charAt(di));
750	}
751	// Decimal separator
752	if (magnitude == `0` && (alwaysShowDecimal \|\| mN < `0`)) {
753	sb.append(u`'.'`);
754	}
755	if (!useGrouping) {
756	continue;
757	}
758	// Least-significant grouping separator
759	if (magnitude > `0` && magnitude == grouping1) {
760	sb.append(u`','`);
761	}
762	// All other grouping separators
763	if (magnitude > grouping1 && grouping2 > `0` && (magnitude - grouping1) % grouping2 == `0`) {
764	sb.append(u`','`);
765	}
766	}
767
768	// Exponential notation
769	if (exponentDigits != uprv_min(dosMax, -`1`)) {
770	sb.append(u`'E'`);
771	if (exponentShowPlusSign) {
772	sb.append(u`'+'`);
773	}
774	for (int32_t i = `0`; i < exponentDigits; i++) {
775	sb.append(u`'0'`);
776	}
777	}
778
779	// Suffixes
780	int32_t beforeSuffixPos = sb.length();
781	sb.append(affixes.getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
782
783	// Resolve Padding
784	if (paddingWidth > `0` && !paddingLocation.isNull()) {
785	while (paddingWidth - sb.length() > `0`) {
786	sb.insert(afterPrefixPos, u`'#'`);
787	beforeSuffixPos++;
788	}
789	int32_t addedLength;
790	switch (paddingLocation.get(status)) {
791	case PadPosition::UNUM_PAD_BEFORE_PREFIX:
792	addedLength = escapePaddingString(paddingString, sb, `0`, status);
793	sb.insert(`0`, u`'*'`);
794	afterPrefixPos += addedLength + `1`;
795	beforeSuffixPos += addedLength + `1`;
796	break;
797	case PadPosition::UNUM_PAD_AFTER_PREFIX:
798	addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
799	sb.insert(afterPrefixPos, u`'*'`);
800	afterPrefixPos += addedLength + `1`;
801	beforeSuffixPos += addedLength + `1`;
802	break;
803	case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
804	escapePaddingString(paddingString, sb, beforeSuffixPos, status);
805	sb.insert(beforeSuffixPos, u`'*'`);
806	break;
807	case PadPosition::UNUM_PAD_AFTER_SUFFIX:
808	sb.append(u`'*'`);
809	escapePaddingString(paddingString, sb, sb.length(), status);
810	break;
811	}
812	if (U_FAILURE(status)) { return sb; }
813	}
814
815	// Negative affixes
816	// Ignore if the negative prefix pattern is "-" and the negative suffix is empty
817	if (affixes.hasNegativeSubpattern()) {
818	sb.append(u`';'`);
819	sb.append(affixes.getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
820	// Copy the positive digit format into the negative.
821	// This is optional; the pattern is the same as if '#' were appended here instead.
822	// NOTE: It is not safe to append the UnicodeString to itself, so we need to copy.
823	// See http://bugs.icu-project.org/trac/ticket/13707
824	UnicodeString copy(sb);
825	sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
826	sb.append(affixes.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
827	}
828
829	return sb;
830	}
831
832	int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
833	UErrorCode& status) {
834	(void) status;
835	if (input.length() == `0`) {
836	input.setTo(kFallbackPaddingString, -`1`);
837	}
838	int startLength = output.length();
839	if (input.length() == `1`) {
840	if (input.compare(u"'", -`1`) == `0`) {
841	output.insert(startIndex, u"''", -`1`);
842	} else {
843	output.insert(startIndex, input);
844	}
845	} else {
846	output.insert(startIndex, u`'\''`);
847	int offset = `1`;
848	for (int i = `0`; i < input.length(); i++) {
849	// it's okay to deal in chars here because the quote mark is the only interesting thing.
850	char16_t ch = input.charAt(i);
851	if (ch == u`'\''`) {
852	output.insert(startIndex + offset, u"''", -`1`);
853	offset += `2`;
854	} else {
855	output.insert(startIndex + offset, ch);
856	offset += `1`;
857	}
858	}
859	output.insert(startIndex + offset, u`'\''`);
860	}
861	return output.length() - startLength;
862	}
863
864	UnicodeString
865	PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
866	bool toLocalized, UErrorCode& status) {
867	// Construct a table of strings to be converted between localized and standard.
868	static constexpr int32_t LEN = `21`;
869	UnicodeString table[LEN][`2`];
870	int standIdx = toLocalized ? `0` : `1`;
871	int localIdx = toLocalized ? `1` : `0`;
872	table[`0`][standIdx] = u"%";
873	table[`0`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
874	table[`1`][standIdx] = u"‰";
875	table[`1`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
876	table[`2`][standIdx] = u".";
877	table[`2`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
878	table[`3`][standIdx] = u",";
879	table[`3`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
880	table[`4`][standIdx] = u"-";
881	table[`4`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
882	table[`5`][standIdx] = u"+";
883	table[`5`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
884	table[`6`][standIdx] = u";";
885	table[`6`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol);
886	table[`7`][standIdx] = u"@";
887	table[`7`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol);
888	table[`8`][standIdx] = u"E";
889	table[`8`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
890	table[`9`][standIdx] = u"*";
891	table[`9`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol);
892	table[`10`][standIdx] = u"#";
893	table[`10`][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol);
894	for (int i = `0`; i < `10`; i++) {
895	table[`11` + i][standIdx] = u`'0'` + i;
896	table[`11` + i][localIdx] = symbols.getConstDigitSymbol(i);
897	}
898
899	// Special case: quotes are NOT allowed to be in any localIdx strings.
900	// Substitute them with '’' instead.
901	for (int32_t i = `0`; i < LEN; i++) {
902	table[i][localIdx].findAndReplace(u`'\''`, u`'’'`);
903	}
904
905	// Iterate through the string and convert.
906	// State table:
907	// 0 => base state
908	// 1 => first char inside a quoted sequence in input and output string
909	// 2 => inside a quoted sequence in input and output string
910	// 3 => first char after a close quote in input string;
911	// close quote still needs to be written to output string
912	// 4 => base state in input string; inside quoted sequence in output string
913	// 5 => first char inside a quoted sequence in input string;
914	// inside quoted sequence in output string
915	UnicodeString result;
916	int state = `0`;
917	for (int offset = `0`; offset < input.length(); offset++) {
918	UChar ch = input.charAt(offset);
919
920	// Handle a quote character (state shift)
921	if (ch == u`'\''`) {
922	if (state == `0`) {
923	result.append(u`'\''`);
924	state = `1`;
925	continue;
926	} else if (state == `1`) {
927	result.append(u`'\''`);
928	state = `0`;
929	continue;
930	} else if (state == `2`) {
931	state = `3`;
932	continue;
933	} else if (state == `3`) {
934	result.append(u`'\''`);
935	result.append(u`'\''`);
936	state = `1`;
937	continue;
938	} else if (state == `4`) {
939	state = `5`;
940	continue;
941	} else {
942	U_ASSERT(state == `5`);
943	result.append(u`'\''`);
944	result.append(u`'\''`);
945	state = `4`;
946	continue;
947	}
948	}
949
950	if (state == `0` \|\| state == `3` \|\| state == `4`) {
951	for (auto& pair : table) {
952	// Perform a greedy match on this symbol string
953	UnicodeString temp = input.tempSubString(offset, pair[`0`].length());
954	if (temp == pair[`0`]) {
955	// Skip ahead past this region for the next iteration
956	offset += pair[`0`].length() - `1`;
957	if (state == `3` \|\| state == `4`) {
958	result.append(u`'\''`);
959	state = `0`;
960	}
961	result.append(pair[`1`]);
962	goto continue_outer;
963	}
964	}
965	// No replacement found. Check if a special quote is necessary
966	for (auto& pair : table) {
967	UnicodeString temp = input.tempSubString(offset, pair[`1`].length());
968	if (temp == pair[`1`]) {
969	if (state == `0`) {
970	result.append(u`'\''`);
971	state = `4`;
972	}
973	result.append(ch);
974	goto continue_outer;
975	}
976	}
977	// Still nothing. Copy the char verbatim. (Add a close quote if necessary)
978	if (state == `3` \|\| state == `4`) {
979	result.append(u`'\''`);
980	state = `0`;
981	}
982	result.append(ch);
983	} else {
984	U_ASSERT(state == `1` \|\| state == `2` \|\| state == `5`);
985	result.append(ch);
986	state = `2`;
987	}
988	continue_outer:;
989	}
990	// Resolve final quotes
991	if (state == `3` \|\| state == `4`) {
992	result.append(u`'\''`);
993	state = `0`;
994	}
995	if (state != `0`) {
996	// Malformed localized pattern: unterminated quote
997	status = U_PATTERN_SYNTAX_ERROR;
998	}
999	return result;
1000	}
1001
1002	void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
1003	PatternSignType patternSignType,
1004	StandardPlural::Form plural,
1005	bool perMilleReplacesPercent, UnicodeString& output) {
1006
1007	// Should the output render '+' where '-' would normally appear in the pattern?
1008	bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN)
1009	&& !patternInfo.positiveHasPlusSign();
1010
1011	// Should we use the affix from the negative subpattern?
1012	// (If not, we will use the positive subpattern.)
1013	bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern()
1014	&& (patternSignType == PATTERN_SIGN_TYPE_NEG
1015	\|\| (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
1016
1017	// Resolve the flags for the affix pattern.
1018	int flags = `0`;
1019	if (useNegativeAffixPattern) {
1020	flags \|= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
1021	}
1022	if (isPrefix) {
1023	flags \|= AffixPatternProvider::AFFIX_PREFIX;
1024	}
1025	if (plural != StandardPlural::Form::COUNT) {
1026	U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
1027	flags \|= plural;
1028	}
1029
1030	// Should we prepend a sign to the pattern?
1031	bool prependSign;
1032	if (!isPrefix \|\| useNegativeAffixPattern) {
1033	prependSign = false;
1034	} else if (patternSignType == PATTERN_SIGN_TYPE_NEG) {
1035	prependSign = true;
1036	} else {
1037	prependSign = plusReplacesMinusSign;
1038	}
1039
1040	// Compute the length of the affix pattern.
1041	int length = patternInfo.length(flags) + (prependSign ? `1` : `0`);
1042
1043	// Finally, set the result into the StringBuilder.
1044	output.remove();
1045	for (int index = `0`; index < length; index++) {
1046	char16_t candidate;
1047	if (prependSign && index == `0`) {
1048	candidate = u`'-'`;
1049	} else if (prependSign) {
1050	candidate = patternInfo.charAt(flags, index - `1`);
1051	} else {
1052	candidate = patternInfo.charAt(flags, index);
1053	}
1054	if (plusReplacesMinusSign && candidate == u`'-'`) {
1055	candidate = u`'+'`;
1056	}
1057	if (perMilleReplacesPercent && candidate == u`'%'`) {
1058	candidate = u`'‰'`;
1059	}
1060	output.append(candidate);
1061	}
1062	}
1063
1064	PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) {
1065	switch (signDisplay) {
1066	case UNUM_SIGN_AUTO:
1067	case UNUM_SIGN_ACCOUNTING:
1068	switch (signum) {
1069	case SIGNUM_NEG:
1070	case SIGNUM_NEG_ZERO:
1071	return PATTERN_SIGN_TYPE_NEG;
1072	case SIGNUM_POS_ZERO:
1073	case SIGNUM_POS:
1074	return PATTERN_SIGN_TYPE_POS;
1075	default:
1076	break;
1077	}
1078	break;
1079
1080	case UNUM_SIGN_ALWAYS:
1081	case UNUM_SIGN_ACCOUNTING_ALWAYS:
1082	switch (signum) {
1083	case SIGNUM_NEG:
1084	case SIGNUM_NEG_ZERO:
1085	return PATTERN_SIGN_TYPE_NEG;
1086	case SIGNUM_POS_ZERO:
1087	case SIGNUM_POS:
1088	return PATTERN_SIGN_TYPE_POS_SIGN;
1089	default:
1090	break;
1091	}
1092	break;
1093
1094	case UNUM_SIGN_EXCEPT_ZERO:
1095	case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO:
1096	switch (signum) {
1097	case SIGNUM_NEG:
1098	return PATTERN_SIGN_TYPE_NEG;
1099	case SIGNUM_NEG_ZERO:
1100	case SIGNUM_POS_ZERO:
1101	return PATTERN_SIGN_TYPE_POS;
1102	case SIGNUM_POS:
1103	return PATTERN_SIGN_TYPE_POS_SIGN;
1104	default:
1105	break;
1106	}
1107	break;
1108
1109	case UNUM_SIGN_NEVER:
1110	return PATTERN_SIGN_TYPE_POS;
1111
1112	default:
1113	break;
1114	}
1115
1116	UPRV_UNREACHABLE;
1117	return PATTERN_SIGN_TYPE_POS;
1118	}
1119
1120	#endif /* #if !UCONFIG_NO_FORMATTING */
1121

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/number_patternstring.cpp