numparse_scientific.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/numparse_scientific.cpp]

1	// © 2018 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3
4	#include "unicode/utypes.h"
5
6	#if !UCONFIG_NO_FORMATTING
7
8	// Allow implicit conversion from char16_t* to UnicodeString for this file:
9	// Helpful in toString methods and elsewhere.
10	#define UNISTR_FROM_STRING_EXPLICIT
11
12	#include "numparse_types.h"
13	#include "numparse_scientific.h"
14	#include "static_unicode_sets.h"
15	#include "string_segment.h"
16
17	using namespace icu;
18	using namespace icu::numparse;
19	using namespace icu::numparse::impl;
20
21
22	namespace {
23
24	inline const UnicodeSet& minusSignSet() {
25	return *unisets::get(unisets::MINUS_SIGN);
26	}
27
28	inline const UnicodeSet& plusSignSet() {
29	return *unisets::get(unisets::PLUS_SIGN);
30	}
31
32	} // namespace
33
34
35	ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
36	: fExponentSeparatorString (dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
37	fExponentMatcher (dfs, grouper, PARSE_FLAG_INTEGER_ONLY \| PARSE_FLAG_GROUPING_DISABLED),
38	fIgnorablesMatcher (PARSE_FLAG_STRICT_IGNORABLES) {
39
40	const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
41	if (minusSignSet().contains(minusSign)) {
42	fCustomMinusSign.setToBogus();
43	} else {
44	fCustomMinusSign = minusSign;
45	}
46
47	const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
48	if (plusSignSet().contains(plusSign)) {
49	fCustomPlusSign.setToBogus();
50	} else {
51	fCustomPlusSign = plusSign;
52	}
53	}
54
55	bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
56	// Only accept scientific notation after the mantissa.
57	if (!result.seenNumber()) {
58	return false;
59	}
60
61	// Only accept one exponent per string.
62	if (`0` != (result.flags & FLAG_HAS_EXPONENT)) {
63	return false;
64	}
65
66	// First match the scientific separator, and then match another number after it.
67	// NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
68	int32_t initialOffset = segment.getOffset();
69	int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
70	if (overlap == fExponentSeparatorString.length()) {
71	// Full exponent separator match.
72
73	// First attempt to get a code point, returning true if we can't get one.
74	if (segment.length() == overlap) {
75	return true;
76	}
77	segment.adjustOffset(overlap);
78
79	// Allow ignorables before the sign.
80	// Note: call site is guarded by the segment.length() check above.
81	// Note: the ignorables matcher should not touch the result.
82	fIgnorablesMatcher.match(segment, result, status);
83	if (segment.length() == `0`) {
84	segment.setOffset(initialOffset);
85	return true;
86	}
87
88	// Allow a sign, and then try to match digits.
89	int8_t exponentSign = `1`;
90	if (segment.startsWith(minusSignSet())) {
91	exponentSign = -`1`;
92	segment.adjustOffsetByCodePoint();
93	} else if (segment.startsWith(plusSignSet())) {
94	segment.adjustOffsetByCodePoint();
95	} else if (segment.startsWith(fCustomMinusSign)) {
96	overlap = segment.getCommonPrefixLength(fCustomMinusSign);
97	if (overlap != fCustomMinusSign.length()) {
98	// Partial custom sign match
99	segment.setOffset(initialOffset);
100	return true;
101	}
102	exponentSign = -`1`;
103	segment.adjustOffset(overlap);
104	} else if (segment.startsWith(fCustomPlusSign)) {
105	overlap = segment.getCommonPrefixLength(fCustomPlusSign);
106	if (overlap != fCustomPlusSign.length()) {
107	// Partial custom sign match
108	segment.setOffset(initialOffset);
109	return true;
110	}
111	segment.adjustOffset(overlap);
112	}
113
114	// Return true if the segment is empty.
115	if (segment.length() == `0`) {
116	segment.setOffset(initialOffset);
117	return true;
118	}
119
120	// Allow ignorables after the sign.
121	// Note: call site is guarded by the segment.length() check above.
122	// Note: the ignorables matcher should not touch the result.
123	fIgnorablesMatcher.match(segment, result, status);
124	if (segment.length() == `0`) {
125	segment.setOffset(initialOffset);
126	return true;
127	}
128
129	// We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
130	bool wasBogus = result.quantity.bogus;
131	result.quantity.bogus = false;
132	int digitsOffset = segment.getOffset();
133	bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
134	result.quantity.bogus = wasBogus;
135
136	if (segment.getOffset() != digitsOffset) {
137	// At least one exponent digit was matched.
138	result.flags \|= FLAG_HAS_EXPONENT;
139	} else {
140	// No exponent digits were matched
141	segment.setOffset(initialOffset);
142	}
143	return digitsReturnValue;
144
145	} else if (overlap == segment.length()) {
146	// Partial exponent separator match
147	return true;
148	}
149
150	// No match
151	return false;
152	}
153
154	bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
155	return segment.startsWith(fExponentSeparatorString);
156	}
157
158	UnicodeString ScientificMatcher::toString() const {
159	return u"<Scientific>";
160	}
161
162
163	#endif /* #if !UCONFIG_NO_FORMATTING */
164

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/numparse_scientific.cpp