1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8// Allow implicit conversion from char16_t* to UnicodeString for this file:
9// Helpful in toString methods and elsewhere.
10#define UNISTR_FROM_STRING_EXPLICIT
11
12#include "numparse_types.h"
13#include "numparse_scientific.h"
14#include "static_unicode_sets.h"
15#include "string_segment.h"
16
17using namespace icu;
18using namespace icu::numparse;
19using namespace icu::numparse::impl;
20
21
22namespace {
23
24inline const UnicodeSet& minusSignSet() {
25 return *unisets::get(unisets::MINUS_SIGN);
26}
27
28inline const UnicodeSet& plusSignSet() {
29 return *unisets::get(unisets::PLUS_SIGN);
30}
31
32} // namespace
33
34
35ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
36 : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
37 fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
38 fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
39
40 const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
41 if (minusSignSet().contains(minusSign)) {
42 fCustomMinusSign.setToBogus();
43 } else {
44 fCustomMinusSign = minusSign;
45 }
46
47 const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
48 if (plusSignSet().contains(plusSign)) {
49 fCustomPlusSign.setToBogus();
50 } else {
51 fCustomPlusSign = plusSign;
52 }
53}
54
55bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
56 // Only accept scientific notation after the mantissa.
57 if (!result.seenNumber()) {
58 return false;
59 }
60
61 // Only accept one exponent per string.
62 if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
63 return false;
64 }
65
66 // First match the scientific separator, and then match another number after it.
67 // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
68 int32_t initialOffset = segment.getOffset();
69 int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
70 if (overlap == fExponentSeparatorString.length()) {
71 // Full exponent separator match.
72
73 // First attempt to get a code point, returning true if we can't get one.
74 if (segment.length() == overlap) {
75 return true;
76 }
77 segment.adjustOffset(overlap);
78
79 // Allow ignorables before the sign.
80 // Note: call site is guarded by the segment.length() check above.
81 // Note: the ignorables matcher should not touch the result.
82 fIgnorablesMatcher.match(segment, result, status);
83 if (segment.length() == 0) {
84 segment.setOffset(initialOffset);
85 return true;
86 }
87
88 // Allow a sign, and then try to match digits.
89 int8_t exponentSign = 1;
90 if (segment.startsWith(minusSignSet())) {
91 exponentSign = -1;
92 segment.adjustOffsetByCodePoint();
93 } else if (segment.startsWith(plusSignSet())) {
94 segment.adjustOffsetByCodePoint();
95 } else if (segment.startsWith(fCustomMinusSign)) {
96 overlap = segment.getCommonPrefixLength(fCustomMinusSign);
97 if (overlap != fCustomMinusSign.length()) {
98 // Partial custom sign match
99 segment.setOffset(initialOffset);
100 return true;
101 }
102 exponentSign = -1;
103 segment.adjustOffset(overlap);
104 } else if (segment.startsWith(fCustomPlusSign)) {
105 overlap = segment.getCommonPrefixLength(fCustomPlusSign);
106 if (overlap != fCustomPlusSign.length()) {
107 // Partial custom sign match
108 segment.setOffset(initialOffset);
109 return true;
110 }
111 segment.adjustOffset(overlap);
112 }
113
114 // Return true if the segment is empty.
115 if (segment.length() == 0) {
116 segment.setOffset(initialOffset);
117 return true;
118 }
119
120 // Allow ignorables after the sign.
121 // Note: call site is guarded by the segment.length() check above.
122 // Note: the ignorables matcher should not touch the result.
123 fIgnorablesMatcher.match(segment, result, status);
124 if (segment.length() == 0) {
125 segment.setOffset(initialOffset);
126 return true;
127 }
128
129 // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
130 bool wasBogus = result.quantity.bogus;
131 result.quantity.bogus = false;
132 int digitsOffset = segment.getOffset();
133 bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
134 result.quantity.bogus = wasBogus;
135
136 if (segment.getOffset() != digitsOffset) {
137 // At least one exponent digit was matched.
138 result.flags |= FLAG_HAS_EXPONENT;
139 } else {
140 // No exponent digits were matched
141 segment.setOffset(initialOffset);
142 }
143 return digitsReturnValue;
144
145 } else if (overlap == segment.length()) {
146 // Partial exponent separator match
147 return true;
148 }
149
150 // No match
151 return false;
152}
153
154bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
155 return segment.startsWith(fExponentSeparatorString);
156}
157
158UnicodeString ScientificMatcher::toString() const {
159 return u"<Scientific>";
160}
161
162
163#endif /* #if !UCONFIG_NO_FORMATTING */
164