1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 1997-2013, International Business Machines Corporation and * |
6 | * others. All Rights Reserved. * |
7 | ******************************************************************************* |
8 | * |
9 | * File CHOICFMT.CPP |
10 | * |
11 | * Modification History: |
12 | * |
13 | * Date Name Description |
14 | * 02/19/97 aliu Converted from java. |
15 | * 03/20/97 helena Finished first cut of implementation and got rid |
16 | * of nextDouble/previousDouble and replaced with |
17 | * boolean array. |
18 | * 4/10/97 aliu Clean up. Modified to work on AIX. |
19 | * 06/04/97 helena Fixed applyPattern(), toPattern() and not to include |
20 | * wchar.h. |
21 | * 07/09/97 helena Made ParsePosition into a class. |
22 | * 08/06/97 nos removed overloaded constructor, fixed 'format(array)' |
23 | * 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags) |
24 | * 02/22/99 stephen Removed character literals for EBCDIC safety |
25 | ******************************************************************************** |
26 | */ |
27 | |
28 | #include "unicode/utypes.h" |
29 | |
30 | #if !UCONFIG_NO_FORMATTING |
31 | |
32 | #include "unicode/choicfmt.h" |
33 | #include "unicode/numfmt.h" |
34 | #include "unicode/locid.h" |
35 | #include "cpputils.h" |
36 | #include "cstring.h" |
37 | #include "messageimpl.h" |
38 | #include "putilimp.h" |
39 | #include "uassert.h" |
40 | #include <stdio.h> |
41 | #include <float.h> |
42 | |
43 | // ***************************************************************************** |
44 | // class ChoiceFormat |
45 | // ***************************************************************************** |
46 | |
47 | U_NAMESPACE_BEGIN |
48 | |
49 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) |
50 | |
// Special characters used by ChoiceFormat. There are two characters
// used interchangeably to indicate <=. Either may appear in a pattern, but
// only LESS_EQUAL is generated when setChoices() builds a pattern.
54 | #define SINGLE_QUOTE ((UChar)0x0027) /*'*/ |
55 | #define LESS_THAN ((UChar)0x003C) /*<*/ |
56 | #define LESS_EQUAL ((UChar)0x0023) /*#*/ |
57 | #define LESS_EQUAL2 ((UChar)0x2264) |
58 | #define VERTICAL_BAR ((UChar)0x007C) /*|*/ |
59 | #define MINUS ((UChar)0x002D) /*-*/ |
60 | |
61 | static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/ |
62 | static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/ |
63 | |
64 | #ifdef INFINITY |
65 | #undef INFINITY |
66 | #endif |
67 | #define INFINITY ((UChar)0x221E) |
68 | |
69 | //static const UChar gPositiveInfinity[] = {INFINITY, 0}; |
70 | //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0}; |
71 | #define POSITIVE_INF_STRLEN 1 |
72 | #define NEGATIVE_INF_STRLEN 2 |
73 | |
74 | // ------------------------------------- |
75 | // Creates a ChoiceFormat instance based on the pattern. |
76 | |
77 | ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, |
78 | UErrorCode& status) |
79 | : constructorErrorCode(status), |
80 | msgPattern(status) |
81 | { |
82 | applyPattern(newPattern, status); |
83 | } |
84 | |
85 | // ------------------------------------- |
86 | // Creates a ChoiceFormat instance with the limit array and |
87 | // format strings for each limit. |
88 | |
89 | ChoiceFormat::ChoiceFormat(const double* limits, |
90 | const UnicodeString* formats, |
91 | int32_t cnt ) |
92 | : constructorErrorCode(U_ZERO_ERROR), |
93 | msgPattern(constructorErrorCode) |
94 | { |
95 | setChoices(limits, NULL, formats, cnt, constructorErrorCode); |
96 | } |
97 | |
98 | // ------------------------------------- |
99 | |
100 | ChoiceFormat::ChoiceFormat(const double* limits, |
101 | const UBool* closures, |
102 | const UnicodeString* formats, |
103 | int32_t cnt ) |
104 | : constructorErrorCode(U_ZERO_ERROR), |
105 | msgPattern(constructorErrorCode) |
106 | { |
107 | setChoices(limits, closures, formats, cnt, constructorErrorCode); |
108 | } |
109 | |
110 | // ------------------------------------- |
111 | // copy constructor |
112 | |
113 | ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) |
114 | : NumberFormat(that), |
115 | constructorErrorCode(that.constructorErrorCode), |
116 | msgPattern(that.msgPattern) |
117 | { |
118 | } |
119 | |
120 | // ------------------------------------- |
121 | // Private constructor that creates a |
122 | // ChoiceFormat instance based on the |
123 | // pattern and populates UParseError |
124 | |
125 | ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, |
126 | UParseError& parseError, |
127 | UErrorCode& status) |
128 | : constructorErrorCode(status), |
129 | msgPattern(status) |
130 | { |
131 | applyPattern(newPattern,parseError, status); |
132 | } |
133 | // ------------------------------------- |
134 | |
135 | UBool |
136 | ChoiceFormat::operator==(const Format& that) const |
137 | { |
138 | if (this == &that) return TRUE; |
139 | if (!NumberFormat::operator==(that)) return FALSE; |
140 | ChoiceFormat& thatAlias = (ChoiceFormat&)that; |
141 | return msgPattern == thatAlias.msgPattern; |
142 | } |
143 | |
144 | // ------------------------------------- |
145 | // copy constructor |
146 | |
147 | const ChoiceFormat& |
148 | ChoiceFormat::operator=(const ChoiceFormat& that) |
149 | { |
150 | if (this != &that) { |
151 | NumberFormat::operator=(that); |
152 | constructorErrorCode = that.constructorErrorCode; |
153 | msgPattern = that.msgPattern; |
154 | } |
155 | return *this; |
156 | } |
157 | |
158 | // ------------------------------------- |
159 | |
160 | ChoiceFormat::~ChoiceFormat() |
161 | { |
162 | } |
163 | |
164 | // ------------------------------------- |
165 | |
166 | /** |
167 | * Convert a double value to a string without the overhead of NumberFormat. |
168 | */ |
169 | UnicodeString& |
170 | ChoiceFormat::dtos(double value, |
171 | UnicodeString& string) |
172 | { |
173 | /* Buffer to contain the digits and any extra formatting stuff. */ |
174 | char temp[DBL_DIG + 16]; |
175 | char *itrPtr = temp; |
176 | char *expPtr; |
177 | |
178 | sprintf(temp, "%.*g" , DBL_DIG, value); |
179 | |
180 | /* Find and convert the decimal point. |
181 | Using setlocale on some machines will cause sprintf to use a comma for certain locales. |
182 | */ |
183 | while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { |
184 | itrPtr++; |
185 | } |
186 | if (*itrPtr != 0 && *itrPtr != 'e') { |
187 | /* We reached something that looks like a decimal point. |
188 | In case someone used setlocale(), which changes the decimal point. */ |
189 | *itrPtr = '.'; |
190 | itrPtr++; |
191 | } |
192 | /* Search for the exponent */ |
193 | while (*itrPtr && *itrPtr != 'e') { |
194 | itrPtr++; |
195 | } |
196 | if (*itrPtr == 'e') { |
197 | itrPtr++; |
198 | /* Verify the exponent sign */ |
199 | if (*itrPtr == '+' || *itrPtr == '-') { |
200 | itrPtr++; |
201 | } |
202 | /* Remove leading zeros. You will see this on Windows machines. */ |
203 | expPtr = itrPtr; |
204 | while (*itrPtr == '0') { |
205 | itrPtr++; |
206 | } |
207 | if (*itrPtr && expPtr != itrPtr) { |
208 | /* Shift the exponent without zeros. */ |
209 | while (*itrPtr) { |
210 | *(expPtr++) = *(itrPtr++); |
211 | } |
212 | // NULL terminate |
213 | *expPtr = 0; |
214 | } |
215 | } |
216 | |
217 | string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ |
218 | return string; |
219 | } |
220 | |
221 | // ------------------------------------- |
222 | // calls the overloaded applyPattern method. |
223 | |
224 | void |
225 | ChoiceFormat::applyPattern(const UnicodeString& pattern, |
226 | UErrorCode& status) |
227 | { |
228 | msgPattern.parseChoiceStyle(pattern, NULL, status); |
229 | constructorErrorCode = status; |
230 | } |
231 | |
232 | // ------------------------------------- |
233 | // Applies the pattern to this ChoiceFormat instance. |
234 | |
235 | void |
236 | ChoiceFormat::applyPattern(const UnicodeString& pattern, |
237 | UParseError& parseError, |
238 | UErrorCode& status) |
239 | { |
240 | msgPattern.parseChoiceStyle(pattern, &parseError, status); |
241 | constructorErrorCode = status; |
242 | } |
243 | // ------------------------------------- |
244 | // Returns the input pattern string. |
245 | |
246 | UnicodeString& |
247 | ChoiceFormat::toPattern(UnicodeString& result) const |
248 | { |
249 | return result = msgPattern.getPatternString(); |
250 | } |
251 | |
252 | // ------------------------------------- |
253 | // Sets the limit and format arrays. |
254 | void |
255 | ChoiceFormat::setChoices( const double* limits, |
256 | const UnicodeString* formats, |
257 | int32_t cnt ) |
258 | { |
259 | UErrorCode errorCode = U_ZERO_ERROR; |
260 | setChoices(limits, NULL, formats, cnt, errorCode); |
261 | } |
262 | |
263 | // ------------------------------------- |
264 | // Sets the limit and format arrays. |
265 | void |
266 | ChoiceFormat::setChoices( const double* limits, |
267 | const UBool* closures, |
268 | const UnicodeString* formats, |
269 | int32_t cnt ) |
270 | { |
271 | UErrorCode errorCode = U_ZERO_ERROR; |
272 | setChoices(limits, closures, formats, cnt, errorCode); |
273 | } |
274 | |
275 | void |
276 | ChoiceFormat::setChoices(const double* limits, |
277 | const UBool* closures, |
278 | const UnicodeString* formats, |
279 | int32_t count, |
280 | UErrorCode &errorCode) { |
281 | if (U_FAILURE(errorCode)) { |
282 | return; |
283 | } |
284 | if (limits == NULL || formats == NULL) { |
285 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
286 | return; |
287 | } |
288 | // Reconstruct the original input pattern. |
289 | // Modified version of the pre-ICU 4.8 toPattern() implementation. |
290 | UnicodeString result; |
291 | for (int32_t i = 0; i < count; ++i) { |
292 | if (i != 0) { |
293 | result += VERTICAL_BAR; |
294 | } |
295 | UnicodeString buf; |
296 | if (uprv_isPositiveInfinity(limits[i])) { |
297 | result += INFINITY; |
298 | } else if (uprv_isNegativeInfinity(limits[i])) { |
299 | result += MINUS; |
300 | result += INFINITY; |
301 | } else { |
302 | result += dtos(limits[i], buf); |
303 | } |
304 | if (closures != NULL && closures[i]) { |
305 | result += LESS_THAN; |
306 | } else { |
307 | result += LESS_EQUAL; |
308 | } |
309 | // Append formats[i], using quotes if there are special |
310 | // characters. Single quotes themselves must be escaped in |
311 | // either case. |
312 | const UnicodeString& text = formats[i]; |
313 | int32_t textLength = text.length(); |
314 | int32_t nestingLevel = 0; |
315 | for (int32_t j = 0; j < textLength; ++j) { |
316 | UChar c = text[j]; |
317 | if (c == SINGLE_QUOTE && nestingLevel == 0) { |
318 | // Double each top-level apostrophe. |
319 | result.append(c); |
320 | } else if (c == VERTICAL_BAR && nestingLevel == 0) { |
321 | // Surround each pipe symbol with apostrophes for quoting. |
322 | // If the next character is an apostrophe, then that will be doubled, |
323 | // and although the parser will see the apostrophe pairs beginning |
324 | // and ending one character earlier than our doubling, the result |
325 | // is as desired. |
326 | // | -> '|' |
327 | // |' -> '|''' |
328 | // |'' -> '|''''' etc. |
329 | result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); |
330 | continue; // Skip the append(c) at the end of the loop body. |
331 | } else if (c == LEFT_CURLY_BRACE) { |
332 | ++nestingLevel; |
333 | } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { |
334 | --nestingLevel; |
335 | } |
336 | result.append(c); |
337 | } |
338 | } |
339 | // Apply the reconstructed pattern. |
340 | applyPattern(result, errorCode); |
341 | } |
342 | |
343 | // ------------------------------------- |
344 | // Gets the limit array. |
345 | |
346 | const double* |
347 | ChoiceFormat::getLimits(int32_t& cnt) const |
348 | { |
349 | cnt = 0; |
350 | return NULL; |
351 | } |
352 | |
353 | // ------------------------------------- |
354 | // Gets the closures array. |
355 | |
356 | const UBool* |
357 | ChoiceFormat::getClosures(int32_t& cnt) const |
358 | { |
359 | cnt = 0; |
360 | return NULL; |
361 | } |
362 | |
363 | // ------------------------------------- |
364 | // Gets the format array. |
365 | |
366 | const UnicodeString* |
367 | ChoiceFormat::getFormats(int32_t& cnt) const |
368 | { |
369 | cnt = 0; |
370 | return NULL; |
371 | } |
372 | |
373 | // ------------------------------------- |
374 | // Formats an int64 number, it's actually formatted as |
375 | // a double. The returned format string may differ |
376 | // from the input number because of this. |
377 | |
378 | UnicodeString& |
379 | ChoiceFormat::format(int64_t number, |
380 | UnicodeString& appendTo, |
381 | FieldPosition& status) const |
382 | { |
383 | return format((double) number, appendTo, status); |
384 | } |
385 | |
386 | // ------------------------------------- |
387 | // Formats an int32_t number, it's actually formatted as |
388 | // a double. |
389 | |
390 | UnicodeString& |
391 | ChoiceFormat::format(int32_t number, |
392 | UnicodeString& appendTo, |
393 | FieldPosition& status) const |
394 | { |
395 | return format((double) number, appendTo, status); |
396 | } |
397 | |
398 | // ------------------------------------- |
399 | // Formats a double number. |
400 | |
401 | UnicodeString& |
402 | ChoiceFormat::format(double number, |
403 | UnicodeString& appendTo, |
404 | FieldPosition& /*pos*/) const |
405 | { |
406 | if (msgPattern.countParts() == 0) { |
407 | // No pattern was applied, or it failed. |
408 | return appendTo; |
409 | } |
410 | // Get the appropriate sub-message. |
411 | int32_t msgStart = findSubMessage(msgPattern, 0, number); |
412 | if (!MessageImpl::jdkAposMode(msgPattern)) { |
413 | int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); |
414 | int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); |
415 | appendTo.append(msgPattern.getPatternString(), |
416 | patternStart, |
417 | msgPattern.getPatternIndex(msgLimit) - patternStart); |
418 | return appendTo; |
419 | } |
420 | // JDK compatibility mode: Remove SKIP_SYNTAX. |
421 | return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); |
422 | } |
423 | |
424 | int32_t |
425 | ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { |
426 | int32_t count = pattern.countParts(); |
427 | int32_t msgStart; |
428 | // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples |
429 | // until ARG_LIMIT or end of choice-only pattern. |
430 | // Ignore the first number and selector and start the loop on the first message. |
431 | partIndex += 2; |
432 | for (;;) { |
433 | // Skip but remember the current sub-message. |
434 | msgStart = partIndex; |
435 | partIndex = pattern.getLimitPartIndex(partIndex); |
436 | if (++partIndex >= count) { |
437 | // Reached the end of the choice-only pattern. |
438 | // Return with the last sub-message. |
439 | break; |
440 | } |
441 | const MessagePattern::Part &part = pattern.getPart(partIndex++); |
442 | UMessagePatternPartType type = part.getType(); |
443 | if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { |
444 | // Reached the end of the ChoiceFormat style. |
445 | // Return with the last sub-message. |
446 | break; |
447 | } |
448 | // part is an ARG_INT or ARG_DOUBLE |
449 | U_ASSERT(MessagePattern::Part::hasNumericValue(type)); |
450 | double boundary = pattern.getNumericValue(part); |
451 | // Fetch the ARG_SELECTOR character. |
452 | int32_t selectorIndex = pattern.getPatternIndex(partIndex++); |
453 | UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex); |
454 | if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { |
455 | // The number is in the interval between the previous boundary and the current one. |
456 | // Return with the sub-message between them. |
457 | // The !(a>b) and !(a>=b) comparisons are equivalent to |
458 | // (a<=b) and (a<b) except they "catch" NaN. |
459 | break; |
460 | } |
461 | } |
462 | return msgStart; |
463 | } |
464 | |
465 | // ------------------------------------- |
466 | // Formats an array of objects. Checks if the data type of the objects |
467 | // to get the right value for formatting. |
468 | |
469 | UnicodeString& |
470 | ChoiceFormat::format(const Formattable* objs, |
471 | int32_t cnt, |
472 | UnicodeString& appendTo, |
473 | FieldPosition& pos, |
474 | UErrorCode& status) const |
475 | { |
476 | if(cnt < 0) { |
477 | status = U_ILLEGAL_ARGUMENT_ERROR; |
478 | return appendTo; |
479 | } |
480 | if (msgPattern.countParts() == 0) { |
481 | status = U_INVALID_STATE_ERROR; |
482 | return appendTo; |
483 | } |
484 | |
485 | for (int32_t i = 0; i < cnt; i++) { |
486 | double objDouble = objs[i].getDouble(status); |
487 | if (U_SUCCESS(status)) { |
488 | format(objDouble, appendTo, pos); |
489 | } |
490 | } |
491 | |
492 | return appendTo; |
493 | } |
494 | |
495 | // ------------------------------------- |
496 | |
497 | void |
498 | ChoiceFormat::parse(const UnicodeString& text, |
499 | Formattable& result, |
500 | ParsePosition& pos) const |
501 | { |
502 | result.setDouble(parseArgument(msgPattern, 0, text, pos)); |
503 | } |
504 | |
505 | double |
506 | ChoiceFormat::parseArgument( |
507 | const MessagePattern &pattern, int32_t partIndex, |
508 | const UnicodeString &source, ParsePosition &pos) { |
509 | // find the best number (defined as the one with the longest parse) |
510 | int32_t start = pos.getIndex(); |
511 | int32_t furthest = start; |
512 | double bestNumber = uprv_getNaN(); |
513 | double tempNumber = 0.0; |
514 | int32_t count = pattern.countParts(); |
515 | while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { |
516 | tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); |
517 | partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR |
518 | int32_t msgLimit = pattern.getLimitPartIndex(partIndex); |
519 | int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); |
520 | if (len >= 0) { |
521 | int32_t newIndex = start + len; |
522 | if (newIndex > furthest) { |
523 | furthest = newIndex; |
524 | bestNumber = tempNumber; |
525 | if (furthest == source.length()) { |
526 | break; |
527 | } |
528 | } |
529 | } |
530 | partIndex = msgLimit + 1; |
531 | } |
532 | if (furthest == start) { |
533 | pos.setErrorIndex(start); |
534 | } else { |
535 | pos.setIndex(furthest); |
536 | } |
537 | return bestNumber; |
538 | } |
539 | |
540 | int32_t |
541 | ChoiceFormat::matchStringUntilLimitPart( |
542 | const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, |
543 | const UnicodeString &source, int32_t sourceOffset) { |
544 | int32_t matchingSourceLength = 0; |
545 | const UnicodeString &msgString = pattern.getPatternString(); |
546 | int32_t prevIndex = pattern.getPart(partIndex).getLimit(); |
547 | for (;;) { |
548 | const MessagePattern::Part &part = pattern.getPart(++partIndex); |
549 | if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { |
550 | int32_t index = part.getIndex(); |
551 | int32_t length = index - prevIndex; |
552 | if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { |
553 | return -1; // mismatch |
554 | } |
555 | matchingSourceLength += length; |
556 | if (partIndex == limitPartIndex) { |
557 | return matchingSourceLength; |
558 | } |
559 | prevIndex = part.getLimit(); // SKIP_SYNTAX |
560 | } |
561 | } |
562 | } |
563 | |
564 | // ------------------------------------- |
565 | |
566 | ChoiceFormat* |
567 | ChoiceFormat::clone() const |
568 | { |
569 | ChoiceFormat *aCopy = new ChoiceFormat(*this); |
570 | return aCopy; |
571 | } |
572 | |
573 | U_NAMESPACE_END |
574 | |
575 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
576 | |
577 | //eof |
578 | |