choicfmt.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/choicfmt.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	* Copyright (C) 1997-2013, International Business Machines Corporation and *
6	* others. All Rights Reserved. *
7	*******************************************************************************
8	*
9	* File CHOICFMT.CPP
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 02/19/97 aliu Converted from java.
15	* 03/20/97 helena Finished first cut of implementation and got rid
16	* of nextDouble/previousDouble and replaced with
17	* boolean array.
18	* 4/10/97 aliu Clean up. Modified to work on AIX.
19	* 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
20	* wchar.h.
21	* 07/09/97 helena Made ParsePosition into a class.
22	* 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
23	* 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
24	* 02/22/99 stephen Removed character literals for EBCDIC safety
25	********************************************************************************
26	*/
27
28	#include "unicode/utypes.h"
29
30	#if !UCONFIG_NO_FORMATTING
31
32	#include "unicode/choicfmt.h"
33	#include "unicode/numfmt.h"
34	#include "unicode/locid.h"
35	#include "cpputils.h"
36	#include "cstring.h"
37	#include "messageimpl.h"
38	#include "putilimp.h"
39	#include "uassert.h"
40	#include <stdio.h>
41	#include <float.h>
42
43	// *****************************************************************************
44	// class ChoiceFormat
45	// *****************************************************************************
46
47	U_NAMESPACE_BEGIN
48
49	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
50
51	// Special characters used by ChoiceFormat. There are two characters
52	// used interchangeably to indicate <=. Either is parsed, but only
53	// LESS_EQUAL is generated by toPattern().
54	#define SINGLE_QUOTE ((UChar)0x0027) /'/
55	#define LESS_THAN ((UChar)0x003C) /</
56	#define LESS_EQUAL ((UChar)0x0023) /#/
57	#define LESS_EQUAL2 ((UChar)0x2264)
58	#define VERTICAL_BAR ((UChar)0x007C) /\|/
59	#define MINUS ((UChar)0x002D) /-/
60
61	static const UChar LEFT_CURLY_BRACE = `0x7B`; /{/
62	static const UChar RIGHT_CURLY_BRACE = `0x7D`; /}/
63
64	#ifdef INFINITY
65	#undef INFINITY
66	#endif
67	#define INFINITY ((UChar)0x221E)
68
69	//static const UChar gPositiveInfinity[] = {INFINITY, 0};
70	//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
71	#define POSITIVE_INF_STRLEN 1
72	#define NEGATIVE_INF_STRLEN 2
73
74	// -------------------------------------
75	// Creates a ChoiceFormat instance based on the pattern.
76
77	ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
78	UErrorCode& status)
79	: constructorErrorCode(status),
80	msgPattern (status)
81	{
82	applyPattern(newPattern, status);
83	}
84
85	// -------------------------------------
86	// Creates a ChoiceFormat instance with the limit array and
87	// format strings for each limit.
88
89	ChoiceFormat::ChoiceFormat(const double* limits,
90	const UnicodeString* formats,
91	int32_t cnt )
92	: constructorErrorCode(U_ZERO_ERROR),
93	msgPattern (constructorErrorCode)
94	{
95	setChoices(limits, NULL, formats, cnt, constructorErrorCode);
96	}
97
98	// -------------------------------------
99
100	ChoiceFormat::ChoiceFormat(const double* limits,
101	const UBool* closures,
102	const UnicodeString* formats,
103	int32_t cnt )
104	: constructorErrorCode(U_ZERO_ERROR),
105	msgPattern (constructorErrorCode)
106	{
107	setChoices(limits, closures, formats, cnt, constructorErrorCode);
108	}
109
110	// -------------------------------------
111	// copy constructor
112
113	ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
114	: NumberFormat (that),
115	constructorErrorCode(that.constructorErrorCode),
116	msgPattern (that.msgPattern)
117	{
118	}
119
120	// -------------------------------------
121	// Private constructor that creates a
122	// ChoiceFormat instance based on the
123	// pattern and populates UParseError
124
125	ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
126	UParseError& parseError,
127	UErrorCode& status)
128	: constructorErrorCode(status),
129	msgPattern (status)
130	{
131	applyPattern(newPattern,parseError, status);
132	}
133	// -------------------------------------
134
135	UBool
136	ChoiceFormat::operator==(const Format& that) const
137	{
138	if (this == &that) return TRUE;
139	if (!NumberFormat::operator==(that)) return FALSE;
140	ChoiceFormat& thatAlias = (ChoiceFormat&)that;
141	return msgPattern == thatAlias.msgPattern;
142	}
143
144	// -------------------------------------
145	// copy constructor
146
147	const ChoiceFormat&
148	ChoiceFormat::operator=(const ChoiceFormat& that)
149	{
150	if (this != &that) {
151	NumberFormat::operator=(that);
152	constructorErrorCode = that.constructorErrorCode;
153	msgPattern = that.msgPattern;
154	}
155	return *this;
156	}
157
158	// -------------------------------------
159
160	ChoiceFormat::~ChoiceFormat()
161	{
162	}
163
164	// -------------------------------------
165
166	/**
167	* Convert a double value to a string without the overhead of NumberFormat.
168	*/
169	UnicodeString&
170	ChoiceFormat::dtos(double value,
171	UnicodeString& string)
172	{
173	/ Buffer to contain the digits and any extra formatting stuff. /
174	char temp[DBL_DIG + `16`];
175	char *itrPtr = temp;
176	char *expPtr;
177
178	sprintf(temp, "%.*g", DBL_DIG, value);
179
180	/ Find and convert the decimal point.*
181	Using setlocale on some machines will cause sprintf to use a comma for certain locales.
182	*/
183	while (itrPtr && (itrPtr == `'-'` \|\| isdigit(*itrPtr))) {
184	itrPtr++;
185	}
186	if (itrPtr != `0` && itrPtr != `'e'`) {
187	/ We reached something that looks like a decimal point.*
188	In case someone used setlocale(), which changes the decimal point. /*
189	*itrPtr = `'.'`;
190	itrPtr++;
191	}
192	/ Search for the exponent /
193	while (itrPtr && itrPtr != `'e'`) {
194	itrPtr++;
195	}
196	if (*itrPtr == `'e'`) {
197	itrPtr++;
198	/ Verify the exponent sign /
199	if (itrPtr == `'+'` \|\| itrPtr == `'-'`) {
200	itrPtr++;
201	}
202	/ Remove leading zeros. You will see this on Windows machines. /
203	expPtr = itrPtr;
204	while (*itrPtr == `'0'`) {
205	itrPtr++;
206	}
207	if (*itrPtr && expPtr != itrPtr) {
208	/ Shift the exponent without zeros. /
209	while (*itrPtr) {
210	(expPtr++) = (itrPtr++);
211	}
212	// NULL terminate
213	*expPtr = `0`;
214	}
215	}
216
217	string = UnicodeString (temp, -`1`, US_INV); / invariant codepage /
218	return string;
219	}
220
221	// -------------------------------------
222	// calls the overloaded applyPattern method.
223
224	void
225	ChoiceFormat::applyPattern(const UnicodeString& pattern,
226	UErrorCode& status)
227	{
228	msgPattern.parseChoiceStyle(pattern, NULL, status);
229	constructorErrorCode = status;
230	}
231
232	// -------------------------------------
233	// Applies the pattern to this ChoiceFormat instance.
234
235	void
236	ChoiceFormat::applyPattern(const UnicodeString& pattern,
237	UParseError& parseError,
238	UErrorCode& status)
239	{
240	msgPattern.parseChoiceStyle(pattern, &parseError, status);
241	constructorErrorCode = status;
242	}
243	// -------------------------------------
244	// Returns the input pattern string.
245
246	UnicodeString&
247	ChoiceFormat::toPattern(UnicodeString& result) const
248	{
249	return result = msgPattern.getPatternString();
250	}
251
252	// -------------------------------------
253	// Sets the limit and format arrays.
254	void
255	ChoiceFormat::setChoices( const double* limits,
256	const UnicodeString* formats,
257	int32_t cnt )
258	{
259	UErrorCode errorCode = U_ZERO_ERROR;
260	setChoices(limits, NULL, formats, cnt, errorCode);
261	}
262
263	// -------------------------------------
264	// Sets the limit and format arrays.
265	void
266	ChoiceFormat::setChoices( const double* limits,
267	const UBool* closures,
268	const UnicodeString* formats,
269	int32_t cnt )
270	{
271	UErrorCode errorCode = U_ZERO_ERROR;
272	setChoices(limits, closures, formats, cnt, errorCode);
273	}
274
275	void
276	ChoiceFormat::setChoices(const double* limits,
277	const UBool* closures,
278	const UnicodeString* formats,
279	int32_t count,
280	UErrorCode &errorCode) {
281	if (U_FAILURE(errorCode)) {
282	return;
283	}
284	if (limits == NULL \|\| formats == NULL) {
285	errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286	return;
287	}
288	// Reconstruct the original input pattern.
289	// Modified version of the pre-ICU 4.8 toPattern() implementation.
290	UnicodeString result;
291	for (int32_t i = `0`; i < count; ++i) {
292	if (i != `0`) {
293	result += VERTICAL_BAR;
294	}
295	UnicodeString buf;
296	if (uprv_isPositiveInfinity(limits[i])) {
297	result += INFINITY;
298	} else if (uprv_isNegativeInfinity(limits[i])) {
299	result += MINUS;
300	result += INFINITY;
301	} else {
302	result += dtos(limits[i], buf);
303	}
304	if (closures != NULL && closures[i]) {
305	result += LESS_THAN;
306	} else {
307	result += LESS_EQUAL;
308	}
309	// Append formats[i], using quotes if there are special
310	// characters. Single quotes themselves must be escaped in
311	// either case.
312	const UnicodeString& text = formats[i];
313	int32_t textLength = text.length();
314	int32_t nestingLevel = `0`;
315	for (int32_t j = `0`; j < textLength; ++j) {
316	UChar c = text [j];
317	if (c == SINGLE_QUOTE && nestingLevel == `0`) {
318	// Double each top-level apostrophe.
319	result.append(c);
320	} else if (c == VERTICAL_BAR && nestingLevel == `0`) {
321	// Surround each pipe symbol with apostrophes for quoting.
322	// If the next character is an apostrophe, then that will be doubled,
323	// and although the parser will see the apostrophe pairs beginning
324	// and ending one character earlier than our doubling, the result
325	// is as desired.
326	// \| -> '\|'
327	// \|' -> '\|'''
328	// \|'' -> '\|''''' etc.
329	result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
330	continue; // Skip the append(c) at the end of the loop body.
331	} else if (c == LEFT_CURLY_BRACE) {
332	++nestingLevel;
333	} else if (c == RIGHT_CURLY_BRACE && nestingLevel > `0`) {
334	--nestingLevel;
335	}
336	result.append(c);
337	}
338	}
339	// Apply the reconstructed pattern.
340	applyPattern(result, errorCode);
341	}
342
343	// -------------------------------------
344	// Gets the limit array.
345
346	const double*
347	ChoiceFormat::getLimits(int32_t& cnt) const
348	{
349	cnt = `0`;
350	return NULL;
351	}
352
353	// -------------------------------------
354	// Gets the closures array.
355
356	const UBool*
357	ChoiceFormat::getClosures(int32_t& cnt) const
358	{
359	cnt = `0`;
360	return NULL;
361	}
362
363	// -------------------------------------
364	// Gets the format array.
365
366	const UnicodeString*
367	ChoiceFormat::getFormats(int32_t& cnt) const
368	{
369	cnt = `0`;
370	return NULL;
371	}
372
373	// -------------------------------------
374	// Formats an int64 number, it's actually formatted as
375	// a double. The returned format string may differ
376	// from the input number because of this.
377
378	UnicodeString&
379	ChoiceFormat::format(int64_t number,
380	UnicodeString& appendTo,
381	FieldPosition& status) const
382	{
383	return format((double) number, appendTo, status);
384	}
385
386	// -------------------------------------
387	// Formats an int32_t number, it's actually formatted as
388	// a double.
389
390	UnicodeString&
391	ChoiceFormat::format(int32_t number,
392	UnicodeString& appendTo,
393	FieldPosition& status) const
394	{
395	return format((double) number, appendTo, status);
396	}
397
398	// -------------------------------------
399	// Formats a double number.
400
401	UnicodeString&
402	ChoiceFormat::format(double number,
403	UnicodeString& appendTo,
404	FieldPosition& /pos/) const
405	{
406	if (msgPattern.countParts() == `0`) {
407	// No pattern was applied, or it failed.
408	return appendTo;
409	}
410	// Get the appropriate sub-message.
411	int32_t msgStart = findSubMessage(msgPattern, `0`, number);
412	if (!MessageImpl::jdkAposMode(msgPattern)) {
413	int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
414	int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
415	appendTo.append(msgPattern.getPatternString(),
416	patternStart,
417	msgPattern.getPatternIndex(msgLimit) - patternStart);
418	return appendTo;
419	}
420	// JDK compatibility mode: Remove SKIP_SYNTAX.
421	return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
422	}
423
424	int32_t
425	ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
426	int32_t count = pattern.countParts();
427	int32_t msgStart;
428	// Iterate over (ARG_INT\|DOUBLE, ARG_SELECTOR, message) tuples
429	// until ARG_LIMIT or end of choice-only pattern.
430	// Ignore the first number and selector and start the loop on the first message.
431	partIndex += `2`;
432	for (;;) {
433	// Skip but remember the current sub-message.
434	msgStart = partIndex;
435	partIndex = pattern.getLimitPartIndex(partIndex);
436	if (++partIndex >= count) {
437	// Reached the end of the choice-only pattern.
438	// Return with the last sub-message.
439	break;
440	}
441	const MessagePattern::Part &part = pattern.getPart(partIndex++);
442	UMessagePatternPartType type = part.getType();
443	if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
444	// Reached the end of the ChoiceFormat style.
445	// Return with the last sub-message.
446	break;
447	}
448	// part is an ARG_INT or ARG_DOUBLE
449	U_ASSERT(MessagePattern::Part::hasNumericValue(type));
450	double boundary = pattern.getNumericValue(part);
451	// Fetch the ARG_SELECTOR character.
452	int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
453	UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
454	if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
455	// The number is in the interval between the previous boundary and the current one.
456	// Return with the sub-message between them.
457	// The !(a>b) and !(a>=b) comparisons are equivalent to
458	// (a<=b) and (a<b) except they "catch" NaN.
459	break;
460	}
461	}
462	return msgStart;
463	}
464
465	// -------------------------------------
466	// Formats an array of objects. Checks if the data type of the objects
467	// to get the right value for formatting.
468
469	UnicodeString&
470	ChoiceFormat::format(const Formattable* objs,
471	int32_t cnt,
472	UnicodeString& appendTo,
473	FieldPosition& pos,
474	UErrorCode& status) const
475	{
476	if(cnt < `0`) {
477	status = U_ILLEGAL_ARGUMENT_ERROR;
478	return appendTo;
479	}
480	if (msgPattern.countParts() == `0`) {
481	status = U_INVALID_STATE_ERROR;
482	return appendTo;
483	}
484
485	for (int32_t i = `0`; i < cnt; i++) {
486	double objDouble = objs[i].getDouble(status);
487	if (U_SUCCESS(status)) {
488	format(objDouble, appendTo, pos);
489	}
490	}
491
492	return appendTo;
493	}
494
495	// -------------------------------------
496
497	void
498	ChoiceFormat::parse(const UnicodeString& text,
499	Formattable& result,
500	ParsePosition& pos) const
501	{
502	result.setDouble(parseArgument(msgPattern, `0`, text, pos));
503	}
504
505	double
506	ChoiceFormat::parseArgument(
507	const MessagePattern &pattern, int32_t partIndex,
508	const UnicodeString &source, ParsePosition &pos) {
509	// find the best number (defined as the one with the longest parse)
510	int32_t start = pos.getIndex();
511	int32_t furthest = start;
512	double bestNumber = uprv_getNaN();
513	double tempNumber = `0.0`;
514	int32_t count = pattern.countParts();
515	while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
516	tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
517	partIndex += `2`; // skip the numeric part and ignore the ARG_SELECTOR
518	int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
519	int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
520	if (len >= `0`) {
521	int32_t newIndex = start + len;
522	if (newIndex > furthest) {
523	furthest = newIndex;
524	bestNumber = tempNumber;
525	if (furthest == source.length()) {
526	break;
527	}
528	}
529	}
530	partIndex = msgLimit + `1`;
531	}
532	if (furthest == start) {
533	pos.setErrorIndex(start);
534	} else {
535	pos.setIndex(furthest);
536	}
537	return bestNumber;
538	}
539
540	int32_t
541	ChoiceFormat::matchStringUntilLimitPart(
542	const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
543	const UnicodeString &source, int32_t sourceOffset) {
544	int32_t matchingSourceLength = `0`;
545	const UnicodeString &msgString = pattern.getPatternString();
546	int32_t prevIndex = pattern.getPart(partIndex).getLimit();
547	for (;;) {
548	const MessagePattern::Part &part = pattern.getPart(++partIndex);
549	if (partIndex == limitPartIndex \|\| part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
550	int32_t index = part.getIndex();
551	int32_t length = index - prevIndex;
552	if (length != `0` && `0` != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
553	return -`1`; // mismatch
554	}
555	matchingSourceLength += length;
556	if (partIndex == limitPartIndex) {
557	return matchingSourceLength;
558	}
559	prevIndex = part.getLimit(); // SKIP_SYNTAX
560	}
561	}
562	}
563
564	// -------------------------------------
565
566	ChoiceFormat*
567	ChoiceFormat::clone() const
568	{
569	ChoiceFormat aCopy = new* ChoiceFormat (*this);
570	return aCopy;
571	}
572
573	U_NAMESPACE_END
574
575	#endif /* #if !UCONFIG_NO_FORMATTING */
576
577	//eof
578

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/choicfmt.cpp