util.h source code [ClickHouse/contrib/icu/icu4c/source/common/util.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (c) 2001-2011, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* Date Name Description
9	* 11/19/2001 aliu Creation.
10	**********************************************************************
11	*/
12
13	#ifndef ICU_UTIL_H
14	#define ICU_UTIL_H
15
16	#include "unicode/utypes.h"
17	#include "unicode/uobject.h"
18	#include "unicode/unistr.h"
19
20	//--------------------------------------------------------------------
21	// class ICU_Utility
22	// i18n utility functions, scoped into the class ICU_Utility.
23	//--------------------------------------------------------------------
24
25	U_NAMESPACE_BEGIN
26
27	class UnicodeMatcher;
28
29	class U_COMMON_API ICU_Utility / not : public UObject because all methods are static / {
30	public:
31
32	/**
33	* Append a number to the given UnicodeString in the given radix.
34	* Standard digits '0'-'9' are used and letters 'A'-'Z' for
35	* radices 11 through 36.
36	* @param result the digits of the number are appended here
37	* @param n the number to be converted to digits; may be negative.
38	* If negative, a '-' is prepended to the digits.
39	* @param radix a radix from 2 to 36 inclusive.
40	* @param minDigits the minimum number of digits, not including
41	* any '-', to produce. Values less than 2 have no effect. One
42	* digit is always emitted regardless of this parameter.
43	* @return a reference to result
44	*/
45	static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
46	int32_t radix = `10`,
47	int32_t minDigits = `1`);
48
49	/* Returns a bogus UnicodeString by value. /
50	static inline UnicodeString makeBogusString() {
51	UnicodeString result;
52	result.setToBogus();
53	return result;
54	}
55
56	/**
57	* Return true if the character is NOT printable ASCII.
58	*
59	* This method should really be in UnicodeString (or similar). For
60	* now, we implement it here and share it with friend classes.
61	*/
62	static UBool isUnprintable(UChar32 c);
63
64	/**
65	* Escape unprintable characters using \uxxxx notation for U+0000 to
66	* U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
67	* printable ASCII, then do nothing and return FALSE. Otherwise,
68	* append the escaped notation and return TRUE.
69	*/
70	static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
71
72	/**
73	* Returns the index of a character, ignoring quoted text.
74	* For example, in the string "abc'hide'h", the 'h' in "hide" will not be
75	* found by a search for 'h'.
76	* @param text text to be searched
77	* @param start the beginning index, inclusive; <code>0 <= start
78	* <= limit</code>.
79	* @param limit the ending index, exclusive; <code>start <= limit
80	* <= text.length()</code>.
81	* @param c character to search for
82	* @return Offset of the first instance of c, or -1 if not found.
83	*/
84	//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
85	// static int32_t quotedIndexOf(const UnicodeString& text,
86	// int32_t start, int32_t limit,
87	// UChar c);
88
89	/**
90	* Skip over a sequence of zero or more white space characters at pos.
91	* @param advance if true, advance pos to the first non-white-space
92	* character at or after pos, or str.length(), if there is none.
93	* Otherwise leave pos unchanged.
94	* @return the index of the first non-white-space character at or
95	* after pos, or str.length(), if there is none.
96	*/
97	static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
98	UBool advance = FALSE);
99
100	/**
101	* Skip over Pattern_White_Space in a Replaceable.
102	* Skipping may be done in the forward or
103	* reverse direction. In either case, the leftmost index will be
104	* inclusive, and the rightmost index will be exclusive. That is,
105	* given a range defined as [start, limit), the call
106	* skipWhitespace(text, start, limit) will advance start past leading
107	* whitespace, whereas the call skipWhitespace(text, limit, start),
108	* will back up limit past trailing whitespace.
109	* @param text the text to be analyzed
110	* @param pos either the start or limit of a range of 'text', to skip
111	* leading or trailing whitespace, respectively
112	* @param stop either the limit or start of a range of 'text', to skip
113	* leading or trailing whitespace, respectively
114	* @return the new start or limit, depending on what was passed in to
115	* 'pos'
116	*/
117	//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
118	//? static int32_t skipWhitespace(const Replaceable& text,
119	//? int32_t pos, int32_t stop);
120
121	/**
122	* Parse a single non-whitespace character 'ch', optionally
123	* preceded by whitespace.
124	* @param id the string to be parsed
125	* @param pos INPUT-OUTPUT parameter. On input, pos[0] is the
126	* offset of the first character to be parsed. On output, pos[0]
127	* is the index after the last parsed character. If the parse
128	* fails, pos[0] will be unchanged.
129	* @param ch the non-whitespace character to be parsed.
130	* @return true if 'ch' is seen preceded by zero or more
131	* whitespace characters.
132	*/
133	static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);
134
135	/**
136	* Parse a pattern string starting at offset pos. Keywords are
137	* matched case-insensitively. Spaces may be skipped and may be
138	* optional or required. Integer values may be parsed, and if
139	* they are, they will be returned in the given array. If
140	* successful, the offset of the next non-space character is
141	* returned. On failure, -1 is returned.
142	* @param pattern must only contain lowercase characters, which
143	* will match their uppercase equivalents as well. A space
144	* character matches one or more required spaces. A '~' character
145	* matches zero or more optional spaces. A '#' character matches
146	* an integer and stores it in parsedInts, which the caller must
147	* ensure has enough capacity.
148	* @param parsedInts array to receive parsed integers. Caller
149	* must ensure that parsedInts.length is >= the number of '#'
150	* signs in 'pattern'.
151	* @return the position after the last character parsed, or -1 if
152	* the parse failed
153	*/
154	static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
155	const UnicodeString& pattern, int32_t* parsedInts);
156
157	/**
158	* Parse a pattern string within the given Replaceable and a parsing
159	* pattern. Characters are matched literally and case-sensitively
160	* except for the following special characters:
161	*
162	* ~ zero or more Pattern_White_Space chars
163	*
164	* If end of pattern is reached with all matches along the way,
165	* pos is advanced to the first unparsed index and returned.
166	* Otherwise -1 is returned.
167	* @param pat pattern that controls parsing
168	* @param text text to be parsed, starting at index
169	* @param index offset to first character to parse
170	* @param limit offset after last character to parse
171	* @return index after last parsed character, or -1 on parse failure.
172	*/
173	static int32_t parsePattern(const UnicodeString& pat,
174	const Replaceable& text,
175	int32_t index,
176	int32_t limit);
177
178	/**
179	* Parse an integer at pos, either of the form \d+ or of the form
180	* 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
181	* or octal format.
182	* @param pos INPUT-OUTPUT parameter. On input, the index of the first
183	* character to parse. On output, the index of the character after the
184	* last parsed character.
185	*/
186	static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
187
188	/**
189	* Parse an integer at pos using only ASCII digits.
190	* Base 10 only.
191	* @param pos INPUT-OUTPUT parameter. On input, the index of the first
192	* character to parse. On output, the index of the character after the
193	* last parsed character.
194	*/
195	static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos);
196
197	/**
198	* Parse a Unicode identifier from the given string at the given
199	* position. Return the identifier, or an empty string if there
200	* is no identifier.
201	* @param str the string to parse
202	* @param pos INPUT-OUPUT parameter. On INPUT, pos is the
203	* first character to examine. It must be less than str.length(),
204	* and it must not point to a whitespace character. That is, must
205	* have pos < str.length() and
206	* !UCharacter::isWhitespace(str.char32At(pos)). On
207	* OUTPUT, the position after the last parsed character.
208	* @return the Unicode identifier, or an empty string if there is
209	* no valid identifier at pos.
210	*/
211	static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
212
213	/**
214	* Parse an unsigned 31-bit integer at the given offset. Use
215	* UCharacter.digit() to parse individual characters into digits.
216	* @param text the text to be parsed
217	* @param pos INPUT-OUTPUT parameter. On entry, pos is the
218	* offset within text at which to start parsing; it should point
219	* to a valid digit. On exit, pos is the offset after the last
220	* parsed character. If the parse failed, it will be unchanged on
221	* exit. Must be >= 0 on entry.
222	* @param radix the radix in which to parse; must be >= 2 and <=
223	* 36.
224	* @return a non-negative parsed number, or -1 upon parse failure.
225	* Parse fails if there are no digits, that is, if pos does not
226	* point to a valid digit on entry, or if the number to be parsed
227	* does not fit into a 31-bit unsigned integer.
228	*/
229	static int32_t parseNumber(const UnicodeString& text,
230	int32_t& pos, int8_t radix);
231
232	static void appendToRule(UnicodeString& rule,
233	UChar32 c,
234	UBool isLiteral,
235	UBool escapeUnprintable,
236	UnicodeString& quoteBuf);
237
238	static void appendToRule(UnicodeString& rule,
239	const UnicodeString& text,
240	UBool isLiteral,
241	UBool escapeUnprintable,
242	UnicodeString& quoteBuf);
243
244	static void appendToRule(UnicodeString& rule,
245	const UnicodeMatcher* matcher,
246	UBool escapeUnprintable,
247	UnicodeString& quoteBuf);
248
249	private:
250	// do not instantiate
251	ICU_Utility();
252	};
253
254	U_NAMESPACE_END
255
256	#endif
257	//eof
258

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/util.h