cstring.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/cstring.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	*
6	* Copyright (C) 1997-2011, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	******************************************************************************
10	*
11	* File CSTRING.C
12	*
13	* @author Helena Shih
14	*
15	* Modification History:
16	*
17	* Date Name Description
18	* 6/18/98 hshih Created
19	* 09/08/98 stephen Added include for ctype, for Mac Port
20	* 11/15/99 helena Integrated S/390 IEEE changes.
21	******************************************************************************
22	*/
23
24
25
26	#include <stdlib.h>
27	#include <stdio.h>
28	#include "unicode/utypes.h"
29	#include "cmemory.h"
30	#include "cstring.h"
31	#include "uassert.h"
32
33	/*
34	* We hardcode case conversion for invariant characters to match our expectation
35	* and the compiler execution charset.
36	* This prevents problems on systems
37	* - with non-default casing behavior, like Turkish system locales where
38	* tolower('I') maps to dotless i and toupper('i') maps to dotted I
39	* - where there are no lowercase Latin characters at all, or using different
40	* codes (some old EBCDIC codepages)
41	*
42	* This works because the compiler usually runs on a platform where the execution
43	* charset includes all of the invariant characters at their expected
44	* code positions, so that the char * string literals in ICU code match
45	* the char literals here.
46	*
47	* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
48	* and the set of uppercase Latin letters is discontiguous as well.
49	*/
50
51	U_CAPI UBool U_EXPORT2
52	uprv_isASCIILetter(char c) {
53	#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
54	return
55	(`'a'`<=c && c<=`'i'`) \|\| (`'j'`<=c && c<=`'r'`) \|\| (`'s'`<=c && c<=`'z'`) \|\|
56	(`'A'`<=c && c<=`'I'`) \|\| (`'J'`<=c && c<=`'R'`) \|\| (`'S'`<=c && c<=`'Z'`);
57	#else
58	return (`'a'`<=c && c<=`'z'`) \|\| (`'A'`<=c && c<=`'Z'`);
59	#endif
60	}
61
62	U_CAPI char U_EXPORT2
63	uprv_toupper(char c) {
64	#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
65	if((`'a'`<=c && c<=`'i'`) \|\| (`'j'`<=c && c<=`'r'`) \|\| (`'s'`<=c && c<=`'z'`)) {
66	c=(char)(c+(`'A'`-`'a'`));
67	}
68	#else
69	if(`'a'`<=c && c<=`'z'`) {
70	c=(char)(c+(`'A'`-`'a'`));
71	}
72	#endif
73	return c;
74	}
75
76
#if 0
/*
 * Commented out because cstring.h defines uprv_tolower() to be
 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
 * to reduce the amount of code to cover with tests.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC: the uppercase letters occupy three separate ranges. */
    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
        c=(char)(c+('a'-'A'));
    }
#else
    if('A'<=c && c<='Z') {
        c=(char)(c+('a'-'A'));
    }
#endif
    return c;
}
#endif
101
102	U_CAPI char U_EXPORT2
103	uprv_asciitolower(char c) {
104	if(`0x41`<=c && c<=`0x5a`) {
105	c=(char)(c+`0x20`);
106	}
107	return c;
108	}
109
110	U_CAPI char U_EXPORT2
111	uprv_ebcdictolower(char c) {
112	if( (`0xc1`<=(uint8_t)c && (uint8_t)c<=`0xc9`) \|\|
113	(`0xd1`<=(uint8_t)c && (uint8_t)c<=`0xd9`) \|\|
114	(`0xe2`<=(uint8_t)c && (uint8_t)c<=`0xe9`)
115	) {
116	c=(char)(c-`0x40`);
117	}
118	return c;
119	}
120
121
122	U_CAPI char* U_EXPORT2
123	T_CString_toLowerCase(char* str)
124	{
125	char* origPtr = str;
126
127	if (str) {
128	do
129	str = (char)uprv_tolower(str);
130	while (*(str++));
131	}
132
133	return origPtr;
134	}
135
136	U_CAPI char* U_EXPORT2
137	T_CString_toUpperCase(char* str)
138	{
139	char* origPtr = str;
140
141	if (str) {
142	do
143	str = (char)uprv_toupper(str);
144	while (*(str++));
145	}
146
147	return origPtr;
148	}
149
150	/*
151	* Takes a int32_t and fills in a char* string with that number "radix"-based.
152	* Does not handle negative values (makes an empty string for them).
153	* Writes at most 12 chars ("-2147483647" plus NUL).
154	* Returns the length of the string (not including the NUL).
155	*/
156	U_CAPI int32_t U_EXPORT2
157	T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
158	{
159	char tbuf[`30`];
160	int32_t tbx = sizeof(tbuf);
161	uint8_t digit;
162	int32_t length = `0`;
163	uint32_t uval;
164
165	U_ASSERT(radix>=`2` && radix<=`16`);
166	uval = (uint32_t) v;
167	if(v<`0` && radix == `10`) {
168	/ Only in base 10 do we conside numbers to be signed. /
169	uval = (uint32_t)(-v);
170	buffer[length++] = `'-'`;
171	}
172
173	tbx = sizeof(tbuf)-`1`;
174	tbuf[tbx] = `0`; / We are generating the digits backwards. Null term the end. /
175	do {
176	digit = (uint8_t)(uval % radix);
177	tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
178	uval = uval / radix;
179	} while (uval != `0`);
180
181	/ copy converted number into user buffer /
182	uprv_strcpy(buffer+length, tbuf+tbx);
183	length += sizeof(tbuf) - tbx -`1`;
184	return length;
185	}
186
187
188
189	/*
190	* Takes a int64_t and fills in a char* string with that number "radix"-based.
191	* Writes at most 21: chars ("-9223372036854775807" plus NUL).
192	* Returns the length of the string, not including the terminating NULL.
193	*/
194	U_CAPI int32_t U_EXPORT2
195	T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
196	{
197	char tbuf[`30`];
198	int32_t tbx = sizeof(tbuf);
199	uint8_t digit;
200	int32_t length = `0`;
201	uint64_t uval;
202
203	U_ASSERT(radix>=`2` && radix<=`16`);
204	uval = (uint64_t) v;
205	if(v<`0` && radix == `10`) {
206	/ Only in base 10 do we conside numbers to be signed. /
207	uval = (uint64_t)(-v);
208	buffer[length++] = `'-'`;
209	}
210
211	tbx = sizeof(tbuf)-`1`;
212	tbuf[tbx] = `0`; / We are generating the digits backwards. Null term the end. /
213	do {
214	digit = (uint8_t)(uval % radix);
215	tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
216	uval = uval / radix;
217	} while (uval != `0`);
218
219	/ copy converted number into user buffer /
220	uprv_strcpy(buffer+length, tbuf+tbx);
221	length += sizeof(tbuf) - tbx -`1`;
222	return length;
223	}
224
225
226	U_CAPI int32_t U_EXPORT2
227	T_CString_stringToInteger(const char *integerString, int32_t radix)
228	{
229	char *end;
230	return uprv_strtoul(integerString, &end, radix);
231
232	}
233
234	U_CAPI int U_EXPORT2
235	uprv_stricmp(const char str1, const* char *str2) {
236	if(str1==NULL) {
237	if(str2==NULL) {
238	return `0`;
239	} else {
240	return -`1`;
241	}
242	} else if(str2==NULL) {
243	return `1`;
244	} else {
245	/ compare non-NULL strings lexically with lowercase /
246	int rc;
247	unsigned char c1, c2;
248
249	for(;;) {
250	c1=(unsigned char)*str1;
251	c2=(unsigned char)*str2;
252	if(c1==`0`) {
253	if(c2==`0`) {
254	return `0`;
255	} else {
256	return -`1`;
257	}
258	} else if(c2==`0`) {
259	return `1`;
260	} else {
261	/ compare non-zero characters with lowercase /
262	rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
263	if(rc!=`0`) {
264	return rc;
265	}
266	}
267	++str1;
268	++str2;
269	}
270	}
271	}
272
273	U_CAPI int U_EXPORT2
274	uprv_strnicmp(const char str1, const* char *str2, uint32_t n) {
275	if(str1==NULL) {
276	if(str2==NULL) {
277	return `0`;
278	} else {
279	return -`1`;
280	}
281	} else if(str2==NULL) {
282	return `1`;
283	} else {
284	/ compare non-NULL strings lexically with lowercase /
285	int rc;
286	unsigned char c1, c2;
287
288	for(; n--;) {
289	c1=(unsigned char)*str1;
290	c2=(unsigned char)*str2;
291	if(c1==`0`) {
292	if(c2==`0`) {
293	return `0`;
294	} else {
295	return -`1`;
296	}
297	} else if(c2==`0`) {
298	return `1`;
299	} else {
300	/ compare non-zero characters with lowercase /
301	rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
302	if(rc!=`0`) {
303	return rc;
304	}
305	}
306	++str1;
307	++str2;
308	}
309	}
310
311	return `0`;
312	}
313
314	U_CAPI char* U_EXPORT2
315	uprv_strdup(const char *src) {
316	size_t len = uprv_strlen(src) + `1`;
317	char dup = (char* *) uprv_malloc(len);
318
319	if (dup) {
320	uprv_memcpy(dup, src, len);
321	}
322
323	return dup;
324	}
325
326	U_CAPI char* U_EXPORT2
327	uprv_strndup(const char *src, int32_t n) {
328	char *dup;
329
330	if(n < `0`) {
331	dup = uprv_strdup(src);
332	} else {
333	dup = (char*)uprv_malloc(n+`1`);
334	if (dup) {
335	uprv_memcpy(dup, src, n);
336	dup[n] = `0`;
337	}
338	}
339
340	return dup;
341	}
342

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/cstring.cpp