ctype-simple.c source code [MariaDB/strings/ctype-simple.c]

/* Copyright (c) 2002, 2013, Oracle and/or its affiliates.
   Copyright (c) 2009, 2014, SkySQL Ab.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
16
17	#include "strings_def.h"
18	#include <m_ctype.h>
19	#include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */
20	#include <errno.h>
21
22	#include "stdarg.h"
23
24	/*
25	Returns the number of bytes required for strnxfrm().
26	*/
27
28	size_t my_strnxfrmlen_simple(CHARSET_INFO *cs, size_t len)
29	{
30	return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : `1`);
31	}
32
33
34	/*
35	Converts a string into its sort key.
36
37	SYNOPSIS
38	my_strnxfrm_xxx()
39
40	IMPLEMENTATION
41
42	The my_strxfrm_xxx() function transforms a string pointed to by
43	'src' with length 'srclen' according to the charset+collation
44	pair 'cs' and copies the result key into 'dest'.
45
46	Comparing two strings using memcmp() after my_strnxfrm_xxx()
47	is equal to comparing two original strings with my_strnncollsp_xxx().
48
49	Not more than 'dstlen' bytes are written into 'dst'.
50	To guarantee that the whole string is transformed, 'dstlen' must be
51	at least srclencs->strnxfrm_multiply bytes long. Otherwise,*
52	consequent memcmp() may return a non-accurate result.
53
54	If the source string is too short to fill whole 'dstlen' bytes,
55	then the 'dest' string is padded up to 'dstlen', ensuring that:
56
57	"a" == "a "
58	"a\0" < "a"
59	"a\0" < "a "
60
61	my_strnxfrm_simple() is implemented for 8bit charsets and
62	simple collations with one-to-one string->key transformation.
63
64	See also implementations for various charsets/collations in
65	other ctype-xxx.c files.
66
67	RETURN
68
69	Target len 'dstlen'.
70
71	*/
72
73
74	size_t my_strnxfrm_simple_internal(CHARSET_INFO * cs,
75	uchar dst, size_t dstlen, uint nweights,
76	const uchar *src, size_t srclen)
77	{
78	const uchar *map= cs->sort_order;
79	uchar *d0= dst;
80	uint frmlen;
81	if ((frmlen= (uint)MY_MIN(dstlen, *nweights)) > srclen)
82	frmlen= (uint)srclen;
83	if (dst != src)
84	{
85	const uchar *end;
86	for (end= src + frmlen; src < end;)
87	dst++= map[src++];
88	}
89	else
90	{
91	const uchar *end;
92	for (end= dst + frmlen; dst < end; dst++)
93	dst= map[(uchar) dst];
94	}
95	*nweights-= frmlen;
96	return dst - d0;
97	}
98
99
100	size_t my_strnxfrm_simple(CHARSET_INFO * cs,
101	uchar *dst, size_t dstlen, uint nweights,
102	const uchar *src, size_t srclen, uint flags)
103	{
104	uchar *d0= dst;
105	dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights,
106	src, srclen);
107	return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
108	nweights, flags, `0`);
109	}
110
111
112	size_t my_strnxfrm_simple_nopad(CHARSET_INFO * cs,
113	uchar *dst, size_t dstlen, uint nweights,
114	const uchar *src, size_t srclen, uint flags)
115	{
116	uchar *d0= dst;
117	dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights,
118	src, srclen);
119	return my_strxfrm_pad_desc_and_reverse_nopad(cs, d0, dst, d0 + dstlen,
120	nweights, flags, `0`);
121	}
122
123
124	int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen,
125	const uchar *t, size_t tlen,
126	my_bool t_is_prefix)
127	{
128	size_t len = ( slen > tlen ) ? tlen : slen;
129	const uchar *map= cs->sort_order;
130	if (t_is_prefix && slen > tlen)
131	slen=tlen;
132	while (len--)
133	{
134	if (map[s++] != map[t++])
135	return ((int) map[s[-`1`]] - (int) map[t[-`1`]]);
136	}
137	/*
138	We can't use (slen - tlen) here as the result may be outside of the
139	precision of a signed int
140	*/
141	return slen > tlen ? `1` : slen < tlen ? -`1` : `0` ;
142	}
143
144
145	/*
146	Compare strings, discarding end space
147
148	SYNOPSIS
149	my_strnncollsp_simple()
150	cs character set handler
151	a First string to compare
152	a_length Length of 'a'
153	b Second string to compare
154	b_length Length of 'b'
155
156	IMPLEMENTATION
157	If one string is shorter as the other, then we space extend the other
158	so that the strings have equal length.
159
160	This will ensure that the following things hold:
161
162	"a" == "a "
163	"a\0" < "a"
164	"a\0" < "a "
165
166	RETURN
167	< 0 a < b
168	= 0 a == b
169	> 0 a > b
170	*/
171
172	int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
173	const uchar *b, size_t b_length)
174	{
175	const uchar map= cs->sort_order, end;
176	size_t length;
177	int res;
178
179	end= a + (length= MY_MIN(a_length, b_length));
180	while (a < end)
181	{
182	if (map[a++] != map[b++])
183	return ((int) map[a[-`1`]] - (int) map[b[-`1`]]);
184	}
185	res= `0`;
186	if (a_length != b_length)
187	{
188	int swap= `1`;
189	/*
190	Check the next not space character of the longer key. If it's < ' ',
191	then it's smaller than the other key.
192	*/
193	if (a_length < b_length)
194	{
195	/ put shorter key in s /
196	a_length= b_length;
197	a= b;
198	swap= -`1`; / swap sign of result /
199	res= -res;
200	}
201	for (end= a + a_length-length; a < end ; a++)
202	{
203	if (map[*a] != map[`' '`])
204	return (map[*a] < map[`' '`]) ? -swap : swap;
205	}
206	}
207	return res;
208	}
209
210
211	int my_strnncollsp_simple_nopad(CHARSET_INFO * cs,
212	const uchar *a, size_t a_length,
213	const uchar *b, size_t b_length)
214	{
215	return my_strnncoll_simple(cs, a, a_length, b, b_length, FALSE);
216	}
217
218
219	size_t my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
220	{
221	register const uchar *map= cs->to_upper;
222	char *str_orig= str;
223	while ((str= (char) map[(uchar) str]) != `0`)
224	str++;
225	return (size_t) (str - str_orig);
226	}
227
228
229	size_t my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
230	{
231	register const uchar *map= cs->to_lower;
232	char *str_orig= str;
233	while ((str= (char) map[(uchar) str]) != `0`)
234	str++;
235	return (size_t) (str - str_orig);
236	}
237
238
239	size_t my_caseup_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
240	char dst __attribute__*((unused)),
241	size_t dstlen __attribute__((unused)))
242	{
243	char *end= src + srclen;
244	register const uchar *map= cs->to_upper;
245	DBUG_ASSERT(src == dst && srclen == dstlen);
246	for ( ; src != end ; src++)
247	src= (char) map[(uchar) src];
248	return srclen;
249	}
250
251
252	size_t my_casedn_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
253	char dst __attribute__*((unused)),
254	size_t dstlen __attribute__((unused)))
255	{
256	char *end= src + srclen;
257	register const uchar *map=cs->to_lower;
258	DBUG_ASSERT(src == dst && srclen == dstlen);
259	for ( ; src != end ; src++)
260	src= (char) map[(uchar) src];
261	return srclen;
262	}
263
264	int my_strcasecmp_8bit(CHARSET_INFO * cs,const char s, const* char *t)
265	{
266	register const uchar *map=cs->to_upper;
267	while (map[(uchar) s] == map[(uchar) t++])
268	if (!s++) return* `0`;
269	return ((int) map[(uchar) s[`0`]] - (int) map[(uchar) t[-`1`]]);
270	}
271
272
273	int my_charlen_8bit(CHARSET_INFO cs __attribute__*((unused)),
274	const uchar str, const* uchar *end)
275	{
276	return str >= end ? MY_CS_TOOSMALL : `1`;
277	}
278
279
280	int my_mb_wc_8bit(CHARSET_INFO cs,my_wc_t wc,
281	const uchar *str,
282	const uchar end __attribute__*((unused)))
283	{
284	if (str >= end)
285	return MY_CS_TOOSMALL;
286
287	wc=cs->tab_to_uni[str];
288	return (!wc[`0`] && str[`0`]) ? -`1` : `1`;
289	}
290
291	int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
292	uchar *str,
293	uchar *end)
294	{
295	MY_UNI_IDX *idx;
296
297	if (str >= end)
298	return MY_CS_TOOSMALL;
299
300	for (idx=cs->tab_from_uni; idx->tab ; idx++)
301	{
302	if (idx->from <= wc && idx->to >= wc)
303	{
304	str[`0`]= idx->tab[wc - idx->from];
305	return (!str[`0`] && wc) ? MY_CS_ILUNI : `1`;
306	}
307	}
308	return MY_CS_ILUNI;
309	}
310
311
312	/*
313	We can't use vsprintf here as it's not guaranteed to return
314	the length on all operating systems.
315	This function is also not called in a safe environment, so the
316	end buffer must be checked.
317	*/
318
319	size_t my_snprintf_8bit(CHARSET_INFO cs __attribute__*((unused)),
320	char* to, size_t n __attribute__((unused)),
321	const char* fmt, ...)
322	{
323	va_list args;
324	size_t result;
325	va_start(args,fmt);
326	result= my_vsnprintf(to, n, fmt, args);
327	va_end(args);
328	return result;
329	}
330
331
332	void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
333	const uchar *key, size_t len,
334	ulong nr1, ulong nr2)
335	{
336	register const uchar *sort_order=cs->sort_order;
337	const uchar *end= key + len;
338	register ulong m1= nr1, m2= nr2;
339	for (; key < (uchar*) end ; key++)
340	{
341	MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]);
342	}
343	*nr1= m1;
344	*nr2= m2;
345	}
346
347
348	void my_hash_sort_simple(CHARSET_INFO *cs,
349	const uchar *key, size_t len,
350	ulong nr1, ulong nr2)
351	{
352	register const uchar *sort_order=cs->sort_order;
353	const uchar *end;
354	uint16 space_weight= sort_order[`' '`];
355
356	/*
357	Remove all trailing characters that are equal to space.
358	We have to do this to be able to compare 'A ' and 'A' as identical.
359
360	If the key is long enough, cut the trailing spaces (0x20) using an
361	optimized function implemented in skip_trailing_spaces().
362
363	"len > 16" is just some heuristic here.
364	Calling skip_triling_space() for short values is not desirable,
365	because its initialization block may be more expensive than the
366	performance gained.
367	*/
368
369	end= len > `16` ? skip_trailing_space(key, len) : key + len;
370
371	/*
372	We removed all trailing characters that are binary equal to space 0x20.
373	Now remove all trailing characters that have weights equal to space.
374	Some 8bit simple collations may have such characters:
375	- cp1250_general_ci 0xA0 NO-BREAK SPACE == 0x20 SPACE
376	- cp1251_ukrainian_ci 0x60 GRAVE ACCENT == 0x20 SPACE
377	- koi8u_general_ci 0x60 GRAVE ACCENT == 0x20 SPACE
378	*/
379
380	for ( ; key < end ; )
381	{
382	if (sort_order[*--end] != space_weight)
383	{
384	end++;
385	break;
386	}
387	}
388	my_hash_sort_simple_nopad(cs, key, end - key, nr1, nr2);
389	}
390
391
392	long my_strntol_8bit(CHARSET_INFO *cs,
393	const char nptr, size_t l, int* base,
394	char *endptr, int* *err)
395	{
396	int negative;
397	register uint32 cutoff;
398	register uint cutlim;
399	register uint32 i;
400	register const char *s;
401	register uchar c;
402	const char save, e;
403	int overflow;
404
405	err= `0`; /* Initialize error indicator /
406
407	s = nptr;
408	e = nptr+l;
409
410	for ( ; s<e && my_isspace(cs, *s) ; s++);
411
412	if (s == e)
413	{
414	goto noconv;
415	}
416
417	/ Check for a sign. /
418	if (*s == `'-'`)
419	{
420	negative = `1`;
421	++s;
422	}
423	else if (*s == `'+'`)
424	{
425	negative = `0`;
426	++s;
427	}
428	else
429	negative = `0`;
430
431	save = s;
432	cutoff = ((uint32)~`0L`) / (uint32) base;
433	cutlim = (uint) (((uint32)~`0L`) % (uint32) base);
434
435	overflow = `0`;
436	i = `0`;
437	for (c = s; s != e; c = ++s)
438	{
439	if (c>=`'0'` && c<=`'9'`)
440	c -= `'0'`;
441	else if (c>=`'A'` && c<=`'Z'`)
442	c = c - `'A'` + `10`;
443	else if (c>=`'a'` && c<=`'z'`)
444	c = c - `'a'` + `10`;
445	else
446	break;
447	if (c >= base)
448	break;
449	if (i > cutoff \|\| (i == cutoff && c > cutlim))
450	overflow = `1`;
451	else
452	{
453	i *= (uint32) base;
454	i += c;
455	}
456	}
457
458	if (s == save)
459	goto noconv;
460
461	if (endptr != NULL)
462	endptr = (char* *) s;
463
464	if (negative)
465	{
466	if (i > (uint32) INT_MIN32)
467	overflow = `1`;
468	}
469	else if (i > INT_MAX32)
470	overflow = `1`;
471
472	if (overflow)
473	{
474	err[`0`]= ERANGE;
475	return negative ? INT_MIN32 : INT_MAX32;
476	}
477
478	return (negative ? -((long) i) : (long) i);
479
480	noconv:
481	err[`0`]= EDOM;
482	if (endptr != NULL)
483	endptr = (char* *) nptr;
484	return `0L`;
485	}
486
487
488	ulong my_strntoul_8bit(CHARSET_INFO *cs,
489	const char nptr, size_t l, int* base,
490	char *endptr, int* *err)
491	{
492	int negative;
493	register uint32 cutoff;
494	register uint cutlim;
495	register uint32 i;
496	register const char *s;
497	register uchar c;
498	const char save, e;
499	int overflow;
500
501	err= `0`; /* Initialize error indicator /
502
503	s = nptr;
504	e = nptr+l;
505
506	for( ; s<e && my_isspace(cs, *s); s++);
507
508	if (s==e)
509	{
510	goto noconv;
511	}
512
513	if (*s == `'-'`)
514	{
515	negative = `1`;
516	++s;
517	}
518	else if (*s == `'+'`)
519	{
520	negative = `0`;
521	++s;
522	}
523	else
524	negative = `0`;
525
526	save = s;
527	cutoff = ((uint32)~`0L`) / (uint32) base;
528	cutlim = (uint) (((uint32)~`0L`) % (uint32) base);
529	overflow = `0`;
530	i = `0`;
531
532	for (c = s; s != e; c = ++s)
533	{
534	if (c>=`'0'` && c<=`'9'`)
535	c -= `'0'`;
536	else if (c>=`'A'` && c<=`'Z'`)
537	c = c - `'A'` + `10`;
538	else if (c>=`'a'` && c<=`'z'`)
539	c = c - `'a'` + `10`;
540	else
541	break;
542	if (c >= base)
543	break;
544	if (i > cutoff \|\| (i == cutoff && c > cutlim))
545	overflow = `1`;
546	else
547	{
548	i *= (uint32) base;
549	i += c;
550	}
551	}
552
553	if (s == save)
554	goto noconv;
555
556	if (endptr != NULL)
557	endptr = (char* *) s;
558
559	if (overflow)
560	{
561	err[`0`]= ERANGE;
562	return (~(uint32) `0`);
563	}
564
565	return (negative ? -((long) i) : (long) i);
566
567	noconv:
568	err[`0`]= EDOM;
569	if (endptr != NULL)
570	endptr = (char* *) nptr;
571	return `0L`;
572	}
573
574
575	longlong my_strntoll_8bit(CHARSET_INFO cs __attribute__*((unused)),
576	const char nptr, size_t l, int* base,
577	char *endptr,int* *err)
578	{
579	int negative;
580	register ulonglong cutoff;
581	register uint cutlim;
582	register ulonglong i;
583	register const char s, e;
584	const char *save;
585	int overflow;
586
587	err= `0`; /* Initialize error indicator /
588
589	s = nptr;
590	e = nptr+l;
591
592	for(; s<e && my_isspace(cs,*s); s++);
593
594	if (s == e)
595	{
596	goto noconv;
597	}
598
599	if (*s == `'-'`)
600	{
601	negative = `1`;
602	++s;
603	}
604	else if (*s == `'+'`)
605	{
606	negative = `0`;
607	++s;
608	}
609	else
610	negative = `0`;
611
612	save = s;
613
614	cutoff = (~(ulonglong) `0`) / (unsigned long int) base;
615	cutlim = (uint) ((~(ulonglong) `0`) % (unsigned long int) base);
616
617	overflow = `0`;
618	i = `0`;
619	for ( ; s != e; s++)
620	{
621	register uchar c= *s;
622	if (c>=`'0'` && c<=`'9'`)
623	c -= `'0'`;
624	else if (c>=`'A'` && c<=`'Z'`)
625	c = c - `'A'` + `10`;
626	else if (c>=`'a'` && c<=`'z'`)
627	c = c - `'a'` + `10`;
628	else
629	break;
630	if (c >= base)
631	break;
632	if (i > cutoff \|\| (i == cutoff && c > cutlim))
633	overflow = `1`;
634	else
635	{
636	i *= (ulonglong) base;
637	i += c;
638	}
639	}
640
641	if (s == save)
642	goto noconv;
643
644	if (endptr != NULL)
645	endptr = (char* *) s;
646
647	if (negative)
648	{
649	if (i > (ulonglong) LONGLONG_MIN)
650	overflow = `1`;
651	}
652	else if (i > (ulonglong) LONGLONG_MAX)
653	overflow = `1`;
654
655	if (overflow)
656	{
657	err[`0`]= ERANGE;
658	return negative ? LONGLONG_MIN : LONGLONG_MAX;
659	}
660
661	return (negative ? -((longlong) i) : (longlong) i);
662
663	noconv:
664	err[`0`]= EDOM;
665	if (endptr != NULL)
666	endptr = (char* *) nptr;
667	return `0L`;
668	}
669
670
671	ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
672	const char nptr, size_t l, int* base,
673	char *endptr, int* *err)
674	{
675	int negative;
676	register ulonglong cutoff;
677	register uint cutlim;
678	register ulonglong i;
679	register const char s, e;
680	const char *save;
681	int overflow;
682
683	err= `0`; /* Initialize error indicator /
684
685	s = nptr;
686	e = nptr+l;
687
688	for(; s<e && my_isspace(cs,*s); s++);
689
690	if (s == e)
691	{
692	goto noconv;
693	}
694
695	if (*s == `'-'`)
696	{
697	negative = `1`;
698	++s;
699	}
700	else if (*s == `'+'`)
701	{
702	negative = `0`;
703	++s;
704	}
705	else
706	negative = `0`;
707
708	save = s;
709
710	cutoff = (~(ulonglong) `0`) / (unsigned long int) base;
711	cutlim = (uint) ((~(ulonglong) `0`) % (unsigned long int) base);
712
713	overflow = `0`;
714	i = `0`;
715	for ( ; s != e; s++)
716	{
717	register uchar c= *s;
718
719	if (c>=`'0'` && c<=`'9'`)
720	c -= `'0'`;
721	else if (c>=`'A'` && c<=`'Z'`)
722	c = c - `'A'` + `10`;
723	else if (c>=`'a'` && c<=`'z'`)
724	c = c - `'a'` + `10`;
725	else
726	break;
727	if (c >= base)
728	break;
729	if (i > cutoff \|\| (i == cutoff && c > cutlim))
730	overflow = `1`;
731	else
732	{
733	i *= (ulonglong) base;
734	i += c;
735	}
736	}
737
738	if (s == save)
739	goto noconv;
740
741	if (endptr != NULL)
742	endptr = (char* *) s;
743
744	if (overflow)
745	{
746	err[`0`]= ERANGE;
747	return (~(ulonglong) `0`);
748	}
749
750	return (negative ? -((longlong) i) : (longlong) i);
751
752	noconv:
753	err[`0`]= EDOM;
754	if (endptr != NULL)
755	endptr = (char* *) nptr;
756	return `0L`;
757	}
758
759
760	/*
761	Read double from string
762
763	SYNOPSIS:
764	my_strntod_8bit()
765	cs Character set information
766	str String to convert to double
767	length Optional length for string.
768	end result pointer to end of converted string
769	err Error number if failed conversion
770
771	NOTES:
772	If length is not INT_MAX32 or str[length] != 0 then the given str must
773	be writeable
774	If length == INT_MAX32 the str must be \0 terminated.
775
776	It's implemented this way to save a buffer allocation and a memory copy.
777
778	RETURN
779	Value of number in string
780	*/
781
782
783	double my_strntod_8bit(CHARSET_INFO cs __attribute__*((unused)),
784	char *str, size_t length,
785	char *end, int* *err)
786	{
787	if (length == INT_MAX32)
788	length= `65535`; / Should be big enough /
789	*end= str + length;
790	return my_strtod(str, end, err);
791	}
792
793
794	/*
795	This is a fast version optimized for the case of radix 10 / -10
796
797	Assume len >= 1
798	*/
799
800	size_t my_long10_to_str_8bit(CHARSET_INFO cs __attribute__*((unused)),
801	char dst, size_t len, int* radix, long int val)
802	{
803	char buffer[`66`];
804	register char p, e;
805	long int new_val;
806	uint sign=`0`;
807	unsigned long int uval = (unsigned long int) val;
808
809	e = p = &buffer[sizeof(buffer)-`1`];
810	*p= `0`;
811
812	if (radix < `0`)
813	{
814	if (val < `0`)
815	{
816	/ Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). /
817	uval= (unsigned long int)`0` - uval;
818	*dst++= `'-'`;
819	len--;
820	sign= `1`;
821	}
822	}
823
824	new_val = (long) (uval / `10`);
825	--p = `'0'`+ (char) (uval - (unsigned* long) new_val * `10`);
826	val = new_val;
827
828	while (val != `0`)
829	{
830	new_val=val/`10`;
831	--p = `'0'` + (char) (val-new_val`10`);
832	val= new_val;
833	}
834
835	len= MY_MIN(len, (size_t) (e-p));
836	memcpy(dst, p, len);
837	return len+sign;
838	}
839
840
841	size_t my_longlong10_to_str_8bit(CHARSET_INFO cs __attribute__*((unused)),
842	char dst, size_t len, int* radix,
843	longlong val)
844	{
845	char buffer[`65`];
846	register char p, e;
847	long long_val;
848	uint sign= `0`;
849	ulonglong uval = (ulonglong)val;
850
851	if (radix < `0`)
852	{
853	if (val < `0`)
854	{
855	/ Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). /
856	uval = (ulonglong)`0` - uval;
857	*dst++= `'-'`;
858	len--;
859	sign= `1`;
860	}
861	}
862
863	e = p = &buffer[sizeof(buffer)-`1`];
864	*p= `0`;
865
866	if (uval == `0`)
867	{
868	*--p= `'0'`;
869	len= `1`;
870	goto cnv;
871	}
872
873	while (uval > (ulonglong) LONG_MAX)
874	{
875	ulonglong quo= uval/(uint) `10`;
876	uint rem= (uint) (uval- quo* (uint) `10`);
877	*--p = `'0'` + rem;
878	uval= quo;
879	}
880
881	long_val= (long) uval;
882	while (long_val != `0`)
883	{
884	long quo= long_val/`10`;
885	--p = (char) (`'0'` + (long_val - quo`10`));
886	long_val= quo;
887	}
888
889	len= MY_MIN(len, (size_t) (e-p));
890	cnv:
891	memcpy(dst, p, len);
892	return len+sign;
893	}
894
895
896	/*
897	** Compare string against string with wildcard
898	** 0 if matched
899	** -1 if not matched with wildcard
900	** 1 if matched with wildcard
901	*/
902
903	#ifdef LIKE_CMP_TOUPPER
904	#define likeconv(s,A) (uchar) my_toupper(s,A)
905	#else
906	#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
907	#endif
908
909	#define INC_PTR(cs,A,B) (A)++
910
911
912	static
913	int my_wildcmp_8bit_impl(CHARSET_INFO *cs,
914	const char str,const* char *str_end,
915	const char wildstr,const* char *wildend,
916	int escape, int w_one, int w_many, int recurse_level)
917	{
918	int result= -`1`; / Not found, using wildcards /
919
920	if (my_string_stack_guard && my_string_stack_guard(recurse_level))
921	return `1`;
922	while (wildstr != wildend)
923	{
924	while (wildstr != w_many && wildstr != w_one)
925	{
926	if (*wildstr == escape && wildstr+`1` != wildend)
927	wildstr++;
928
929	if (str == str_end \|\| likeconv(cs,wildstr++) != likeconv(cs,str++))
930	return(`1`); / No match /
931	if (wildstr == wildend)
932	return(str != str_end); / Match if both are at end /
933	result=`1`; / Found an anchor char /
934	}
935	if (*wildstr == w_one)
936	{
937	do
938	{
939	if (str == str_end) / Skip one char if possible /
940	return(result);
941	INC_PTR(cs,str,str_end);
942	} while (++wildstr < wildend && *wildstr == w_one);
943	if (wildstr == wildend)
944	break;
945	}
946	if (*wildstr == w_many)
947	{ / Found w_many /
948	uchar cmp;
949
950	wildstr++;
951	/ Remove any '%' and '_' from the wild search string /
952	for (; wildstr != wildend ; wildstr++)
953	{
954	if (*wildstr == w_many)
955	continue;
956	if (*wildstr == w_one)
957	{
958	if (str == str_end)
959	return(-`1`);
960	INC_PTR(cs,str,str_end);
961	continue;
962	}
963	break; / Not a wild character /
964	}
965	if (wildstr == wildend)
966	return(`0`); / Ok if w_many is last /
967	if (str == str_end)
968	return(-`1`);
969
970	if ((cmp= *wildstr) == escape && wildstr+`1` != wildend)
971	cmp= *++wildstr;
972
973	INC_PTR(cs,wildstr,wildend); / This is compared trough cmp /
974	cmp=likeconv(cs,cmp);
975	do
976	{
977	/*
978	Find the next character in the subject string equal to 'cmp', then
979	check recursively my_wildcmp_8bit_impl() for the pattern remainder.
980	*/
981	while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
982	str++;
983	if (str++ == str_end)
984	return(-`1`); / 'cmp' was not found in the subject string /
985	{
986	int tmp=my_wildcmp_8bit_impl(cs,str,str_end,
987	wildstr,wildend,escape,w_one,
988	w_many, recurse_level+`1`);
989	if (tmp <= `0`)
990	return(tmp);
991	}
992	/*
993	The recursion call did not match. But it returned 1, which means
994	the pattern remainder has some non-special characters.
995	Continue, there is a chance that we'll find another 'cmp'
996	at a different position in the subject string.
997	*/
998	} while (str != str_end);
999	return(-`1`);
1000	}
1001	}
1002	return(str != str_end ? `1` : `0`);
1003	}
1004
1005	int my_wildcmp_8bit(CHARSET_INFO *cs,
1006	const char str,const* char *str_end,
1007	const char wildstr,const* char *wildend,
1008	int escape, int w_one, int w_many)
1009	{
1010	return my_wildcmp_8bit_impl(cs, str, str_end,
1011	wildstr, wildend,
1012	escape, w_one, w_many, `1`);
1013	}
1014
1015
1016	/*
1017	** Calculate min_str and max_str that ranges a LIKE string.
1018	** Arguments:
1019	** ptr Pointer to LIKE string.
1020	** ptr_length Length of LIKE string.
1021	** escape Escape character in LIKE. (Normally '\').
1022	** All escape characters should be removed from min_str and max_str
1023	** res_length Length of min_str and max_str.
1024	** min_str Smallest case sensitive string that ranges LIKE.
1025	** Should be space padded to res_length.
1026	** max_str Largest case sensitive string that ranges LIKE.
1027	** Normally padded with the biggest character sort value.
1028	**
1029	** The function should return 0 if ok and 1 if the LIKE string can't be
1030	** optimized !
1031	*/
1032
1033	my_bool my_like_range_simple(CHARSET_INFO *cs,
1034	const char *ptr, size_t ptr_length,
1035	pbool escape, pbool w_one, pbool w_many,
1036	size_t res_length,
1037	char min_str,char* *max_str,
1038	size_t min_length, size_t max_length)
1039	{
1040	const char *end= ptr + ptr_length;
1041	char *min_org=min_str;
1042	char *min_end=min_str+res_length;
1043	size_t charlen= res_length / cs->mbmaxlen;
1044
1045	for (; ptr != end && min_str != min_end && charlen > `0` ; ptr++, charlen--)
1046	{
1047	if (*ptr == escape && ptr+`1` != end)
1048	{
1049	ptr++; / Skip escape /
1050	min_str++= max_str++ = *ptr;
1051	continue;
1052	}
1053	if (ptr == w_one) /* '_' in SQL /
1054	{
1055	min_str++=`'\0'`; /* This should be min char /
1056	max_str++= (char*) cs->max_sort_char;
1057	continue;
1058	}
1059	if (ptr == w_many) /* '%' in SQL /
1060	{
1061	/ Calculate length of keys /
1062	*min_length= (cs->state & (MY_CS_BINSORT \| MY_CS_NOPAD)) ?
1063	(size_t) (min_str - min_org) :
1064	res_length;
1065	*max_length= res_length;
1066	do
1067	{
1068	*min_str++= `0`;
1069	max_str++= (char*) cs->max_sort_char;
1070	} while (min_str != min_end);
1071	return `0`;
1072	}
1073	min_str++= max_str++ = *ptr;
1074	}
1075
1076	min_length= max_length = (size_t) (min_str - min_org);
1077	while (min_str != min_end)
1078	min_str++= max_str++ = `' '`; / Because if key compression /
1079	return `0`;
1080	}
1081
1082
1083	size_t my_scan_8bit(CHARSET_INFO cs, const* char str, const* char end, int* sq)
1084	{
1085	const char *str0= str;
1086	switch (sq)
1087	{
1088	case MY_SEQ_INTTAIL:
1089	if (*str == `'.'`)
1090	{
1091	for(str++ ; str != end && *str == `'0'` ; str++);
1092	return (size_t) (str - str0);
1093	}
1094	return `0`;
1095
1096	case MY_SEQ_SPACES:
1097	for ( ; str < end ; str++)
1098	{
1099	if (!my_isspace(cs,*str))
1100	break;
1101	}
1102	return (size_t) (str - str0);
1103	case MY_SEQ_NONSPACES:
1104	for ( ; str < end ; str++)
1105	{
1106	if (my_isspace(cs, *str))
1107	break;
1108	}
1109	return (size_t) (str - str0);
1110	default:
1111	return `0`;
1112	}
1113	}
1114
1115
1116	void my_fill_8bit(CHARSET_INFO cs __attribute__*((unused)),
1117	char s, size_t l, int* fill)
1118	{
1119	bfill((uchar*) s,l,fill);
1120	}
1121
1122
1123	size_t my_numchars_8bit(CHARSET_INFO cs __attribute__*((unused)),
1124	const char b, const* char *e)
1125	{
1126	return (size_t) (e - b);
1127	}
1128
1129
1130	size_t my_numcells_8bit(CHARSET_INFO cs __attribute__*((unused)),
1131	const char b, const* char *e)
1132	{
1133	return (size_t) (e - b);
1134	}
1135
1136
1137	size_t my_charpos_8bit(CHARSET_INFO cs __attribute__*((unused)),
1138	const char b __attribute__*((unused)),
1139	const char e __attribute__*((unused)),
1140	size_t pos)
1141	{
1142	return pos;
1143	}
1144
1145
1146	size_t
1147	my_well_formed_char_length_8bit(CHARSET_INFO cs __attribute__*((unused)),
1148	const char start, const* char *end,
1149	size_t nchars, MY_STRCOPY_STATUS *status)
1150	{
1151	size_t nbytes= (size_t) (end - start);
1152	size_t res= MY_MIN(nbytes, nchars);
1153	status->m_well_formed_error_pos= NULL;
1154	status->m_source_end_pos= start + res;
1155	return res;
1156	}
1157
1158
1159	/*
1160	Copy a 8-bit string. Not more than "nchars" character are copied.
1161	*/
1162	size_t
1163	my_copy_8bit(CHARSET_INFO cs __attribute__*((unused)),
1164	char *dst, size_t dst_length,
1165	const char *src, size_t src_length,
1166	size_t nchars, MY_STRCOPY_STATUS *status)
1167	{
1168	set_if_smaller(src_length, dst_length);
1169	set_if_smaller(src_length, nchars);
1170	if (src_length)
1171	memmove(dst, src, src_length);
1172	status->m_source_end_pos= src + src_length;
1173	status->m_well_formed_error_pos= NULL;
1174	return src_length;
1175	}
1176
1177
1178	size_t my_lengthsp_8bit(CHARSET_INFO cs __attribute__*((unused)),
1179	const char *ptr, size_t length)
1180	{
1181	const char *end;
1182	end= (const char ) skip_trailing_space((const* uchar *)ptr, length);
1183	return (size_t) (end-ptr);
1184	}
1185
1186
1187	uint my_instr_simple(CHARSET_INFO *cs,
1188	const char *b, size_t b_length,
1189	const char *s, size_t s_length,
1190	my_match_t *match, uint nmatch)
1191	{
1192	register const uchar str, search, end, search_end;
1193
1194	if (s_length <= b_length)
1195	{
1196	if (!s_length)
1197	{
1198	if (nmatch)
1199	{
1200	match->beg= `0`;
1201	match->end= `0`;
1202	match->mb_len= `0`;
1203	}
1204	return `1`; / Empty string is always found /
1205	}
1206
1207	str= (const uchar*) b;
1208	search= (const uchar*) s;
1209	end= (const uchar*) b+b_length-s_length+`1`;
1210	search_end= (const uchar*) s + s_length;
1211
1212	skip:
1213	while (str != end)
1214	{
1215	if (cs->sort_order[str++] == cs->sort_order[search])
1216	{
1217	register const uchar i,j;
1218
1219	i= str;
1220	j= search+`1`;
1221
1222	while (j != search_end)
1223	if (cs->sort_order[i++] != cs->sort_order[j++])
1224	goto skip;
1225
1226	if (nmatch > `0`)
1227	{
1228	match[`0`].beg= `0`;
1229	match[`0`].end= (uint) (str- (const uchar*)b-`1`);
1230	match[`0`].mb_len= match[`0`].end;
1231
1232	if (nmatch > `1`)
1233	{
1234	match[`1`].beg= match[`0`].end;
1235	match[`1`].end= (uint)(match[`0`].end+s_length);
1236	match[`1`].mb_len= match[`1`].end-match[`1`].beg;
1237	}
1238	}
1239	return `2`;
1240	}
1241	}
1242	}
1243	return `0`;
1244	}
1245
1246
1247	typedef struct
1248	{
1249	int nchars;
1250	struct my_uni_idx_st uidx;
1251	} uni_idx;
1252
1253	#define PLANE_SIZE 0x100
1254	#define PLANE_NUM 0x100
1255	#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM)
1256
1257	static int pcmp(const void * f, const void * s)
1258	{
1259	const uni_idx F= (const* uni_idx*) f;
1260	const uni_idx S= (const* uni_idx*) s;
1261	int res;
1262
1263	if (!(res=((S->nchars)-(F->nchars))))
1264	res=((F->uidx.from)-(S->uidx.to));
1265	return res;
1266	}
1267
1268	static my_bool
1269	create_fromuni(struct charset_info_st *cs,
1270	MY_CHARSET_LOADER *loader)
1271	{
1272	uni_idx idx[PLANE_NUM];
1273	int i,n;
1274
1275	/*
1276	Check that Unicode map is loaded.
1277	It can be not loaded when the collation is
1278	listed in Index.xml but not specified
1279	in the character set specific XML file.
1280	*/
1281	if (!cs->tab_to_uni)
1282	return TRUE;
1283
1284	/ Clear plane statistics /
1285	bzero(idx,sizeof(idx));
1286
1287	/ Count number of characters in each plane /
1288	for (i=`0`; i< `0x100`; i++)
1289	{
1290	uint16 wc=cs->tab_to_uni[i];
1291	int pl= PLANE_NUMBER(wc);
1292
1293	if (wc \|\| !i)
1294	{
1295	if (!idx[pl].nchars)
1296	{
1297	idx[pl].uidx.from=wc;
1298	idx[pl].uidx.to=wc;
1299	}else
1300	{
1301	idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1302	idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1303	}
1304	idx[pl].nchars++;
1305	}
1306	}
1307
1308	/ Sort planes in descending order /
1309	qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1310
1311	for (i=`0`; i < PLANE_NUM; i++)
1312	{
1313	int ch,numchars;
1314	uchar *tab;
1315
1316	/ Skip empty plane /
1317	if (!idx[i].nchars)
1318	break;
1319
1320	numchars=idx[i].uidx.to-idx[i].uidx.from+`1`;
1321	if (!(idx[i].uidx.tab= tab= (uchar*)
1322	(loader->once_alloc) (numchars *
1323	sizeof(*idx[i].uidx.tab))))
1324	return TRUE;
1325
1326	bzero(tab,numchars*sizeof(*tab));
1327
1328	for (ch=`1`; ch < PLANE_SIZE; ch++)
1329	{
1330	uint16 wc=cs->tab_to_uni[ch];
1331	if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1332	{
1333	int ofs= wc - idx[i].uidx.from;
1334	if (!tab[ofs] \|\| tab[ofs] > `0x7F`) / Prefer ASCII/
1335	{
1336	/*
1337	Some character sets can have double encoding. For example,
1338	in ARMSCII8, the following characters are encoded twice:
1339
1340	Encoding#1 Encoding#2 Unicode Character Name
1341	---------- ---------- ------- --------------
1342	0x27 0xFF U+0027 APOSTROPHE
1343	0x28 0xA5 U+0028 LEFT PARENTHESIS
1344	0x29 0xA4 U+0029 RIGHT PARENTHESIS
1345	0x2C 0xAB U+002C COMMA
1346	0x2D 0xAC U+002D HYPHEN-MINUS
1347	0x2E 0xA9 U+002E FULL STOP
1348
1349	That is, both 0x27 and 0xFF convert to Unicode U+0027.
1350	When converting back from Unicode to ARMSCII,
1351	we prefer the ASCII range, that is we want U+0027
1352	to convert to 0x27 rather than to 0xFF.
1353	*/
1354	tab[ofs]= ch;
1355	}
1356	}
1357	}
1358	}
1359
1360	/ Allocate and fill reverse table for each plane /
1361	n=i;
1362	if (!(cs->tab_from_uni= (MY_UNI_IDX *)
1363	(loader->once_alloc)(sizeof(MY_UNI_IDX) * (n + `1`))))
1364	return TRUE;
1365
1366	for (i=`0`; i< n; i++)
1367	((struct my_uni_idx_st*)cs->tab_from_uni)[i]= idx[i].uidx;
1368
1369	/ Set end-of-list marker /
1370	bzero((char) &cs->tab_from_uni[i],sizeof*(MY_UNI_IDX));
1371	return FALSE;
1372	}
1373
1374
1375	/*
1376	Detect if a character set is 8bit,
1377	and it is pure ascii, i.e. doesn't have
1378	characters outside U+0000..U+007F
1379	This functions is shared between "conf_to_src"
1380	and dynamic charsets loader in "mysqld".
1381	*/
1382	static my_bool
1383	my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
1384	{
1385	size_t code;
1386	if (!cs->tab_to_uni)
1387	return `0`;
1388	for (code= `0`; code < `256`; code++)
1389	{
1390	if (cs->tab_to_uni[code] > `0x7F`)
1391	return `0`;
1392	}
1393	return `1`;
1394	}
1395
1396
1397	/*
1398	Shared function between conf_to_src and mysys.
1399	Check if a 8bit character set is compatible with
1400	ascii on the range 0x00..0x7F.
1401	*/
1402	static my_bool
1403	my_charset_is_ascii_compatible(CHARSET_INFO *cs)
1404	{
1405	uint i;
1406	if (!cs->tab_to_uni)
1407	return `1`;
1408	for (i= `0`; i < `128`; i++)
1409	{
1410	if (cs->tab_to_uni[i] != i)
1411	return `0`;
1412	}
1413	return `1`;
1414	}
1415
1416
1417	uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
1418	{
1419	uint flags= `0`;
1420	if (my_charset_is_8bit_pure_ascii(cs))
1421	flags\|= MY_CS_PUREASCII;
1422	if (!my_charset_is_ascii_compatible(cs))
1423	flags\|= MY_CS_NONASCII;
1424	return flags;
1425	}
1426
1427
1428	/*
1429	Check if case sensitive sort order: A < a < B.
1430	We need MY_CS_FLAG for regex library, and for
1431	case sensitivity flag for 5.0 client protocol,
1432	to support isCaseSensitive() method in JDBC driver
1433	*/
1434	uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
1435	{
1436	uint flags= `0`;
1437	if (cs->sort_order && cs->sort_order[`'A'`] < cs->sort_order[`'a'`] &&
1438	cs->sort_order[`'a'`] < cs->sort_order[`'B'`])
1439	flags\|= MY_CS_CSSORT;
1440	return flags;
1441	}
1442
1443
1444	static my_bool
1445	my_cset_init_8bit(struct charset_info_st cs, MY_CHARSET_LOADER loader)
1446	{
1447	cs->state\|= my_8bit_charset_flags_from_data(cs);
1448	cs->caseup_multiply= `1`;
1449	cs->casedn_multiply= `1`;
1450	cs->pad_char= `' '`;
1451	if (!cs->to_lower \|\| !cs->to_upper \|\| !cs->ctype \|\| !cs->tab_to_uni)
1452	return TRUE;
1453	return create_fromuni(cs, loader);
1454	}
1455
1456	static void set_max_sort_char(struct charset_info_st *cs)
1457	{
1458	uchar max_char;
1459	uint i;
1460
1461	if (!cs->sort_order)
1462	return;
1463
1464	max_char=cs->sort_order[(uchar) cs->max_sort_char];
1465	for (i= `0`; i < `256`; i++)
1466	{
1467	if ((uchar) cs->sort_order[i] > max_char)
1468	{
1469	max_char=(uchar) cs->sort_order[i];
1470	cs->max_sort_char= i;
1471	}
1472	}
1473	}
1474
1475	static my_bool my_coll_init_simple(struct charset_info_st *cs,
1476	MY_CHARSET_LOADER loader __attribute__*((unused)))
1477	{
1478	if (!cs->sort_order)
1479	return TRUE;
1480	cs->state\|= my_8bit_collation_flags_from_data(cs);
1481	set_max_sort_char(cs);
1482	return FALSE;
1483	}
1484
1485
1486	longlong my_strtoll10_8bit(CHARSET_INFO cs __attribute__*((unused)),
1487	const char nptr, char* *endptr, int* *error)
1488	{
1489	return my_strtoll10(nptr, endptr, error);
1490	}
1491
1492
1493	int my_mb_ctype_8bit(CHARSET_INFO cs, int* *ctype,
1494	const uchar s, const* uchar *e)
1495	{
1496	if (s >= e)
1497	{
1498	*ctype= `0`;
1499	return MY_CS_TOOSMALL;
1500	}
1501	ctype= cs->ctype[s + `1`];
1502	return `1`;
1503	}
1504
1505
1506	#define CUTOFF (ULONGLONG_MAX / 10)
1507	#define CUTLIM (ULONGLONG_MAX % 10)
1508	#define DIGITS_IN_ULONGLONG 20
1509
1510	static ulonglong d10[DIGITS_IN_ULONGLONG]=
1511	{
1512	`1`,
1513	`10`,
1514	`100`,
1515	`1000`,
1516	`10000`,
1517	`100000`,
1518	`1000000`,
1519	`10000000`,
1520	`100000000`,
1521	`1000000000`,
1522	`10000000000ULL`,
1523	`100000000000ULL`,
1524	`1000000000000ULL`,
1525	`10000000000000ULL`,
1526	`100000000000000ULL`,
1527	`1000000000000000ULL`,
1528	`10000000000000000ULL`,
1529	`100000000000000000ULL`,
1530	`1000000000000000000ULL`,
1531	`10000000000000000000ULL`
1532	};
1533
1534
1535	/*
1536
1537	Convert a string to unsigned long long integer value
1538	with rounding.
1539
1540	SYNOPSIS
1541	my_strntoull10_8bit()
1542	cs in pointer to character set
1543	str in pointer to the string to be converted
1544	length in string length
1545	unsigned_flag in whether the number is unsigned
1546	endptr out pointer to the stop character
1547	error out returned error code
1548
1549	DESCRIPTION
1550	This function takes the decimal representation of integer number
1551	from string str and converts it to an signed or unsigned
1552	long long integer value.
1553	Space characters and tab are ignored.
1554	A sign character might precede the digit characters.
1555	The number may have any number of pre-zero digits.
1556	The number may have decimal point and exponent.
1557	Rounding is always done in "away from zero" style:
1558	0.5 -> 1
1559	-0.5 -> -1
1560
1561	The function stops reading the string str after "length" bytes
1562	or at the first character that is not a part of correct number syntax:
1563
1564	<signed numeric literal> ::=
1565	[ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1566
1567	<exact numeric literal> ::=
1568	<unsigned integer> [ <period> [ <unsigned integer> ] ]
1569	\| <period> <unsigned integer>
1570	<unsigned integer> ::= <digit>...
1571
1572	RETURN VALUES
1573	Value of string as a signed/unsigned longlong integer
1574
1575	endptr cannot be NULL. The function will store the end pointer
1576	to the stop character here.
1577
1578	The error parameter contains information how things went:
1579	0 ok
1580	ERANGE If the the value of the converted number is out of range
1581	In this case the return value is:
1582	- ULONGLONG_MAX if unsigned_flag and the number was too big
1583	- 0 if unsigned_flag and the number was negative
1584	- LONGLONG_MAX if no unsigned_flag and the number is too big
1585	- LONGLONG_MIN if no unsigned_flag and the number it too big negative
1586
1587	EDOM If the string didn't contain any digits.
1588	In this case the return value is 0.
1589	*/
1590
1591	ulonglong
1592	my_strntoull10rnd_8bit(CHARSET_INFO cs __attribute__*((unused)),
1593	const char str, size_t length, int* unsigned_flag,
1594	char *endptr, int* *error)
1595	{
1596	const char dot, end9, beg, end= str + length;
1597	ulonglong ull;
1598	ulong ul;
1599	uchar ch;
1600	int shift= `0`, digits= `0`, negative, addon;
1601
1602	/ Skip leading spaces and tabs /
1603	for ( ; str < end && (str == `' '` \|\| str == `'\t'`) ; str++);
1604
1605	if (str >= end)
1606	goto ret_edom;
1607
1608	if ((negative= (str == `'-'`)) \|\| str==`'+'`) / optional sign /
1609	{
1610	if (++str == end)
1611	goto ret_edom;
1612	}
1613
1614	beg= str;
1615	end9= (str + `9`) > end ? end : (str + `9`);
1616	/ Accumulate small number into ulong, for performance purposes /
1617	for (ul= `0` ; str < end9 && (ch= (uchar) (*str - `'0'`)) < `10`; str++)
1618	{
1619	ul= ul * `10` + ch;
1620	}
1621
1622	if (str >= end) / Small number without dots and expanents /
1623	{
1624	endptr= (char**) str;
1625	if (negative)
1626	{
1627	if (unsigned_flag)
1628	{
1629	*error= ul ? MY_ERRNO_ERANGE : `0`;
1630	return `0`;
1631	}
1632	else
1633	{
1634	*error= `0`;
1635	return (ulonglong) (longlong) -(long) ul;
1636	}
1637	}
1638	else
1639	{
1640	*error=`0`;
1641	return (ulonglong) ul;
1642	}
1643	}
1644
1645	digits= (int) (str - beg);
1646
1647	/ Continue to accumulate into ulonglong /
1648	for (dot= NULL, ull= ul; str < end; str++)
1649	{
1650	if ((ch= (uchar) (*str - `'0'`)) < `10`)
1651	{
1652	if (ull < CUTOFF \|\| (ull == CUTOFF && ch <= CUTLIM))
1653	{
1654	ull= ull * `10` + ch;
1655	digits++;
1656	continue;
1657	}
1658	/*
1659	Adding the next digit would overflow.
1660	Remember the next digit in "addon", for rounding.
1661	Scan all digits with an optional single dot.
1662	*/
1663	if (ull == CUTOFF)
1664	{
1665	ull= ULONGLONG_MAX;
1666	addon= `1`;
1667	str++;
1668	}
1669	else
1670	addon= (*str >= `'5'`);
1671	if (!dot)
1672	{
1673	for ( ; str < end && (ch= (uchar) (*str - `'0'`)) < `10`; shift++, str++);
1674	if (str < end && *str == `'.'`)
1675	{
1676	str++;
1677	for ( ; str < end && (ch= (uchar) (*str - `'0'`)) < `10`; str++);
1678	}
1679	}
1680	else
1681	{
1682	shift= (int) (dot - str);
1683	for ( ; str < end && (ch= (uchar) (*str - `'0'`)) < `10`; str++);
1684	}
1685	goto exp;
1686	}
1687
1688	if (*str == `'.'`)
1689	{
1690	if (dot)
1691	{
1692	/ The second dot character /
1693	addon= `0`;
1694	goto exp;
1695	}
1696	else
1697	{
1698	dot= str + `1`;
1699	}
1700	continue;
1701	}
1702
1703	/ Unknown character, exit the loop /
1704	break;
1705	}
1706	shift= dot ? (int)(dot - str) : `0`; / Right shift /
1707	addon= `0`;
1708
1709	exp: / [ E [ <sign> ] <unsigned integer> ] /
1710
1711	if (!digits)
1712	{
1713	str= beg;
1714	goto ret_edom;
1715	}
1716
1717	if (str < end && (str == `'e'` \|\| str == `'E'`))
1718	{
1719	str++;
1720	if (str < end)
1721	{
1722	int negative_exp, exponent;
1723	if ((negative_exp= (str == `'-'`)) \|\| str==`'+'`)
1724	{
1725	if (++str == end)
1726	{
1727	str-= `2`; / 'e-' or 'e+' not followed by digits /
1728	goto ret_sign;
1729	}
1730	}
1731	for (exponent= `0` ;
1732	str < end && (ch= (uchar) (*str - `'0'`)) < `10`;
1733	str++)
1734	{
1735	exponent= exponent * `10` + ch;
1736	}
1737	shift+= negative_exp ? -exponent : exponent;
1738	}
1739	else
1740	str--; / 'e' not followed by digits /
1741	}
1742
1743	if (shift == `0`) / No shift, check addon digit /
1744	{
1745	if (addon)
1746	{
1747	if (ull == ULONGLONG_MAX)
1748	goto ret_too_big;
1749	ull++;
1750	}
1751	goto ret_sign;
1752	}
1753
1754	if (shift < `0`) / Right shift /
1755	{
1756	ulonglong d, r;
1757
1758	if (-shift >= DIGITS_IN_ULONGLONG)
1759	goto ret_zero; / Exponent is a big negative number, return 0 /
1760
1761	d= d10[-shift];
1762	r= (ull % d) * `2`;
1763	ull /= d;
1764	if (r >= d)
1765	ull++;
1766	goto ret_sign;
1767	}
1768
1769	if (shift > DIGITS_IN_ULONGLONG) / Huge left shift /
1770	{
1771	if (!ull)
1772	goto ret_sign;
1773	goto ret_too_big;
1774	}
1775
1776	for ( ; shift > `0`; shift--, ull= `10`) /* Left shift /
1777	{
1778	if (ull > CUTOFF)
1779	goto ret_too_big; / Overflow, number too big /
1780	}
1781
1782	ret_sign:
1783	endptr= (char**) str;
1784
1785	if (!unsigned_flag)
1786	{
1787	if (negative)
1788	{
1789	if (ull > (ulonglong) LONGLONG_MIN)
1790	{
1791	*error= MY_ERRNO_ERANGE;
1792	return (ulonglong) LONGLONG_MIN;
1793	}
1794	*error= `0`;
1795	return (ulonglong) -(longlong) ull;
1796	}
1797	else
1798	{
1799	if (ull > (ulonglong) LONGLONG_MAX)
1800	{
1801	*error= MY_ERRNO_ERANGE;
1802	return (ulonglong) LONGLONG_MAX;
1803	}
1804	*error= `0`;
1805	return ull;
1806	}
1807	}
1808
1809	/ Unsigned number /
1810	if (negative && ull)
1811	{
1812	*error= MY_ERRNO_ERANGE;
1813	return `0`;
1814	}
1815	*error= `0`;
1816	return ull;
1817
1818	ret_zero:
1819	endptr= (char**) str;
1820	*error= `0`;
1821	return `0`;
1822
1823	ret_edom:
1824	endptr= (char**) str;
1825	*error= MY_ERRNO_EDOM;
1826	return `0`;
1827
1828	ret_too_big:
1829	endptr= (char**) str;
1830	*error= MY_ERRNO_ERANGE;
1831	return unsigned_flag ?
1832	ULONGLONG_MAX :
1833	negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX;
1834	}
1835
1836
1837	/*
1838	Check if a constant can be propagated
1839
1840	SYNOPSIS:
1841	my_propagate_simple()
1842	cs Character set information
1843	str String to convert to double
1844	length Optional length for string.
1845
1846	NOTES:
1847	Takes the string in the given charset and check
1848	if it can be safely propagated in the optimizer.
1849
1850	create table t1 (
1851	s char(5) character set latin1 collate latin1_german2_ci);
1852	insert into t1 values (0xf6); -- o-umlaut
1853	select from t1 where length(s)=1 and s='oe';*
1854
1855	The above query should return one row.
1856	We cannot convert this query into:
1857	select from t1 where length('oe')=1 and s='oe';*
1858
1859	Currently we don't check the constant itself,
1860	and decide not to propagate a constant
1861	just if the collation itself allows tricky things
1862	like expansions and contractions. In the future
1863	we can write a more sophisticated functions to
1864	check the constants. For example, 'oa' can always
1865	be safety propagated in German2 because unlike
1866	'oe' it does not have any special meaning.
1867
1868	RETURN
1869	1 if constant can be safely propagated
1870	0 if it is not safe to propagate the constant
1871	*/
1872
1873
1874
1875	my_bool my_propagate_simple(CHARSET_INFO cs __attribute__*((unused)),
1876	const uchar str __attribute__*((unused)),
1877	size_t length __attribute__((unused)))
1878	{
1879	return `1`;
1880	}
1881
1882
1883	my_bool my_propagate_complex(CHARSET_INFO cs __attribute__*((unused)),
1884	const uchar str __attribute__*((unused)),
1885	size_t length __attribute__((unused)))
1886	{
1887	return `0`;
1888	}
1889
1890
1891	/*
1892	Normalize strxfrm flags
1893
1894	SYNOPSIS:
1895	my_strxfrm_flag_normalize()
1896	flags - non-normalized flags
1897	nlevels - number of levels
1898
1899	NOTES:
1900	If levels are omitted, then 1-maximum is assumed.
1901	If any level number is greater than the maximum,
1902	it is treated as the maximum.
1903
1904	RETURN
1905	normalized flags
1906	*/
1907
1908	uint my_strxfrm_flag_normalize(uint flags, uint maximum)
1909	{
1910	DBUG_ASSERT(maximum >= `1` && maximum <= MY_STRXFRM_NLEVELS);
1911
1912	/ If levels are omitted, then 1-maximum is assumed/
1913	if (!(flags & MY_STRXFRM_LEVEL_ALL))
1914	{
1915	static uint def_level_flags[]= {`0`, `0x01`, `0x03`, `0x07`, `0x0F`, `0x1F`, `0x3F` };
1916	uint flag_pad= flags &
1917	(MY_STRXFRM_PAD_WITH_SPACE \| MY_STRXFRM_PAD_TO_MAXLEN);
1918	flags= def_level_flags[maximum] \| flag_pad;
1919	}
1920	else
1921	{
1922	uint i;
1923	uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL;
1924	uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1925	uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1926	uint flag_pad= flags &
1927	(MY_STRXFRM_PAD_WITH_SPACE \| MY_STRXFRM_PAD_TO_MAXLEN);
1928
1929	/*
1930	If any level number is greater than the maximum,
1931	it is treated as the maximum.
1932	*/
1933	for (maximum--, flags= `0`, i= `0`; i < MY_STRXFRM_NLEVELS; i++)
1934	{
1935	uint src_bit= `1` << i;
1936	if (flag_lev & src_bit)
1937	{
1938	uint dst_bit= `1` << MY_MIN(i, maximum);
1939	flags\|= dst_bit;
1940	flags\|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
1941	flags\|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
1942	}
1943	}
1944	flags\|= flag_pad;
1945	}
1946
1947	return flags;
1948	}
1949
1950
1951	/*
1952	Apply DESC and REVERSE collation rules.
1953
1954	SYNOPSIS:
1955	my_strxfrm_desc_and_reverse()
1956	str - pointer to string
1957	strend - end of string
1958	flags - flags
1959	level - which level, starting from 0.
1960
1961	NOTES:
1962	Apply DESC or REVERSE or both flags.
1963
1964	If DESC flag is given, then the weights
1965	come out NOTed or negated for that level.
1966
1967	If REVERSE flags is given, then the weights come out in
1968	reverse order for that level, that is, starting with
1969	the last character and ending with the first character.
1970
1971	If nether DESC nor REVERSE flags are give,
1972	the string is not changed.
1973
1974	*/
1975	void
1976	my_strxfrm_desc_and_reverse(uchar str, uchar strend,
1977	uint flags, uint level)
1978	{
1979	if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
1980	{
1981	if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1982	{
1983	for (strend--; str <= strend;)
1984	{
1985	uchar tmp= *str;
1986	str++= ~strend;
1987	*strend--= ~tmp;
1988	}
1989	}
1990	else
1991	{
1992	for (; str < strend; str++)
1993	str= ~str;
1994	}
1995	}
1996	else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1997	{
1998	for (strend--; str < strend;)
1999	{
2000	uchar tmp= *str;
2001	str++= strend;
2002	*strend--= tmp;
2003	}
2004	}
2005	}
2006
2007
2008	size_t
2009	my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
2010	uchar str, uchar frmend, uchar *strend,
2011	uint nweights, uint flags, uint level)
2012	{
2013	if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
2014	{
2015	uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
2016	cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
2017	frmend+= fill_length;
2018	}
2019	my_strxfrm_desc_and_reverse(str, frmend, flags, level);
2020	if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
2021	{
2022	size_t fill_length= strend - frmend;
2023	cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
2024	frmend= strend;
2025	}
2026	return frmend - str;
2027	}
2028
2029
2030	size_t
2031	my_strxfrm_pad_desc_and_reverse_nopad(CHARSET_INFO *cs,
2032	uchar str, uchar frmend, uchar *strend,
2033	uint nweights, uint flags, uint level)
2034	{
2035	if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
2036	{
2037	uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
2038	memset(frmend, `0x00`, fill_length);
2039	frmend+= fill_length;
2040	}
2041	my_strxfrm_desc_and_reverse(str, frmend, flags, level);
2042	if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
2043	{
2044	size_t fill_length= strend - frmend;
2045	memset(frmend, `0x00`, fill_length);
2046	frmend= strend;
2047	}
2048	return frmend - str;
2049	}
2050
2051
2052	MY_CHARSET_HANDLER my_charset_8bit_handler=
2053	{
2054	my_cset_init_8bit,
2055	my_numchars_8bit,
2056	my_charpos_8bit,
2057	my_lengthsp_8bit,
2058	my_numcells_8bit,
2059	my_mb_wc_8bit,
2060	my_wc_mb_8bit,
2061	my_mb_ctype_8bit,
2062	my_caseup_str_8bit,
2063	my_casedn_str_8bit,
2064	my_caseup_8bit,
2065	my_casedn_8bit,
2066	my_snprintf_8bit,
2067	my_long10_to_str_8bit,
2068	my_longlong10_to_str_8bit,
2069	my_fill_8bit,
2070	my_strntol_8bit,
2071	my_strntoul_8bit,
2072	my_strntoll_8bit,
2073	my_strntoull_8bit,
2074	my_strntod_8bit,
2075	my_strtoll10_8bit,
2076	my_strntoull10rnd_8bit,
2077	my_scan_8bit,
2078	my_charlen_8bit,
2079	my_well_formed_char_length_8bit,
2080	my_copy_8bit,
2081	my_wc_mb_bin, / native_to_mb /
2082	};
2083
2084	MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
2085	{
2086	my_coll_init_simple, / init /
2087	my_strnncoll_simple,
2088	my_strnncollsp_simple,
2089	my_strnxfrm_simple,
2090	my_strnxfrmlen_simple,
2091	my_like_range_simple,
2092	my_wildcmp_8bit,
2093	my_strcasecmp_8bit,
2094	my_instr_simple,
2095	my_hash_sort_simple,
2096	my_propagate_simple
2097	};
2098
2099
2100	MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler =
2101	{
2102	my_coll_init_simple, / init /
2103	my_strnncoll_simple,
2104	my_strnncollsp_simple_nopad,
2105	my_strnxfrm_simple_nopad,
2106	my_strnxfrmlen_simple,
2107	my_like_range_simple,
2108	my_wildcmp_8bit,
2109	my_strcasecmp_8bit,
2110	my_instr_simple,
2111	my_hash_sort_simple_nopad,
2112	my_propagate_simple
2113	};
2114

/* Browse the source code of MariaDB/strings/ctype-simple.c */