ctype-bin.c source code [MariaDB/strings/ctype-bin.c]

/* Copyright (c) 2002-2007 MySQL AB & tommy@valley.ne.jp
   Copyright (c) 2002, 2014, Oracle and/or its affiliates.
   Copyright (c) 2009, 2014, SkySQL Ab.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; version 2
   of the License.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA */
19
/* This file is for binary pseudo charset, created by bar@mysql.com */
21
22
23	#include "strings_def.h"
24	#include <m_ctype.h>
25
26	static const uchar ctype_bin[]=
27	{
28	`0`,
29	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `40`, `40`, `40`, `40`, `40`, `32`, `32`,
30	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
31	`72`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`, `16`,
32	`132`,`132`,`132`,`132`,`132`,`132`,`132`,`132`,`132`,`132`, `16`, `16`, `16`, `16`, `16`, `16`,
33	`16`,`129`,`129`,`129`,`129`,`129`,`129`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
34	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `16`, `16`, `16`, `16`, `16`,
35	`16`,`130`,`130`,`130`,`130`,`130`,`130`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
36	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `16`, `16`, `16`, `16`, `32`,
37	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
38	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
39	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
40	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
41	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
42	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
43	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
44	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
45	};
46
47
48	/ Dummy array for toupper / tolower / sortorder /
49
50	static const uchar bin_char_array[] =
51	{
52	`0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
53	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
54	`32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`, `41`, `42`, `43`, `44`, `45`, `46`, `47`,
55	`48`, `49`, `50`, `51`, `52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `62`, `63`,
56	`64`, `65`, `66`, `67`, `68`, `69`, `70`, `71`, `72`, `73`, `74`, `75`, `76`, `77`, `78`, `79`,
57	`80`, `81`, `82`, `83`, `84`, `85`, `86`, `87`, `88`, `89`, `90`, `91`, `92`, `93`, `94`, `95`,
58	`96`, `97`, `98`, `99`,`100`,`101`,`102`,`103`,`104`,`105`,`106`,`107`,`108`,`109`,`110`,`111`,
59	`112`,`113`,`114`,`115`,`116`,`117`,`118`,`119`,`120`,`121`,`122`,`123`,`124`,`125`,`126`,`127`,
60	`128`,`129`,`130`,`131`,`132`,`133`,`134`,`135`,`136`,`137`,`138`,`139`,`140`,`141`,`142`,`143`,
61	`144`,`145`,`146`,`147`,`148`,`149`,`150`,`151`,`152`,`153`,`154`,`155`,`156`,`157`,`158`,`159`,
62	`160`,`161`,`162`,`163`,`164`,`165`,`166`,`167`,`168`,`169`,`170`,`171`,`172`,`173`,`174`,`175`,
63	`176`,`177`,`178`,`179`,`180`,`181`,`182`,`183`,`184`,`185`,`186`,`187`,`188`,`189`,`190`,`191`,
64	`192`,`193`,`194`,`195`,`196`,`197`,`198`,`199`,`200`,`201`,`202`,`203`,`204`,`205`,`206`,`207`,
65	`208`,`209`,`210`,`211`,`212`,`213`,`214`,`215`,`216`,`217`,`218`,`219`,`220`,`221`,`222`,`223`,
66	`224`,`225`,`226`,`227`,`228`,`229`,`230`,`231`,`232`,`233`,`234`,`235`,`236`,`237`,`238`,`239`,
67	`240`,`241`,`242`,`243`,`244`,`245`,`246`,`247`,`248`,`249`,`250`,`251`,`252`,`253`,`254`,`255`
68	};
69
70
71	static my_bool
72	my_coll_init_8bit_bin(struct charset_info_st *cs,
73	MY_CHARSET_LOADER loader __attribute__*((unused)))
74	{
75	cs->max_sort_char=`255`;
76	return FALSE;
77	}
78
79	static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
80	const uchar *s, size_t slen,
81	const uchar *t, size_t tlen,
82	my_bool t_is_prefix)
83	{
84	size_t len=MY_MIN(slen,tlen);
85	int cmp= memcmp(s,t,len);
86	return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
87	}
88
89
90	size_t my_lengthsp_binary(CHARSET_INFO cs __attribute__*((unused)),
91	const char ptr __attribute__*((unused)),
92	size_t length)
93	{
94	return length;
95	}
96
97
98	/*
99	Compare two strings. Result is sign(first_argument - second_argument)
100
101	SYNOPSIS
102	my_strnncollsp_binary()
103	cs Chararacter set
104	s String to compare
105	slen Length of 's'
106	t String to compare
107	tlen Length of 't'
108
109	NOTE
110	This function is used for real binary strings, i.e. for
111	BLOB, BINARY(N) and VARBINARY(N).
112	It compares trailing spaces as spaces.
113
114	RETURN
115	< 0 s < t
116	0 s == t
117	> 0 s > t
118	*/
119
120	static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
121	const uchar *s, size_t slen,
122	const uchar *t, size_t tlen)
123	{
124	return my_strnncoll_binary(cs,s,slen,t,tlen,`0`);
125	}
126
127
128	static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
129	const uchar *s, size_t slen,
130	const uchar *t, size_t tlen,
131	my_bool t_is_prefix)
132	{
133	size_t len=MY_MIN(slen,tlen);
134	int cmp= memcmp(s,t,len);
135	return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
136	}
137
138
139	/*
140	Compare a string to an array of spaces, for PAD SPACE behaviour.
141	@param str - the string
142	@param length - the length of the string
143	@return <0 - if a byte less than SPACE was found
144	@return >0 - if a byte greater than SPACE was found
145	@return 0 - if the string entirely consists of SPACE characters
146	*/
147	int my_strnncollsp_padspace_bin(const uchar *str, size_t length)
148	{
149	for ( ; length ; str++, length--)
150	{
151	if (*str < `' '`)
152	return -`1`;
153	else if (*str > `' '`)
154	return `1`;
155	}
156	return `0`;
157	}
158
159
160	/*
161	Compare two strings. Result is sign(first_argument - second_argument)
162
163	SYNOPSIS
164	my_strnncollsp_8bit_bin()
165	cs Chararacter set
166	s String to compare
167	slen Length of 's'
168	t String to compare
169	tlen Length of 't'
170
171	NOTE
172	This function is used for character strings with binary collations.
173	The shorter string is extended with end space to be as long as the longer
174	one.
175
176	RETURN
177	< 0 s < t
178	0 s == t
179	> 0 s > t
180	*/
181
182	static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
183	const uchar *a, size_t a_length,
184	const uchar *b, size_t b_length)
185	{
186	const uchar *end;
187	size_t length;
188
189	end= a + (length= MY_MIN(a_length, b_length));
190	while (a < end)
191	{
192	if (a++ != b++)
193	return ((int) a[-`1`] - (int) b[-`1`]);
194	}
195	return a_length == b_length ? `0` :
196	a_length < b_length ?
197	-my_strnncollsp_padspace_bin(b, b_length - length) :
198	my_strnncollsp_padspace_bin(a, a_length - length);
199	}
200
201
202	static int my_strnncollsp_8bit_nopad_bin(CHARSET_INFO * cs
203	__attribute__((unused)),
204	const uchar *a, size_t a_length,
205	const uchar *b, size_t b_length)
206	{
207	return my_strnncoll_8bit_bin(cs, a, a_length, b, b_length, FALSE);
208	}
209
210
211	/ This function is used for all conversion functions /
212
213	static size_t my_case_str_bin(CHARSET_INFO cs __attribute__*((unused)),
214	char str __attribute__*((unused)))
215	{
216	return `0`;
217	}
218
219
220	static size_t my_case_bin(CHARSET_INFO cs __attribute__*((unused)),
221	char src __attribute__*((unused)),
222	size_t srclen,
223	char dst __attribute__*((unused)),
224	size_t dstlen __attribute__((unused)))
225	{
226	return srclen;
227	}
228
229
230	static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
231	const char s, const* char *t)
232	{
233	return strcmp(s,t);
234	}
235
236
237	static int my_mb_wc_bin(CHARSET_INFO cs __attribute__*((unused)),
238	my_wc_t *wc,
239	const uchar *str,
240	const uchar end __attribute__*((unused)))
241	{
242	if (str >= end)
243	return MY_CS_TOOSMALL;
244
245	*wc=str[`0`];
246	return `1`;
247	}
248
249
250	int my_wc_mb_bin(CHARSET_INFO cs __attribute__*((unused)),
251	my_wc_t wc, uchar s, uchar e)
252	{
253	if (s >= e)
254	return MY_CS_TOOSMALL;
255
256	if (wc < `256`)
257	{
258	s[`0`]= (char) wc;
259	return `1`;
260	}
261	return MY_CS_ILUNI;
262	}
263
264
265	void my_hash_sort_bin(CHARSET_INFO cs __attribute__*((unused)),
266	const uchar key, size_t len,ulong nr1, ulong *nr2)
267	{
268	const uchar *end = key + len;
269	ulong tmp1= *nr1;
270	ulong tmp2= *nr2;
271
272	for (; key < end ; key++)
273	{
274	MY_HASH_ADD(tmp1, tmp2, (uint) *key);
275	}
276
277	*nr1= tmp1;
278	*nr2= tmp2;
279	}
280
281
282	void my_hash_sort_8bit_bin(CHARSET_INFO cs __attribute__*((unused)),
283	const uchar *key, size_t len,
284	ulong nr1, ulong nr2)
285	{
286	/*
287	Remove trailing spaces. We have to do this to be able to compare
288	'A ' and 'A' as identical
289	*/
290	const uchar *end= skip_trailing_space(key, len);
291	my_hash_sort_bin(cs, key, end - key, nr1, nr2);
292	}
293
294
/*
  The following defines are here to keep the following code identical to
  the one in ctype-simple.c

  likeconv(s,A)    - yields A unchanged; 's' is ignored.
  INC_PTR(cs,A,B)  - advances A by one; 'cs' and 'B' are ignored.
*/

#define likeconv(s,A) (A)
#define INC_PTR(cs,A,B) ((A)++)
302
303
304	static
305	int my_wildcmp_bin_impl(CHARSET_INFO *cs,
306	const char str,const* char *str_end,
307	const char wildstr,const* char *wildend,
308	int escape, int w_one, int w_many, int recurse_level)
309	{
310	int result= -`1`; / Not found, using wildcards /
311
312	if (my_string_stack_guard && my_string_stack_guard(recurse_level))
313	return `1`;
314	while (wildstr != wildend)
315	{
316	while (wildstr != w_many && wildstr != w_one)
317	{
318	if (*wildstr == escape && wildstr+`1` != wildend)
319	wildstr++;
320	if (str == str_end \|\| likeconv(cs,wildstr++) != likeconv(cs,str++))
321	return(`1`); / No match /
322	if (wildstr == wildend)
323	return(str != str_end); / Match if both are at end /
324	result=`1`; / Found an anchor char /
325	}
326	if (*wildstr == w_one)
327	{
328	do
329	{
330	if (str == str_end) / Skip one char if possible /
331	return(result);
332	INC_PTR(cs,str,str_end);
333	} while (++wildstr < wildend && *wildstr == w_one);
334	if (wildstr == wildend)
335	break;
336	}
337	if (*wildstr == w_many)
338	{ / Found w_many /
339	uchar cmp;
340	wildstr++;
341	/ Remove any '%' and '_' from the wild search string /
342	for (; wildstr != wildend ; wildstr++)
343	{
344	if (*wildstr == w_many)
345	continue;
346	if (*wildstr == w_one)
347	{
348	if (str == str_end)
349	return(-`1`);
350	INC_PTR(cs,str,str_end);
351	continue;
352	}
353	break; / Not a wild character /
354	}
355	if (wildstr == wildend)
356	return(`0`); / match if w_many is last /
357	if (str == str_end)
358	return(-`1`);
359
360	if ((cmp= *wildstr) == escape && wildstr+`1` != wildend)
361	cmp= *++wildstr;
362
363	INC_PTR(cs,wildstr,wildend); / This is compared through cmp /
364	cmp=likeconv(cs,cmp);
365	do
366	{
367	while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
368	str++;
369	if (str++ == str_end)
370	return(-`1`);
371	{
372	int tmp=my_wildcmp_bin_impl(cs,str,str_end,wildstr,wildend,escape,w_one,
373	w_many, recurse_level + `1`);
374	if (tmp <= `0`)
375	return(tmp);
376	}
377	} while (str != str_end);
378	return(-`1`);
379	}
380	}
381	return(str != str_end ? `1` : `0`);
382	}
383
384	int my_wildcmp_bin(CHARSET_INFO *cs,
385	const char str,const* char *str_end,
386	const char wildstr,const* char *wildend,
387	int escape, int w_one, int w_many)
388	{
389	return my_wildcmp_bin_impl(cs, str, str_end,
390	wildstr, wildend,
391	escape, w_one, w_many, `1`);
392	}
393
394
395	static size_t
396	my_strnxfrm_8bit_bin(CHARSET_INFO *cs,
397	uchar * dst, size_t dstlen, uint nweights,
398	const uchar *src, size_t srclen, uint flags)
399	{
400	set_if_smaller(srclen, dstlen);
401	set_if_smaller(srclen, nweights);
402	if (dst != src)
403	memcpy(dst, src, srclen);
404	return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen,
405	(uint)(nweights - srclen), flags, `0`);
406	}
407
408
409	static size_t
410	my_strnxfrm_8bit_nopad_bin(CHARSET_INFO *cs,
411	uchar * dst, size_t dstlen, uint nweights,
412	const uchar *src, size_t srclen, uint flags)
413	{
414	set_if_smaller(srclen, dstlen);
415	set_if_smaller(srclen, nweights);
416	if (dst != src)
417	memcpy(dst, src, srclen);
418	return my_strxfrm_pad_desc_and_reverse_nopad(cs, dst, dst + srclen,
419	dst + dstlen,(uint)(nweights - srclen),
420	flags, `0`);
421	}
422
423
424	static
425	uint my_instr_bin(CHARSET_INFO cs __attribute__*((unused)),
426	const char *b, size_t b_length,
427	const char *s, size_t s_length,
428	my_match_t *match, uint nmatch)
429	{
430	register const uchar str, search, end, search_end;
431
432	if (s_length <= b_length)
433	{
434	if (!s_length)
435	{
436	if (nmatch)
437	{
438	match->beg= `0`;
439	match->end= `0`;
440	match->mb_len= `0`;
441	}
442	return `1`; / Empty string is always found /
443	}
444
445	str= (const uchar*) b;
446	search= (const uchar*) s;
447	end= (const uchar*) b+b_length-s_length+`1`;
448	search_end= (const uchar*) s + s_length;
449
450	skip:
451	while (str != end)
452	{
453	if ( (str++) == (search))
454	{
455	register const uchar i,j;
456
457	i= str;
458	j= search+`1`;
459
460	while (j != search_end)
461	if ((i++) != (j++))
462	goto skip;
463
464	if (nmatch > `0`)
465	{
466	match[`0`].beg= `0`;
467	match[`0`].end= (uint) (str- (const uchar*)b-`1`);
468	match[`0`].mb_len= match[`0`].end;
469
470	if (nmatch > `1`)
471	{
472	match[`1`].beg= match[`0`].end;
473	match[`1`].end= (uint)(match[`0`].end+s_length);
474	match[`1`].mb_len= match[`1`].end-match[`1`].beg;
475	}
476	}
477	return `2`;
478	}
479	}
480	}
481	return `0`;
482	}
483
484
485	MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
486	{
487	my_coll_init_8bit_bin,
488	my_strnncoll_8bit_bin,
489	my_strnncollsp_8bit_bin,
490	my_strnxfrm_8bit_bin,
491	my_strnxfrmlen_simple,
492	my_like_range_simple,
493	my_wildcmp_bin,
494	my_strcasecmp_bin,
495	my_instr_bin,
496	my_hash_sort_8bit_bin,
497	my_propagate_simple
498	};
499
500
501	MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler =
502	{
503	my_coll_init_8bit_bin,
504	my_strnncoll_8bit_bin,
505	my_strnncollsp_8bit_nopad_bin,
506	my_strnxfrm_8bit_nopad_bin,
507	my_strnxfrmlen_simple,
508	my_like_range_simple,
509	my_wildcmp_bin,
510	my_strcasecmp_bin,
511	my_instr_bin,
512	my_hash_sort_bin,
513	my_propagate_simple
514	};
515
516
517	static MY_COLLATION_HANDLER my_collation_binary_handler =
518	{
519	NULL, / init /
520	my_strnncoll_binary,
521	my_strnncollsp_binary,
522	my_strnxfrm_8bit_bin,
523	my_strnxfrmlen_simple,
524	my_like_range_simple,
525	my_wildcmp_bin,
526	my_strcasecmp_bin,
527	my_instr_bin,
528	my_hash_sort_bin,
529	my_propagate_simple
530	};
531
532
533	static MY_CHARSET_HANDLER my_charset_handler=
534	{
535	NULL, / init /
536	my_numchars_8bit,
537	my_charpos_8bit,
538	my_lengthsp_binary,
539	my_numcells_8bit,
540	my_mb_wc_bin,
541	my_wc_mb_bin,
542	my_mb_ctype_8bit,
543	my_case_str_bin,
544	my_case_str_bin,
545	my_case_bin,
546	my_case_bin,
547	my_snprintf_8bit,
548	my_long10_to_str_8bit,
549	my_longlong10_to_str_8bit,
550	my_fill_8bit,
551	my_strntol_8bit,
552	my_strntoul_8bit,
553	my_strntoll_8bit,
554	my_strntoull_8bit,
555	my_strntod_8bit,
556	my_strtoll10_8bit,
557	my_strntoull10rnd_8bit,
558	my_scan_8bit,
559	my_charlen_8bit,
560	my_well_formed_char_length_8bit,
561	my_copy_8bit,
562	my_wc_mb_bin,
563	};
564
565
566	struct charset_info_st my_charset_bin =
567	{
568	`63`,`0`,`0`, / number /
569	MY_CS_COMPILED\|MY_CS_BINSORT\|MY_CS_PRIMARY\|MY_CS_NOPAD,/ state /
570	"binary", / cs name /
571	"binary", / name /
572	"", / comment /
573	NULL, / tailoring /
574	ctype_bin, / ctype /
575	bin_char_array, / to_lower /
576	bin_char_array, / to_upper /
577	NULL, / sort_order /
578	NULL, / uca /
579	NULL, / tab_to_uni /
580	NULL, / tab_from_uni /
581	&my_unicase_default, / caseinfo /
582	NULL, / state_map /
583	NULL, / ident_map /
584	`1`, / strxfrm_multiply /
585	`1`, / caseup_multiply /
586	`1`, / casedn_multiply /
587	`1`, / mbminlen /
588	`1`, / mbmaxlen /
589	`0`, / min_sort_char /
590	`255`, / max_sort_char /
591	`0`, / pad char /
592	`0`, / escape_with_backslash_is_dangerous /
593	`1`, / levels_for_order /
594	&my_charset_handler,
595	&my_collation_binary_handler
596	};
597

Browse the source code of MariaDB/strings/ctype-bin.c