1/* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
15
16/* Some useful string utility functions used by the MySQL server */
17
18#include "mariadb.h"
19#include "sql_priv.h"
20#include "unireg.h"
21#include "strfunc.h"
22#include "sql_class.h"
23#include "typelib.h" // TYPELIB
24#include "m_ctype.h" // my_charset_latin1
25#include "mysqld.h" // system_charset_info
26
27/*
28 Return bitmap for strings used in a set
29
30 SYNOPSIS
31 find_set()
32 lib Strings in set
33 str Strings of set-strings separated by ','
34 err_pos If error, set to point to start of wrong set string
35 err_len If error, set to the length of wrong set string
36 set_warning Set to 1 if some string in set couldn't be used
37
38 NOTE
39 We delete all end space from str before comparison
40
41 RETURN
42 bitmap of all sets found in x.
43 set_warning is set to 1 if there was any sets that couldn't be set
44*/
45
46static const char field_separator=',';
47
48ulonglong find_set(TYPELIB *lib, const char *str, size_t length, CHARSET_INFO *cs,
49 char **err_pos, uint *err_len, bool *set_warning)
50{
51 CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
52 const char *end= str + strip->cset->lengthsp(strip, str, length);
53 ulonglong found= 0;
54 *err_pos= 0; // No error yet
55 *err_len= 0;
56 if (str != end)
57 {
58 const char *start= str;
59 for (;;)
60 {
61 const char *pos= start;
62 uint var_len;
63 int mblen= 1;
64
65 if (cs && cs->mbminlen > 1)
66 {
67 for ( ; pos < end; pos+= mblen)
68 {
69 my_wc_t wc;
70 if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
71 (const uchar *) end)) < 1)
72 mblen= 1; // Not to hang on a wrong multibyte sequence
73 if (wc == (my_wc_t) field_separator)
74 break;
75 }
76 }
77 else
78 for (; pos != end && *pos != field_separator; pos++) ;
79 var_len= (uint) (pos - start);
80 uint find= cs ? find_type2(lib, start, var_len, cs) :
81 find_type(lib, start, var_len, (bool) 0);
82 if (unlikely(!find && *err_len == 0))
83 {
84 // report the first error with length > 0
85 *err_pos= (char*) start;
86 *err_len= var_len;
87 *set_warning= 1;
88 }
89 else
90 found|= 1ULL << (find - 1);
91 if (pos >= end)
92 break;
93 start= pos + mblen;
94 }
95 }
96 return found;
97}
98
99/*
100 Function to find a string in a TYPELIB
101 (similar to find_type() of mysys/typelib.c)
102
103 SYNOPSIS
104 find_type()
105 lib TYPELIB (struct of pointer to values + count)
106 find String to find
107 length Length of string to find
108 part_match Allow part matching of value
109
110 RETURN
111 0 error
112 > 0 position in TYPELIB->type_names +1
113*/
114
115uint find_type(const TYPELIB *lib, const char *find, size_t length,
116 bool part_match)
117{
118 uint found_count=0, found_pos=0;
119 const char *end= find+length;
120 const char *i;
121 const char *j;
122 for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
123 {
124 for (i=find ; i != end &&
125 my_toupper(system_charset_info,*i) ==
126 my_toupper(system_charset_info,*j) ; i++, j++) ;
127 if (i == end)
128 {
129 if (! *j)
130 return(pos);
131 found_count++;
132 found_pos= pos;
133 }
134 }
135 return(found_count == 1 && part_match ? found_pos : 0);
136}
137
138
139/*
140 Find a string in a list of strings according to collation
141
142 SYNOPSIS
143 find_type2()
144 lib TYPELIB (struct of pointer to values + count)
145 x String to find
146 length String length
147 cs Character set + collation to use for comparison
148
149 NOTES
150
151 RETURN
152 0 No matching value
153 >0 Offset+1 in typelib for matched string
154*/
155
156uint find_type2(const TYPELIB *typelib, const char *x, size_t length,
157 CHARSET_INFO *cs)
158{
159 int pos;
160 const char *j;
161 DBUG_ENTER("find_type2");
162 DBUG_PRINT("enter",("x: '%.*s' lib: %p", (int)length, x, typelib));
163
164 if (!typelib->count)
165 {
166 DBUG_PRINT("exit",("no count"));
167 DBUG_RETURN(0);
168 }
169
170 for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
171 {
172 if (!my_strnncoll(cs, (const uchar*) x, length,
173 (const uchar*) j, typelib->type_lengths[pos]))
174 DBUG_RETURN(pos+1);
175 }
176 DBUG_PRINT("exit",("Couldn't find type"));
177 DBUG_RETURN(0);
178} /* find_type */
179
180
181/*
182 Un-hex all elements in a typelib
183
184 SYNOPSIS
185 unhex_type2()
186 interval TYPELIB (struct of pointer to values + lengths + count)
187
188 NOTES
189
190 RETURN
191 N/A
192*/
193
194void unhex_type2(TYPELIB *interval)
195{
196 for (uint pos= 0; pos < interval->count; pos++)
197 {
198 char *from, *to;
199 for (from= to= (char*) interval->type_names[pos]; *from; )
200 {
201 /*
202 Note, hexchar_to_int(*from++) doesn't work
203 one some compilers, e.g. IRIX. Looks like a compiler
204 bug in inline functions in combination with arguments
205 that have a side effect. So, let's use from[0] and from[1]
206 and increment 'from' by two later.
207 */
208
209 *to++= (char) (hexchar_to_int(from[0]) << 4) +
210 hexchar_to_int(from[1]);
211 from+= 2;
212 }
213 interval->type_lengths[pos] /= 2;
214 }
215}
216
217
218/*
219 Check if the first word in a string is one of the ones in TYPELIB
220
221 SYNOPSIS
222 check_word()
223 lib TYPELIB
224 val String to check
225 end End of input
226 end_of_word Store value of last used byte here if we found word
227
228 RETURN
229 0 No matching value
230 > 1 lib->type_names[#-1] matched
231 end_of_word will point to separator character/end in 'val'
232*/
233
234uint check_word(TYPELIB *lib, const char *val, const char *end,
235 const char **end_of_word)
236{
237 int res;
238 const char *ptr;
239
240 /* Fiend end of word */
241 for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
242 ;
243 if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
244 *end_of_word= ptr;
245 return res;
246}
247
248
249/*
250 Converts a string between character sets
251
252 SYNOPSIS
253 strconvert()
254 from_cs source character set
255 from source, a null terminated string
256 to destination buffer
257 to_length destination buffer length
258
259 NOTES
260 'to' is always terminated with a '\0' character.
261 If there is no enough space to convert whole string,
262 only prefix is converted, and terminated with '\0'.
263
264 RETURN VALUES
265 result string length
266*/
267
268
269uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length,
270 CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors)
271{
272 int cnvres;
273 my_wc_t wc;
274 char *to_start= to;
275 uchar *to_end= (uchar*) to + to_length - 1;
276 const uchar *from_end= (const uchar*) from + from_length;
277 my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
278 my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
279 uint error_count= 0;
280
281 while (1)
282 {
283 if ((cnvres= (*mb_wc)(from_cs, &wc,
284 (uchar*) from, from_end)) > 0)
285 {
286 if (!wc)
287 break;
288 from+= cnvres;
289 }
290 else if (cnvres == MY_CS_ILSEQ)
291 {
292 error_count++;
293 from++;
294 wc= '?';
295 }
296 else
297 break; // Impossible char.
298
299outp:
300
301 if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
302 to+= cnvres;
303 else if (cnvres == MY_CS_ILUNI && wc != '?')
304 {
305 error_count++;
306 wc= '?';
307 goto outp;
308 }
309 else
310 break;
311 }
312 *to= '\0';
313 *errors= error_count;
314 return (uint32) (to - to_start);
315
316}
317
318
319/*
320 Searches for a LEX_STRING in an LEX_STRING array.
321
322 SYNOPSIS
323 find_string_in_array()
324 heap The array
325 needle The string to search for
326
327 NOTE
328 The last LEX_STRING in the array should have str member set to NULL
329
330 RETURN VALUES
331 -1 Not found
332 >=0 Ordinal position
333*/
334
335int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle,
336 CHARSET_INFO * const cs)
337{
338 const LEX_CSTRING *pos;
339 for (pos= haystack; pos->str; pos++)
340 if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
341 (uchar *) needle->str, needle->length))
342 {
343 return (int)(pos - haystack);
344 }
345 return -1;
346}
347
348
349const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set,
350 const char *lib[])
351{
352 char buff[STRING_BUFFER_USUAL_SIZE*8];
353 String tmp(buff, sizeof(buff), &my_charset_latin1);
354 LEX_CSTRING unused;
355
356 if (!result)
357 result= &unused;
358
359 tmp.length(0);
360
361 for (uint i= 0; set; i++, set >>= 1)
362 if (set & 1) {
363 tmp.append(lib[i]);
364 tmp.append(',');
365 }
366
367 if (tmp.length())
368 {
369 result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
370 result->length= tmp.length()-1;
371 }
372 else
373 {
374 result->str= const_cast<char*>("");
375 result->length= 0;
376 }
377 return result->str;
378}
379
380const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set,
381 const char *lib[])
382{
383 char buff[STRING_BUFFER_USUAL_SIZE*8];
384 String tmp(buff, sizeof(buff), &my_charset_latin1);
385 LEX_CSTRING unused;
386
387 if (!result) result= &unused;
388
389 tmp.length(0);
390
391 // note that the last element is always "default", and it's ignored below
392 for (uint i= 0; lib[i+1]; i++, set >>= 1)
393 {
394 tmp.append(lib[i]);
395 tmp.append(set & 1 ? "=on," : "=off,");
396 }
397
398 result->str= thd->strmake(tmp.ptr(), tmp.length()-1);
399 result->length= tmp.length()-1;
400
401 return result->str;
402}
403
404