| 1 | /* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. | 
|---|
| 2 |  | 
|---|
| 3 | This program is free software; you can redistribute it and/or modify | 
|---|
| 4 | it under the terms of the GNU General Public License as published by | 
|---|
| 5 | the Free Software Foundation; version 2 of the License. | 
|---|
| 6 |  | 
|---|
| 7 | This program is distributed in the hope that it will be useful, | 
|---|
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 10 | GNU General Public License for more details. | 
|---|
| 11 |  | 
|---|
| 12 | You should have received a copy of the GNU General Public License | 
|---|
| 13 | along with this program; if not, write to the Free Software | 
|---|
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA */ | 
|---|
| 15 |  | 
|---|
| 16 | /* Some useful string utility functions used by the MySQL server */ | 
|---|
| 17 |  | 
|---|
| 18 | #include "mariadb.h" | 
|---|
| 19 | #include "sql_priv.h" | 
|---|
| 20 | #include "unireg.h" | 
|---|
| 21 | #include "strfunc.h" | 
|---|
| 22 | #include "sql_class.h" | 
|---|
| 23 | #include "typelib.h"                            // TYPELIB | 
|---|
| 24 | #include "m_ctype.h"                            // my_charset_latin1 | 
|---|
| 25 | #include "mysqld.h"                             // system_charset_info | 
|---|
| 26 |  | 
|---|
| 27 | /* | 
|---|
| 28 | Return bitmap for strings used in a set | 
|---|
| 29 |  | 
|---|
| 30 | SYNOPSIS | 
|---|
| 31 | find_set() | 
|---|
| 32 | lib			Strings in set | 
|---|
| 33 | str			Strings of set-strings separated by ',' | 
|---|
| 34 | err_pos		If error, set to point to start of wrong set string | 
|---|
| 35 | err_len		If error, set to the length of wrong set string | 
|---|
| 36 | set_warning		Set to 1 if some string in set couldn't be used | 
|---|
| 37 |  | 
|---|
| 38 | NOTE | 
|---|
| 39 | We delete all end space from str before comparison | 
|---|
| 40 |  | 
|---|
| 41 | RETURN | 
|---|
| 42 | bitmap of all sets found in x. | 
|---|
| 43 | set_warning is set to 1 if there was any sets that couldn't be set | 
|---|
| 44 | */ | 
|---|
| 45 |  | 
|---|
| 46 | static const char field_separator=','; | 
|---|
| 47 |  | 
|---|
| 48 | ulonglong find_set(TYPELIB *lib, const char *str, size_t length, CHARSET_INFO *cs, | 
|---|
| 49 | char **err_pos, uint *err_len, bool *set_warning) | 
|---|
| 50 | { | 
|---|
| 51 | CHARSET_INFO *strip= cs ? cs : &my_charset_latin1; | 
|---|
| 52 | const char *end= str + strip->cset->lengthsp(strip, str, length); | 
|---|
| 53 | ulonglong found= 0; | 
|---|
| 54 | *err_pos= 0;                  // No error yet | 
|---|
| 55 | *err_len= 0; | 
|---|
| 56 | if (str != end) | 
|---|
| 57 | { | 
|---|
| 58 | const char *start= str; | 
|---|
| 59 | for (;;) | 
|---|
| 60 | { | 
|---|
| 61 | const char *pos= start; | 
|---|
| 62 | uint var_len; | 
|---|
| 63 | int mblen= 1; | 
|---|
| 64 |  | 
|---|
| 65 | if (cs && cs->mbminlen > 1) | 
|---|
| 66 | { | 
|---|
| 67 | for ( ; pos < end; pos+= mblen) | 
|---|
| 68 | { | 
|---|
| 69 | my_wc_t wc; | 
|---|
| 70 | if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos, | 
|---|
| 71 | (const uchar *) end)) < 1) | 
|---|
| 72 | mblen= 1; // Not to hang on a wrong multibyte sequence | 
|---|
| 73 | if (wc == (my_wc_t) field_separator) | 
|---|
| 74 | break; | 
|---|
| 75 | } | 
|---|
| 76 | } | 
|---|
| 77 | else | 
|---|
| 78 | for (; pos != end && *pos != field_separator; pos++) ; | 
|---|
| 79 | var_len= (uint) (pos - start); | 
|---|
| 80 | uint find= cs ? find_type2(lib, start, var_len, cs) : | 
|---|
| 81 | find_type(lib, start, var_len, (bool) 0); | 
|---|
| 82 | if (unlikely(!find && *err_len == 0)) | 
|---|
| 83 | { | 
|---|
| 84 | // report the first error with length > 0 | 
|---|
| 85 | *err_pos= (char*) start; | 
|---|
| 86 | *err_len= var_len; | 
|---|
| 87 | *set_warning= 1; | 
|---|
| 88 | } | 
|---|
| 89 | else | 
|---|
| 90 | found|= 1ULL << (find - 1); | 
|---|
| 91 | if (pos >= end) | 
|---|
| 92 | break; | 
|---|
| 93 | start= pos + mblen; | 
|---|
| 94 | } | 
|---|
| 95 | } | 
|---|
| 96 | return found; | 
|---|
| 97 | } | 
|---|
| 98 |  | 
|---|
| 99 | /* | 
|---|
| 100 | Function to find a string in a TYPELIB | 
|---|
| 101 | (similar to find_type() of mysys/typelib.c) | 
|---|
| 102 |  | 
|---|
| 103 | SYNOPSIS | 
|---|
| 104 | find_type() | 
|---|
| 105 | lib			TYPELIB (struct of pointer to values + count) | 
|---|
| 106 | find			String to find | 
|---|
| 107 | length		Length of string to find | 
|---|
| 108 | part_match		Allow part matching of value | 
|---|
| 109 |  | 
|---|
| 110 | RETURN | 
|---|
| 111 | 0 error | 
|---|
| 112 | > 0 position in TYPELIB->type_names +1 | 
|---|
| 113 | */ | 
|---|
| 114 |  | 
|---|
| 115 | uint find_type(const TYPELIB *lib, const char *find, size_t length, | 
|---|
| 116 | bool part_match) | 
|---|
| 117 | { | 
|---|
| 118 | uint found_count=0, found_pos=0; | 
|---|
| 119 | const char *end= find+length; | 
|---|
| 120 | const char *i; | 
|---|
| 121 | const char *j; | 
|---|
| 122 | for (uint pos=0 ; (j=lib->type_names[pos++]) ; ) | 
|---|
| 123 | { | 
|---|
| 124 | for (i=find ; i != end && | 
|---|
| 125 | my_toupper(system_charset_info,*i) == | 
|---|
| 126 | my_toupper(system_charset_info,*j) ; i++, j++) ; | 
|---|
| 127 | if (i == end) | 
|---|
| 128 | { | 
|---|
| 129 | if (! *j) | 
|---|
| 130 | return(pos); | 
|---|
| 131 | found_count++; | 
|---|
| 132 | found_pos= pos; | 
|---|
| 133 | } | 
|---|
| 134 | } | 
|---|
| 135 | return(found_count == 1 && part_match ? found_pos : 0); | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 |  | 
|---|
| 139 | /* | 
|---|
| 140 | Find a string in a list of strings according to collation | 
|---|
| 141 |  | 
|---|
| 142 | SYNOPSIS | 
|---|
| 143 | find_type2() | 
|---|
| 144 | lib			TYPELIB (struct of pointer to values + count) | 
|---|
| 145 | x			String to find | 
|---|
| 146 | length               String length | 
|---|
| 147 | cs			Character set + collation to use for comparison | 
|---|
| 148 |  | 
|---|
| 149 | NOTES | 
|---|
| 150 |  | 
|---|
| 151 | RETURN | 
|---|
| 152 | 0	No matching value | 
|---|
| 153 | >0  Offset+1 in typelib for matched string | 
|---|
| 154 | */ | 
|---|
| 155 |  | 
|---|
| 156 | uint find_type2(const TYPELIB *typelib, const char *x, size_t length, | 
|---|
| 157 | CHARSET_INFO *cs) | 
|---|
| 158 | { | 
|---|
| 159 | int pos; | 
|---|
| 160 | const char *j; | 
|---|
| 161 | DBUG_ENTER( "find_type2"); | 
|---|
| 162 | DBUG_PRINT( "enter",( "x: '%.*s'  lib: %p", (int)length, x, typelib)); | 
|---|
| 163 |  | 
|---|
| 164 | if (!typelib->count) | 
|---|
| 165 | { | 
|---|
| 166 | DBUG_PRINT( "exit",( "no count")); | 
|---|
| 167 | DBUG_RETURN(0); | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | for (pos=0 ; (j=typelib->type_names[pos]) ; pos++) | 
|---|
| 171 | { | 
|---|
| 172 | if (!my_strnncoll(cs, (const uchar*) x, length, | 
|---|
| 173 | (const uchar*) j, typelib->type_lengths[pos])) | 
|---|
| 174 | DBUG_RETURN(pos+1); | 
|---|
| 175 | } | 
|---|
| 176 | DBUG_PRINT( "exit",( "Couldn't find type")); | 
|---|
| 177 | DBUG_RETURN(0); | 
|---|
| 178 | } /* find_type */ | 
|---|
| 179 |  | 
|---|
| 180 |  | 
|---|
| 181 | /* | 
|---|
| 182 | Un-hex all elements in a typelib | 
|---|
| 183 |  | 
|---|
| 184 | SYNOPSIS | 
|---|
| 185 | unhex_type2() | 
|---|
| 186 | interval       TYPELIB (struct of pointer to values + lengths + count) | 
|---|
| 187 |  | 
|---|
| 188 | NOTES | 
|---|
| 189 |  | 
|---|
| 190 | RETURN | 
|---|
| 191 | N/A | 
|---|
| 192 | */ | 
|---|
| 193 |  | 
|---|
| 194 | void unhex_type2(TYPELIB *interval) | 
|---|
| 195 | { | 
|---|
| 196 | for (uint pos= 0; pos < interval->count; pos++) | 
|---|
| 197 | { | 
|---|
| 198 | char *from, *to; | 
|---|
| 199 | for (from= to= (char*) interval->type_names[pos]; *from; ) | 
|---|
| 200 | { | 
|---|
| 201 | /* | 
|---|
| 202 | Note, hexchar_to_int(*from++) doesn't work | 
|---|
| 203 | one some compilers, e.g. IRIX. Looks like a compiler | 
|---|
| 204 | bug in inline functions in combination with arguments | 
|---|
| 205 | that have a side effect. So, let's use from[0] and from[1] | 
|---|
| 206 | and increment 'from' by two later. | 
|---|
| 207 | */ | 
|---|
| 208 |  | 
|---|
| 209 | *to++= (char) (hexchar_to_int(from[0]) << 4) + | 
|---|
| 210 | hexchar_to_int(from[1]); | 
|---|
| 211 | from+= 2; | 
|---|
| 212 | } | 
|---|
| 213 | interval->type_lengths[pos] /= 2; | 
|---|
| 214 | } | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 |  | 
|---|
| 218 | /* | 
|---|
| 219 | Check if the first word in a string is one of the ones in TYPELIB | 
|---|
| 220 |  | 
|---|
| 221 | SYNOPSIS | 
|---|
| 222 | check_word() | 
|---|
| 223 | lib		TYPELIB | 
|---|
| 224 | val		String to check | 
|---|
| 225 | end		End of input | 
|---|
| 226 | end_of_word	Store value of last used byte here if we found word | 
|---|
| 227 |  | 
|---|
| 228 | RETURN | 
|---|
| 229 | 0	 No matching value | 
|---|
| 230 | > 1  lib->type_names[#-1] matched | 
|---|
| 231 | end_of_word will point to separator character/end in 'val' | 
|---|
| 232 | */ | 
|---|
| 233 |  | 
|---|
| 234 | uint check_word(TYPELIB *lib, const char *val, const char *end, | 
|---|
| 235 | const char **end_of_word) | 
|---|
| 236 | { | 
|---|
| 237 | int res; | 
|---|
| 238 | const char *ptr; | 
|---|
| 239 |  | 
|---|
| 240 | /* Fiend end of word */ | 
|---|
| 241 | for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++) | 
|---|
| 242 | ; | 
|---|
| 243 | if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0) | 
|---|
| 244 | *end_of_word= ptr; | 
|---|
| 245 | return res; | 
|---|
| 246 | } | 
|---|
| 247 |  | 
|---|
| 248 |  | 
|---|
| 249 | /* | 
|---|
| 250 | Converts a string between character sets | 
|---|
| 251 |  | 
|---|
| 252 | SYNOPSIS | 
|---|
| 253 | strconvert() | 
|---|
| 254 | from_cs       source character set | 
|---|
| 255 | from          source, a null terminated string | 
|---|
| 256 | to            destination buffer | 
|---|
| 257 | to_length     destination buffer length | 
|---|
| 258 |  | 
|---|
| 259 | NOTES | 
|---|
| 260 | 'to' is always terminated with a '\0' character. | 
|---|
| 261 | If there is no enough space to convert whole string, | 
|---|
| 262 | only prefix is converted, and terminated with '\0'. | 
|---|
| 263 |  | 
|---|
| 264 | RETURN VALUES | 
|---|
| 265 | result string length | 
|---|
| 266 | */ | 
|---|
| 267 |  | 
|---|
| 268 |  | 
|---|
| 269 | uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length, | 
|---|
| 270 | CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors) | 
|---|
| 271 | { | 
|---|
| 272 | int cnvres; | 
|---|
| 273 | my_wc_t wc; | 
|---|
| 274 | char *to_start= to; | 
|---|
| 275 | uchar *to_end= (uchar*) to + to_length - 1; | 
|---|
| 276 | const uchar *from_end= (const uchar*) from + from_length; | 
|---|
| 277 | my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc; | 
|---|
| 278 | my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; | 
|---|
| 279 | uint error_count= 0; | 
|---|
| 280 |  | 
|---|
| 281 | while (1) | 
|---|
| 282 | { | 
|---|
| 283 | if ((cnvres= (*mb_wc)(from_cs, &wc, | 
|---|
| 284 | (uchar*) from, from_end)) > 0) | 
|---|
| 285 | { | 
|---|
| 286 | if (!wc) | 
|---|
| 287 | break; | 
|---|
| 288 | from+= cnvres; | 
|---|
| 289 | } | 
|---|
| 290 | else if (cnvres == MY_CS_ILSEQ) | 
|---|
| 291 | { | 
|---|
| 292 | error_count++; | 
|---|
| 293 | from++; | 
|---|
| 294 | wc= '?'; | 
|---|
| 295 | } | 
|---|
| 296 | else | 
|---|
| 297 | break; // Impossible char. | 
|---|
| 298 |  | 
|---|
| 299 | outp: | 
|---|
| 300 |  | 
|---|
| 301 | if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) | 
|---|
| 302 | to+= cnvres; | 
|---|
| 303 | else if (cnvres == MY_CS_ILUNI && wc != '?') | 
|---|
| 304 | { | 
|---|
| 305 | error_count++; | 
|---|
| 306 | wc= '?'; | 
|---|
| 307 | goto outp; | 
|---|
| 308 | } | 
|---|
| 309 | else | 
|---|
| 310 | break; | 
|---|
| 311 | } | 
|---|
| 312 | *to= '\0'; | 
|---|
| 313 | *errors= error_count; | 
|---|
| 314 | return (uint32) (to - to_start); | 
|---|
| 315 |  | 
|---|
| 316 | } | 
|---|
| 317 |  | 
|---|
| 318 |  | 
|---|
| 319 | /* | 
|---|
| 320 | Searches for a LEX_STRING in an LEX_STRING array. | 
|---|
| 321 |  | 
|---|
| 322 | SYNOPSIS | 
|---|
| 323 | find_string_in_array() | 
|---|
| 324 | heap    The array | 
|---|
| 325 | needle  The string to search for | 
|---|
| 326 |  | 
|---|
| 327 | NOTE | 
|---|
| 328 | The last LEX_STRING in the array should have str member set to NULL | 
|---|
| 329 |  | 
|---|
| 330 | RETURN VALUES | 
|---|
| 331 | -1   Not found | 
|---|
| 332 | >=0  Ordinal position | 
|---|
| 333 | */ | 
|---|
| 334 |  | 
|---|
| 335 | int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle, | 
|---|
| 336 | CHARSET_INFO * const cs) | 
|---|
| 337 | { | 
|---|
| 338 | const LEX_CSTRING *pos; | 
|---|
| 339 | for (pos= haystack; pos->str; pos++) | 
|---|
| 340 | if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length, | 
|---|
| 341 | (uchar *) needle->str, needle->length)) | 
|---|
| 342 | { | 
|---|
| 343 | return (int)(pos - haystack); | 
|---|
| 344 | } | 
|---|
| 345 | return -1; | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|
| 348 |  | 
|---|
| 349 | const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, | 
|---|
| 350 | const char *lib[]) | 
|---|
| 351 | { | 
|---|
| 352 | char buff[STRING_BUFFER_USUAL_SIZE*8]; | 
|---|
| 353 | String tmp(buff, sizeof(buff), &my_charset_latin1); | 
|---|
| 354 | LEX_CSTRING unused; | 
|---|
| 355 |  | 
|---|
| 356 | if (!result) | 
|---|
| 357 | result= &unused; | 
|---|
| 358 |  | 
|---|
| 359 | tmp.length(0); | 
|---|
| 360 |  | 
|---|
| 361 | for (uint i= 0; set; i++, set >>= 1) | 
|---|
| 362 | if (set & 1) { | 
|---|
| 363 | tmp.append(lib[i]); | 
|---|
| 364 | tmp.append(','); | 
|---|
| 365 | } | 
|---|
| 366 |  | 
|---|
| 367 | if (tmp.length()) | 
|---|
| 368 | { | 
|---|
| 369 | result->str=    thd->strmake(tmp.ptr(), tmp.length()-1); | 
|---|
| 370 | result->length= tmp.length()-1; | 
|---|
| 371 | } | 
|---|
| 372 | else | 
|---|
| 373 | { | 
|---|
| 374 | result->str= const_cast<char*>( ""); | 
|---|
| 375 | result->length= 0; | 
|---|
| 376 | } | 
|---|
| 377 | return result->str; | 
|---|
| 378 | } | 
|---|
| 379 |  | 
|---|
| 380 | const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, | 
|---|
| 381 | const char *lib[]) | 
|---|
| 382 | { | 
|---|
| 383 | char buff[STRING_BUFFER_USUAL_SIZE*8]; | 
|---|
| 384 | String tmp(buff, sizeof(buff), &my_charset_latin1); | 
|---|
| 385 | LEX_CSTRING unused; | 
|---|
| 386 |  | 
|---|
| 387 | if (!result) result= &unused; | 
|---|
| 388 |  | 
|---|
| 389 | tmp.length(0); | 
|---|
| 390 |  | 
|---|
| 391 | // note that the last element is always "default", and it's ignored below | 
|---|
| 392 | for (uint i= 0; lib[i+1]; i++, set >>= 1) | 
|---|
| 393 | { | 
|---|
| 394 | tmp.append(lib[i]); | 
|---|
| 395 | tmp.append(set & 1 ? "=on,": "=off,"); | 
|---|
| 396 | } | 
|---|
| 397 |  | 
|---|
| 398 | result->str=    thd->strmake(tmp.ptr(), tmp.length()-1); | 
|---|
| 399 | result->length= tmp.length()-1; | 
|---|
| 400 |  | 
|---|
| 401 | return result->str; | 
|---|
| 402 | } | 
|---|
| 403 |  | 
|---|
| 404 |  | 
|---|