/* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
   Copyright (c) 2017, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */

/*
  This code needs extra visibility in the lexer structures
*/

#include "mariadb.h"
#include "my_md5.h"
#include "unireg.h"

#include "sql_string.h"
#include "sql_class.h"
#include "sql_lex.h"
#include "sp_pcontext.h"
#include "sql_digest.h"
#include "sql_digest_stream.h"

#include "sql_get_diagnostics.h"

/* Generated code */
#include "sql_yacc.h"
#define LEX_TOKEN_WITH_DEFINITION
#include "lex_token.h"

/* Name pollution from sql/sql_lex.h */
#ifdef LEX_YYSTYPE
#undef LEX_YYSTYPE
#endif

#define LEX_YYSTYPE YYSTYPE*

#define SIZE_OF_A_TOKEN 2

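/*
  Layout of the token array, as written by store_token() and
  store_token_identifier() below (shown here for illustration):

    plain token:  [tok & 0xff][tok >> 8]          (2 bytes, little-endian)
    identifier:   [token (2 bytes)][length (2 bytes)][<length> raw bytes]

  For example, an identifier token for the 2-character name "t1"
  occupies 6 bytes: two for the token code, two for the length (2, 0),
  then 't', '1'.
*/
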
/**
  Read a single token from the token array.
  @return the index of the next token,
          or MAX_DIGEST_STORAGE_SIZE + 1 when the stream is exhausted
*/
inline uint read_token(const sql_digest_storage *digest_storage,
                       uint index, uint *tok)
{
  uint safe_byte_count= digest_storage->m_byte_count;

  if (index + SIZE_OF_A_TOKEN <= safe_byte_count &&
      safe_byte_count <= digest_storage->m_token_array_length)
  {
    const unsigned char *src= & digest_storage->m_token_array[index];
    *tok= src[0] | (src[1] << 8);
    return index + SIZE_OF_A_TOKEN;
  }

  /* The input byte stream is exhausted. */
  *tok= 0;
  return MAX_DIGEST_STORAGE_SIZE + 1;
}

/**
  Store a single token in the token array.
  Mark the digest storage as full if the token does not fit.
*/
inline void store_token(sql_digest_storage* digest_storage, uint token)
{
  DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);

  if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length)
  {
    unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
    dest[0]= token & 0xff;
    dest[1]= (token >> 8) & 0xff;
    digest_storage->m_byte_count+= SIZE_OF_A_TOKEN;
  }
  else
  {
    digest_storage->m_full= true;
  }
}

/**
  Read an identifier from the token array.
  @return the index of the next token,
          or MAX_DIGEST_STORAGE_SIZE + 1 when the stream is exhausted
*/
inline uint read_identifier(const sql_digest_storage* digest_storage,
                            uint index, char ** id_string, int *id_length)
{
  uint new_index;
  uint safe_byte_count= digest_storage->m_byte_count;

  DBUG_ASSERT(index <= safe_byte_count);
  DBUG_ASSERT(safe_byte_count <= digest_storage->m_token_array_length);

  /*
    The token, the length and the string are written atomically
    (see store_token_identifier()), so a length + string is always
    expected here.
  */

  uint bytes_needed= SIZE_OF_A_TOKEN;
  /* If we can read the identifier length */
  if ((index + bytes_needed) <= safe_byte_count)
  {
    const unsigned char *src= & digest_storage->m_token_array[index];
    /* Read the length of the identifier */
    uint length= src[0] | (src[1] << 8);
    bytes_needed+= length;
    /* If we can read the entire identifier from the token array */
    if ((index + bytes_needed) <= safe_byte_count)
    {
      *id_string= (char *) (src + 2);
      *id_length= length;

      new_index= index + bytes_needed;
      DBUG_ASSERT(new_index <= safe_byte_count);
      return new_index;
    }
  }

  /* The input byte stream is exhausted. */
  return MAX_DIGEST_STORAGE_SIZE + 1;
}

/**
  Store an identifier in the token array.
  Mark the digest storage as full if the identifier does not fit.
*/
inline void store_token_identifier(sql_digest_storage* digest_storage,
                                   uint token,
                                   size_t id_length, const char *id_name)
{
  DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);

  size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length;
  if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length)
  {
    unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
    /* Write the token */
    dest[0]= token & 0xff;
    dest[1]= (token >> 8) & 0xff;
    /* Write the string length */
    dest[2]= id_length & 0xff;
    dest[3]= (id_length >> 8) & 0xff;
    /* Write the string data */
    if (id_length > 0)
      memcpy((char *)(dest + 4), id_name, id_length);
    digest_storage->m_byte_count+= (uint)bytes_needed;
  }
  else
  {
    digest_storage->m_full= true;
  }
}

void compute_digest_md5(const sql_digest_storage *digest_storage, unsigned char *md5)
{
  compute_md5_hash(md5,
                   (const char *) digest_storage->m_token_array,
                   digest_storage->m_byte_count);
}
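
/*
  A minimal caller sketch, for illustration only. It assumes
  MD5_HASH_SIZE (16) from my_md5.h; the hex-encoding step is not part
  of this file and is only sketched here:

    unsigned char hash[MD5_HASH_SIZE];
    compute_digest_md5(&state->m_digest_storage, hash);
    // hex-encode hash[0..15] to obtain the user-visible digest value
*/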

/*
  Iterate over the token array and build the digest text.
*/
void compute_digest_text(const sql_digest_storage* digest_storage,
                         String *digest_text)
{
  DBUG_ASSERT(digest_storage != NULL);
  uint byte_count= digest_storage->m_byte_count;
  String *digest_output= digest_text;
  uint tok= 0;
  uint current_byte= 0;
  lex_token_string *tok_data;

  /* Reset existing data */
  digest_output->length(0);

  if (byte_count > digest_storage->m_token_array_length)
  {
    digest_output->append("\0", 1);
    return;
  }

  /* Convert text to utf8 */
  const CHARSET_INFO *from_cs= get_charset(digest_storage->m_charset_number, MYF(0));
  const CHARSET_INFO *to_cs= &my_charset_utf8_bin;

  if (from_cs == NULL)
  {
    /*
      Can happen, as we do dirty reads on digest_storage,
      which can be written to in another thread.
    */
    digest_output->append("\0", 1);
    return;
  }

  char id_buffer[NAME_LEN + 1]= {'\0'};
  char *id_string;
  size_t id_length;
  bool convert_text= !my_charset_same(from_cs, to_cs);

  while (current_byte < byte_count)
  {
    current_byte= read_token(digest_storage, current_byte, &tok);

    if (tok <= 0 || tok >= array_elements(lex_token_array)
        || current_byte > max_digest_length)
      return;

    tok_data= &lex_token_array[tok];

    switch (tok)
    {
    /* All identifiers are printed with their name. */
    case IDENT:
    case IDENT_QUOTED:
    case TOK_IDENT:
      {
        char *id_ptr= NULL;
        int id_len= 0;
        uint err_cs= 0;

        /* Get the next identifier from the storage buffer. */
        current_byte= read_identifier(digest_storage, current_byte,
                                      &id_ptr, &id_len);
        if (current_byte > max_digest_length)
          return;

        if (convert_text)
        {
          /* Verify that the converted text will fit. */
          if (to_cs->mbmaxlen*id_len > NAME_LEN)
          {
            digest_output->append("...", 3);
            break;
          }
          /* Convert identifier string into the storage character set. */
          id_length= my_convert(id_buffer, NAME_LEN, to_cs,
                                id_ptr, id_len, from_cs, &err_cs);
          id_string= id_buffer;
        }
        else
        {
          id_string= id_ptr;
          id_length= id_len;
        }

        if (id_length == 0 || err_cs != 0)
        {
          break;
        }
        /* Copy the converted identifier into the digest string. */
        digest_output->append("`", 1);
        if (id_length > 0)
          digest_output->append(id_string, id_length);
        digest_output->append("` ", 2);
      }
      break;

    /* Everything else is printed as is. */
    default:
      /*
        Append the token text as is, followed by a space
        when the token definition asks for one (m_append_space).
      */
      int tok_length= tok_data->m_token_length;

      digest_output->append(tok_data->m_token_string, tok_length);
      if (tok_data->m_append_space)
        digest_output->append(" ", 1);
      break;
    }
  }
}
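
/*
  Worked example, for illustration. The exact spacing depends on the
  m_append_space flags generated into lex_token_array:

    statement:   SELECT * FROM t1 WHERE a = 1
    digest text: SELECT * FROM `t1` WHERE `a` = ?

  Identifiers are re-quoted with backticks, and the literal 1 was
  already reduced to TOK_GENERIC_VALUE ("?") by digest_add_token().
*/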

static inline uint peek_token(const sql_digest_storage *digest, uint index)
{
  uint token;
  DBUG_ASSERT(index + SIZE_OF_A_TOKEN <= digest->m_byte_count);
  DBUG_ASSERT(digest->m_byte_count <= digest->m_token_array_length);

  token= ((digest->m_token_array[index + 1])<<8) | digest->m_token_array[index];
  return token;
}

/**
  Read the last two tokens from the token array. Do not look back past
  the last identifier (last_id_index): the raw identifier bytes stored
  after it would otherwise be misread as token codes.
*/
static inline void peek_last_two_tokens(const sql_digest_storage* digest_storage,
                                        uint last_id_index, uint *t1, uint *t2)
{
  uint byte_count= digest_storage->m_byte_count;
  uint peek_index= byte_count;

  if (last_id_index + SIZE_OF_A_TOKEN <= peek_index)
  {
    /* Take the last token. */
    peek_index-= SIZE_OF_A_TOKEN;
    *t1= peek_token(digest_storage, peek_index);

    if (last_id_index + SIZE_OF_A_TOKEN <= peek_index)
    {
      /* Take the 2nd token from last. */
      peek_index-= SIZE_OF_A_TOKEN;
      *t2= peek_token(digest_storage, peek_index);
    }
    else
    {
      *t2= TOK_UNUSED;
    }
  }
  else
  {
    *t1= TOK_UNUSED;
    *t2= TOK_UNUSED;
  }
}

/**
  Read the last three tokens from the token array. Do not look back
  past the last identifier (last_id_index), for the same reason as
  peek_last_two_tokens().
*/
static inline void peek_last_three_tokens(const sql_digest_storage* digest_storage,
                                          uint last_id_index, uint *t1, uint *t2, uint *t3)
{
  uint byte_count= digest_storage->m_byte_count;
  uint peek_index= byte_count;

  if (last_id_index + SIZE_OF_A_TOKEN <= peek_index)
  {
    /* Take the last token. */
    peek_index-= SIZE_OF_A_TOKEN;
    *t1= peek_token(digest_storage, peek_index);

    if (last_id_index + SIZE_OF_A_TOKEN <= peek_index)
    {
      /* Take the 2nd token from last. */
      peek_index-= SIZE_OF_A_TOKEN;
      *t2= peek_token(digest_storage, peek_index);

      if (last_id_index + SIZE_OF_A_TOKEN <= peek_index)
      {
        /* Take the 3rd token from last. */
        peek_index-= SIZE_OF_A_TOKEN;
        *t3= peek_token(digest_storage, peek_index);
      }
      else
      {
        *t3= TOK_UNUSED;
      }
    }
    else
    {
      *t2= TOK_UNUSED;
      *t3= TOK_UNUSED;
    }
  }
  else
  {
    *t1= TOK_UNUSED;
    *t2= TOK_UNUSED;
    *t3= TOK_UNUSED;
  }
}

sql_digest_state* digest_add_token(sql_digest_state *state,
                                   uint token,
                                   LEX_YYSTYPE yylval)
{
  sql_digest_storage *digest_storage= NULL;

  digest_storage= &state->m_digest_storage;

  /*
    Stop collecting further tokens if digest storage is full or
    if an END token is received.
  */
  if (digest_storage->m_full || token == END_OF_INPUT)
    return NULL;

  /*
    Look at the last two tokens collected so far. They are used by the
    REDUCE rules below for normalisation. Identifier tokens are never
    part of a reduce (see last_id_index).
  */
  uint last_token;
  uint last_token2;

  switch (token)
  {
  case NUM:
  case LONG_NUM:
  case ULONGLONG_NUM:
  case DECIMAL_NUM:
  case FLOAT_NUM:
  case BIN_NUM:
  case HEX_NUM:
    {
      bool found_unary;
      do
      {
        found_unary= false;
        peek_last_two_tokens(digest_storage, state->m_last_id_index,
                             &last_token, &last_token2);

        if ((last_token == '-') || (last_token == '+'))
        {
          /*
            We need to differentiate:
            - a <unary minus> operator
            - a <unary plus> operator
            from
            - a <binary minus> operator
            - a <binary plus> operator
            to only reduce "a = -1" to "a = ?", and not change "b - 1" to "b ?"

            Binary operators are found inside an expression,
            while unary operators are found at the beginning of an
            expression, or after operators.

            To achieve this, every token that is followed by an <expr>
            expression in the SQL grammar is flagged.
            See sql/sql_yacc.yy
            See sql/gen_lex_token.cc

            For example,
            "(-1)" is parsed as "(", "-", NUM, ")",
            and lex_token_array["("].m_start_expr is true,
            so reduction of the "-" NUM is done, the result is "(?)".
            "(a-1)" is parsed as "(", ID, "-", NUM, ")",
            and lex_token_array[ID].m_start_expr is false,
            so the operator is binary, no reduction is done,
            and the result is "(a-?)".
          */
          if (lex_token_array[last_token2].m_start_expr)
          {
            /*
              REDUCE:
              TOK_GENERIC_VALUE :=
                (UNARY_PLUS | UNARY_MINUS) (NUM | LONG_NUM | ... | FLOAT_NUM)

              REDUCE:
              TOK_GENERIC_VALUE := (UNARY_PLUS | UNARY_MINUS) TOK_GENERIC_VALUE
            */
            token= TOK_GENERIC_VALUE;
            digest_storage->m_byte_count-= SIZE_OF_A_TOKEN;
            found_unary= true;
          }
        }
      } while (found_unary);
    }
    /* NULL_SYM is handled separately, via digest_reduce_token(). */
    /* fall through */
  case LEX_HOSTNAME:
  case TEXT_STRING:
  case NCHAR_STRING:
  case PARAM_MARKER:
    {
      /*
        REDUCE:
        TOK_GENERIC_VALUE := BIN_NUM | DECIMAL_NUM | ... | ULONGLONG_NUM
      */
      token= TOK_GENERIC_VALUE;

      peek_last_two_tokens(digest_storage, state->m_last_id_index,
                           &last_token, &last_token2);

      if ((last_token2 == TOK_GENERIC_VALUE ||
           last_token2 == TOK_GENERIC_VALUE_LIST) &&
          (last_token == ','))
      {
        /*
          REDUCE:
          TOK_GENERIC_VALUE_LIST :=
            TOK_GENERIC_VALUE ',' TOK_GENERIC_VALUE

          REDUCE:
          TOK_GENERIC_VALUE_LIST :=
            TOK_GENERIC_VALUE_LIST ',' TOK_GENERIC_VALUE
        */
        digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN;
        token= TOK_GENERIC_VALUE_LIST;
      }
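      /*
        Example: the values in "IN (1, 2, 3)" collapse step by step:
          '(' 1       ->  '(' TOK_GENERIC_VALUE
          ... ',' 2   ->  '(' TOK_GENERIC_VALUE_LIST
          ... ',' 3   ->  '(' TOK_GENERIC_VALUE_LIST
        so a value list of any length is stored as a single token,
        and "IN (1, 2)" and "IN (1, 2, 3)" share the same digest.
      */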
      /*
        Add this token or the resulting reduce to digest storage.
      */
      store_token(digest_storage, token);
      break;
    }
  case ')':
    {
      peek_last_two_tokens(digest_storage, state->m_last_id_index,
                           &last_token, &last_token2);

      if (last_token == TOK_GENERIC_VALUE &&
          last_token2 == '(')
      {
        /*
          REDUCE:
          TOK_ROW_SINGLE_VALUE :=
            '(' TOK_GENERIC_VALUE ')'
        */
        digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN;
        token= TOK_ROW_SINGLE_VALUE;

        /* Read last two tokens again */
        peek_last_two_tokens(digest_storage, state->m_last_id_index,
                             &last_token, &last_token2);

        if ((last_token2 == TOK_ROW_SINGLE_VALUE ||
             last_token2 == TOK_ROW_SINGLE_VALUE_LIST) &&
            (last_token == ','))
        {
          /*
            REDUCE:
            TOK_ROW_SINGLE_VALUE_LIST :=
              TOK_ROW_SINGLE_VALUE ',' TOK_ROW_SINGLE_VALUE

            REDUCE:
            TOK_ROW_SINGLE_VALUE_LIST :=
              TOK_ROW_SINGLE_VALUE_LIST ',' TOK_ROW_SINGLE_VALUE
          */
          digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN;
          token= TOK_ROW_SINGLE_VALUE_LIST;
        }
      }
      else if (last_token == TOK_GENERIC_VALUE_LIST &&
               last_token2 == '(')
      {
        /*
          REDUCE:
          TOK_ROW_MULTIPLE_VALUE :=
            '(' TOK_GENERIC_VALUE_LIST ')'
        */
        digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN;
        token= TOK_ROW_MULTIPLE_VALUE;

        /* Read last two tokens again */
        peek_last_two_tokens(digest_storage, state->m_last_id_index,
                             &last_token, &last_token2);

        if ((last_token2 == TOK_ROW_MULTIPLE_VALUE ||
             last_token2 == TOK_ROW_MULTIPLE_VALUE_LIST) &&
            (last_token == ','))
        {
          /*
            REDUCE:
            TOK_ROW_MULTIPLE_VALUE_LIST :=
              TOK_ROW_MULTIPLE_VALUE ',' TOK_ROW_MULTIPLE_VALUE

            REDUCE:
            TOK_ROW_MULTIPLE_VALUE_LIST :=
              TOK_ROW_MULTIPLE_VALUE_LIST ',' TOK_ROW_MULTIPLE_VALUE
          */
          digest_storage->m_byte_count-= 2*SIZE_OF_A_TOKEN;
          token= TOK_ROW_MULTIPLE_VALUE_LIST;
        }
      }
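      /*
        Example: "VALUES (1, 2), (3, 4)" reduces as follows:
          '(' TOK_GENERIC_VALUE_LIST ')'  ->  TOK_ROW_MULTIPLE_VALUE
          ROW ',' ROW                     ->  TOK_ROW_MULTIPLE_VALUE_LIST
        so multi-row inserts of any size produce the same digest.
      */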
      /*
        Add this token or the resulting reduce to digest storage.
      */
      store_token(digest_storage, token);
      break;
    }
  case IDENT:
  case IDENT_QUOTED:
    {
      YYSTYPE *lex_token= yylval;
      const char *yytext= lex_token->lex_str.str;
      size_t yylen= lex_token->lex_str.length;

      /*
        REDUCE:
        TOK_IDENT := IDENT | IDENT_QUOTED
        The parser gives IDENT or IDENT_QUOTED for the same text,
        depending on the character set used.
        We unify both to always print the same digest text,
        and always have the same digest hash.
      */
      token= TOK_IDENT;
      /* Add this token and identifier string to digest storage. */
      store_token_identifier(digest_storage, token, yylen, yytext);

      /* Update the index of the last identifier found. */
      state->m_last_id_index= digest_storage->m_byte_count;
      break;
    }
  default:
    {
      /* Add this token to digest storage. */
      store_token(digest_storage, token);
      break;
    }
  }

  return state;
}
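
/*
  Call-site sketch (an assumption of this comment, not defined here):
  the lexer wrapper in sql_lex.cc feeds each token it returns into
  digest_add_token(), roughly:

    token= lex_one_token(yylval, thd);
    if (lip->m_digest != NULL)
      lip->m_digest= digest_add_token(lip->m_digest, token, yylval);

  so the digest is built incrementally while the statement is parsed.
*/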

sql_digest_state* digest_reduce_token(sql_digest_state *state,
                                      uint token_left, uint token_right)
{
  sql_digest_storage *digest_storage= NULL;

  digest_storage= &state->m_digest_storage;

  /*
    Stop collecting further tokens if digest storage is full.
  */
  if (digest_storage->m_full)
    return NULL;

  uint last_token;
  uint last_token2;
  uint last_token3;
  uint token_to_push= TOK_UNUSED;

  peek_last_two_tokens(digest_storage, state->m_last_id_index,
                       &last_token, &last_token2);

  /*
    There is only one caller of digest_reduce_token(),
    see sql/sql_yacc.yy, rule literal := NULL_SYM.
    REDUCE:
      token_left := token_right
    Used for:
      TOK_GENERIC_VALUE := NULL_SYM
  */
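
  /*
    Example: for "WHERE a = NULL", digest_add_token() first stores
    NULL_SYM (via its default case); the literal := NULL_SYM rule then
    calls digest_reduce_token(TOK_GENERIC_VALUE, NULL_SYM), so the
    digest text ends in "= ?" exactly as "= 1" would.
  */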

  if (last_token == token_right)
  {
    /*
      Current stream is like:
      TOKEN_X TOKEN_RIGHT .
      REDUCE to
      TOKEN_X TOKEN_LEFT .
    */
    digest_storage->m_byte_count-= SIZE_OF_A_TOKEN;
    store_token(digest_storage, token_left);
  }
  else
  {
    /*
      Current stream is like:
      TOKEN_X TOKEN_RIGHT TOKEN_Y .
      Pop TOKEN_Y
      TOKEN_X TOKEN_RIGHT . TOKEN_Y
      REDUCE to
      TOKEN_X TOKEN_LEFT . TOKEN_Y
    */
    DBUG_ASSERT(last_token2 == token_right);
    digest_storage->m_byte_count-= 2 * SIZE_OF_A_TOKEN;
    store_token(digest_storage, token_left);
    token_to_push= last_token;
  }

  peek_last_three_tokens(digest_storage, state->m_last_id_index,
                         &last_token, &last_token2, &last_token3);

  if ((last_token3 == TOK_GENERIC_VALUE ||
       last_token3 == TOK_GENERIC_VALUE_LIST) &&
      (last_token2 == ',') &&
      (last_token == TOK_GENERIC_VALUE))
  {
    /*
      REDUCE:
      TOK_GENERIC_VALUE_LIST :=
        TOK_GENERIC_VALUE ',' TOK_GENERIC_VALUE

      REDUCE:
      TOK_GENERIC_VALUE_LIST :=
        TOK_GENERIC_VALUE_LIST ',' TOK_GENERIC_VALUE
    */
    digest_storage->m_byte_count-= 3*SIZE_OF_A_TOKEN;
    store_token(digest_storage, TOK_GENERIC_VALUE_LIST);
  }

  if (token_to_push != TOK_UNUSED)
  {
    /*
      Push TOKEN_Y
    */
    store_token(digest_storage, token_to_push);
  }

  return state;
}