| 1 | #ifndef STRINGS_DEF_INCLUDED |
| 2 | #define STRINGS_DEF_INCLUDED |
| 3 | /* Copyright (C) 2011 Monty Program Ab |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify |
| 6 | it under the terms of the GNU General Public License as published by |
| 7 | the Free Software Foundation; version 2 of the License. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License |
| 15 | along with this program; if not, write to the Free Software |
| 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 17 | |
/* This file is to be included first in all files in the string directory */
| 19 | |
#undef DBUG_ASSERT_AS_PRINTF
#include <my_global.h> /* Define standard vars */
#include "m_string.h" /* Exernal definitions of string functions */
#include <string.h> /* memcpy() */
| 23 | |
| 24 | /* |
| 25 | We can't use the original DBUG_ASSERT() (which includes _db_flush()) |
  in the strings library as libdbug is compiled after the strings
| 27 | library and we don't want to have strings depending on libdbug which |
| 28 | depends on mysys and strings. |
| 29 | */ |
| 30 | |
/*
  Unless debugging is compiled out (DBUG_OFF), replace whatever
  DBUG_ASSERT() was defined as with a plain assert().
*/
#ifndef DBUG_OFF
#  undef DBUG_ASSERT
#  define DBUG_ASSERT(A) assert(A)
#endif /* !DBUG_OFF */
| 35 | |
/* Yields x offset by 0x400; the argument is evaluated exactly once. */
#define MY_NOPAD_ID(x) (0x400 + (x))
| 37 | |
| 38 | /* SPACE_INT is a word that contains only spaces */ |
| 39 | #if SIZEOF_INT == 4 |
| 40 | #define SPACE_INT 0x20202020 |
| 41 | #elif SIZEOF_INT == 8 |
| 42 | #define SPACE_INT 0x2020202020202020 |
| 43 | #else |
| 44 | #error define the appropriate constant for a word full of spaces |
| 45 | #endif |
| 46 | |
| 47 | /** |
| 48 | Skip trailing space. |
| 49 | |
| 50 | On most systems reading memory in larger chunks (ideally equal to the size of |
| 51 | the chinks that the machine physically reads from memory) causes fewer memory |
| 52 | access loops and hence increased performance. |
| 53 | This is why the 'int' type is used : it's closest to that (according to how |
| 54 | it's defined in C). |
| 55 | So when we determine the amount of whitespace at the end of a string we do |
| 56 | the following : |
| 57 | 1. We divide the string into 3 zones : |
| 58 | a) from the start of the string (__start) to the first multiple |
| 59 | of sizeof(int) (__start_words) |
| 60 | b) from the end of the string (__end) to the last multiple of sizeof(int) |
| 61 | (__end_words) |
| 62 | c) a zone that is aligned to sizeof(int) and can be safely accessed |
| 63 | through an int * |
| 64 | 2. We start comparing backwards from (c) char-by-char. If all we find is |
| 65 | space then we continue |
| 66 | 3. If there are elements in zone (b) we compare them as unsigned ints to a |
| 67 | int mask (SPACE_INT) consisting of all spaces |
| 68 | 4. Finally we compare the remaining part (a) of the string char by char. |
| 69 | This covers for the last non-space unsigned int from 3. (if any) |
| 70 | |
| 71 | This algorithm works well for relatively larger strings, but it will slow |
| 72 | the things down for smaller strings (because of the additional calculations |
| 73 | and checks compared to the naive method). Thus the barrier of length 20 |
| 74 | is added. |
| 75 | |
| 76 | @param ptr pointer to the input string |
| 77 | @param len the length of the string |
| 78 | @return the last non-space character |
| 79 | */ |
| 80 | |
| 81 | static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) |
| 82 | { |
| 83 | const uchar *end= ptr + len; |
| 84 | |
| 85 | if (len > 20) |
| 86 | { |
| 87 | const uchar *end_words= (const uchar *)(intptr) |
| 88 | (((ulonglong)(intptr)end) / SIZEOF_INT * SIZEOF_INT); |
| 89 | const uchar *start_words= (const uchar *)(intptr) |
| 90 | ((((ulonglong)(intptr)ptr) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT); |
| 91 | |
| 92 | DBUG_ASSERT(((ulonglong)(intptr)ptr) >= SIZEOF_INT); |
| 93 | if (end_words > ptr) |
| 94 | { |
| 95 | while (end > end_words && end[-1] == 0x20) |
| 96 | end--; |
| 97 | if (end[-1] == 0x20 && start_words < end_words) |
| 98 | while (end > start_words && ((unsigned *)end)[-1] == SPACE_INT) |
| 99 | end -= SIZEOF_INT; |
| 100 | } |
| 101 | } |
| 102 | while (end > ptr && end[-1] == 0x20) |
| 103 | end--; |
| 104 | return (end); |
| 105 | } |
| 106 | |
| 107 | |
/*
  Declarations only; the definitions are not part of this header.
  Each function takes a CHARSET_INFO pointer and returns a uint.
  NOTE(review): judging by the names, the result is presumably a set of
  charset / collation flag bits derived from the data of an 8-bit
  character set - confirm against the definitions.
*/
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
| 110 | |
| 111 | |
/* Macros for hashing characters */

/*
  Mix one value into a running hash.

  A and B are the two state words of the hash and must be modifiable
  lvalues: A is updated in place by XOR-ing in the mixed value, and B is
  advanced by 3 on every call.  All macro arguments are parenthesised in
  the expansion so that compound expressions can be passed safely.
*/
#define MY_HASH_ADD(A, B, value) \
  do { \
    (A)^= ((((A) & 63) + (B)) * (value)) + ((A) << 8); \
    (B)+= 3; \
  } while(0)

/*
  Mix a 16-bit value into a running hash: first its low byte, then the
  bits above the low byte, each through MY_HASH_ADD().

  Note that 'value' is evaluated twice, so it must not have side effects.
*/
#define MY_HASH_ADD_16(A, B, value) \
  do { \
    MY_HASH_ADD(A, B, ((value) & 0xFF)); \
    MY_HASH_ADD(A, B, ((value) >> 8)); \
  } while(0)
| 119 | |
| 120 | #endif |
| 121 | |