1 | #ifndef STRINGS_DEF_INCLUDED |
2 | #define STRINGS_DEF_INCLUDED |
3 | /* Copyright (C) 2011 Monty Program Ab |
4 | |
5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; version 2 of the License. |
8 | |
9 | This program is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License |
15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
17 | |
/* This file is to be included first in all files in the string directory */
19 | |
20 | #undef DBUG_ASSERT_AS_PRINTF |
21 | #include <my_global.h> /* Define standard vars */ |
#include "m_string.h"                   /* External definitions of string functions */
23 | |
/*
  The regular DBUG_ASSERT() (which includes _db_flush()) cannot be used
  inside the strings library: libdbug is compiled after the strings
  library, and strings must not depend on libdbug, which itself depends
  on mysys and strings.

  Unless DBUG_OFF is defined, DBUG_ASSERT() is therefore remapped onto
  the plain C assert().
*/

#ifndef DBUG_OFF
#  undef DBUG_ASSERT
#  define DBUG_ASSERT(A) assert(A)
#endif
35 | |
/*
  Offset an ID by 0x400.
  NOTE(review): presumably maps a collation ID to the ID of its NOPAD
  counterpart - confirm against the callers.
*/
#define MY_NOPAD_ID(x) (0x400 + (x))
37 | |
38 | /* SPACE_INT is a word that contains only spaces */ |
39 | #if SIZEOF_INT == 4 |
40 | #define SPACE_INT 0x20202020 |
41 | #elif SIZEOF_INT == 8 |
42 | #define SPACE_INT 0x2020202020202020 |
43 | #else |
44 | #error define the appropriate constant for a word full of spaces |
45 | #endif |
46 | |
47 | /** |
48 | Skip trailing space. |
49 | |
50 | On most systems reading memory in larger chunks (ideally equal to the size of |
51 | the chinks that the machine physically reads from memory) causes fewer memory |
52 | access loops and hence increased performance. |
53 | This is why the 'int' type is used : it's closest to that (according to how |
54 | it's defined in C). |
55 | So when we determine the amount of whitespace at the end of a string we do |
56 | the following : |
57 | 1. We divide the string into 3 zones : |
58 | a) from the start of the string (__start) to the first multiple |
59 | of sizeof(int) (__start_words) |
60 | b) from the end of the string (__end) to the last multiple of sizeof(int) |
61 | (__end_words) |
62 | c) a zone that is aligned to sizeof(int) and can be safely accessed |
63 | through an int * |
64 | 2. We start comparing backwards from (c) char-by-char. If all we find is |
65 | space then we continue |
66 | 3. If there are elements in zone (b) we compare them as unsigned ints to a |
67 | int mask (SPACE_INT) consisting of all spaces |
68 | 4. Finally we compare the remaining part (a) of the string char by char. |
69 | This covers for the last non-space unsigned int from 3. (if any) |
70 | |
71 | This algorithm works well for relatively larger strings, but it will slow |
72 | the things down for smaller strings (because of the additional calculations |
73 | and checks compared to the naive method). Thus the barrier of length 20 |
74 | is added. |
75 | |
76 | @param ptr pointer to the input string |
77 | @param len the length of the string |
78 | @return the last non-space character |
79 | */ |
80 | |
81 | static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) |
82 | { |
83 | const uchar *end= ptr + len; |
84 | |
85 | if (len > 20) |
86 | { |
87 | const uchar *end_words= (const uchar *)(intptr) |
88 | (((ulonglong)(intptr)end) / SIZEOF_INT * SIZEOF_INT); |
89 | const uchar *start_words= (const uchar *)(intptr) |
90 | ((((ulonglong)(intptr)ptr) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT); |
91 | |
92 | DBUG_ASSERT(((ulonglong)(intptr)ptr) >= SIZEOF_INT); |
93 | if (end_words > ptr) |
94 | { |
95 | while (end > end_words && end[-1] == 0x20) |
96 | end--; |
97 | if (end[-1] == 0x20 && start_words < end_words) |
98 | while (end > start_words && ((unsigned *)end)[-1] == SPACE_INT) |
99 | end -= SIZEOF_INT; |
100 | } |
101 | } |
102 | while (end > ptr && end[-1] == 0x20) |
103 | end--; |
104 | return (end); |
105 | } |
106 | |
107 | |
108 | uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs); |
109 | uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs); |
110 | |
111 | |
/*
  Macros for hashing characters.

  MY_HASH_ADD(A, B, value)
    Mixes one value into the running hash A and then advances B by 3.
    A and B must be modifiable lvalues. They are evaluated more than
    once, so they must be free of side effects.

  MY_HASH_ADD_16(A, B, value)
    Feeds first the low byte of value ((value) & 0xFF) and then
    ((value) >> 8) into MY_HASH_ADD(). value is evaluated twice, so it
    must be free of side effects too.

  Every macro argument is parenthesized in the expansion so that
  expressions such as "hi | lo" can be passed safely.
*/

#define MY_HASH_ADD(A, B, value) \
  do { (A)^= ((((A) & 63) + (B)) * (value)) + ((A) << 8); (B)+= 3; } while(0)

#define MY_HASH_ADD_16(A, B, value) \
  do { MY_HASH_ADD(A, B, ((value) & 0xFF)); MY_HASH_ADD(A, B, ((value) >> 8)); } while(0)
119 | |
120 | #endif |
121 | |