1 | /* |
2 | Copyright (c) 2015, MariaDB Foundation |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
16 | */ |
17 | |
18 | |
19 | #ifndef MY_FUNCTION_NAME |
20 | #error MY_FUNCTION_NAME is not defined |
21 | #endif |
22 | |
23 | #if defined(IS_MB3_CHAR) && !defined(IS_MB2_CHAR) |
24 | #error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not! |
25 | #endif |
26 | |
27 | #if defined(IS_MB4_CHAR) && !defined(IS_MB3_CHAR) |
28 | #error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not! |
29 | #endif |
30 | |
31 | |
32 | #ifdef DEFINE_ASIAN_ROUTINES |
33 | #define DEFINE_WELL_FORMED_CHAR_LENGTH |
34 | #define DEFINE_CHARLEN |
35 | #define DEFINE_NATIVE_TO_MB_VARLEN |
36 | #endif |
37 | |
38 | |
39 | #ifdef DEFINE_CHARLEN |
40 | /** |
41 | Returns length of the left-most character of a string. |
42 | @param cs - charset with mbminlen==1 and mbmaxlen<=4 |
43 | @param b - the beginning of the string |
44 | @param e - the end of the string |
45 | |
46 | @return MY_CS_ILSEQ if a bad byte sequence was found |
47 | @return MY_CS_TOOSMALL(N) if the string ended unexpectedly |
48 | @return >0 if a valid character was found |
49 | */ |
50 | static int |
51 | MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)), |
52 | const uchar *b, const uchar *e) |
53 | { |
54 | DBUG_ASSERT(cs->mbminlen == 1); |
55 | DBUG_ASSERT(cs->mbmaxlen <= 4); |
56 | |
57 | if (b >= e) |
58 | return MY_CS_TOOSMALL; |
59 | if ((uchar) b[0] < 128) |
60 | return 1; /* Single byte ASCII character */ |
61 | |
62 | #ifdef IS_8BIT_CHAR |
63 | if (IS_8BIT_CHAR(b[0])) |
64 | { |
65 | /* Single byte non-ASCII character, e.g. half width kana in sjis */ |
66 | return 1; |
67 | } |
68 | #endif |
69 | |
70 | if (b + 2 > e) |
71 | return MY_CS_TOOSMALLN(2); |
72 | if (IS_MB2_CHAR(b[0], b[1])) |
73 | return 2; /* Double byte character */ |
74 | |
75 | #ifdef IS_MB3_CHAR |
76 | if (b + 3 > e) |
77 | { |
78 | #ifdef IS_MB_PREFIX2 |
79 | if (!IS_MB_PREFIX2(b[0], b[1])) |
80 | return MY_CS_ILSEQ; |
81 | #endif |
82 | return MY_CS_TOOSMALLN(3); |
83 | } |
84 | if (IS_MB3_CHAR(b[0], b[1], b[2])) |
85 | return 3; /* Three-byte character */ |
86 | #endif |
87 | |
88 | #ifdef IS_MB4_CHAR |
89 | if (b + 4 > e) |
90 | return MY_CS_TOOSMALLN(4); |
91 | if (IS_MB4_CHAR(b[0], b[1], b[2], b[3])) |
92 | return 4; /* Four-byte character */ |
93 | #endif |
94 | |
95 | /* Wrong byte sequence */ |
96 | return MY_CS_ILSEQ; |
97 | } |
98 | #endif /* DEFINE_CHARLEN */ |
99 | |
100 | |
101 | #ifdef DEFINE_WELL_FORMED_CHAR_LENGTH |
102 | /** |
103 | Returns well formed length of a string |
104 | measured in characters (rather than in bytes). |
105 | Version for character sets that define IS_MB?_CHAR(), e.g. big5. |
106 | */ |
107 | static size_t |
108 | MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)), |
109 | const char *b, const char *e, |
110 | size_t nchars, |
111 | MY_STRCOPY_STATUS *status) |
112 | { |
113 | size_t nchars0= nchars; |
114 | for ( ; b < e && nchars ; nchars--) |
115 | { |
116 | if ((uchar) b[0] < 128) |
117 | { |
118 | b++; /* Single byte ASCII character */ |
119 | continue; |
120 | } |
121 | |
122 | if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1])) |
123 | { |
124 | b+= 2; /* Double byte character */ |
125 | continue; |
126 | } |
127 | |
128 | #ifdef IS_MB3_CHAR |
129 | if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2])) |
130 | { |
131 | b+= 3; /* Three-byte character */ |
132 | continue; |
133 | } |
134 | #endif |
135 | |
136 | #ifdef IS_MB4_CHAR |
137 | if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3])) |
138 | { |
139 | b+= 4; /* Four-byte character */ |
140 | continue; |
141 | } |
142 | #endif |
143 | |
144 | #ifdef IS_8BIT_CHAR |
145 | if (IS_8BIT_CHAR(b[0])) |
146 | { |
147 | b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */ |
148 | continue; |
149 | } |
150 | #endif |
151 | |
152 | /* Wrong byte sequence */ |
153 | status->m_source_end_pos= status->m_well_formed_error_pos= b; |
154 | return nchars0 - nchars; |
155 | } |
156 | status->m_source_end_pos= b; |
157 | status->m_well_formed_error_pos= NULL; |
158 | return nchars0 - nchars; |
159 | } |
160 | #endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */ |
161 | |
162 | |
163 | #ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN |
164 | #ifndef CHARLEN |
165 | #error CHARLEN is not defined |
166 | #endif |
167 | /** |
168 | Returns well formed length of a string |
169 | measured in characters (rather than in bytes). |
170 | Version for character sets that define CHARLEN(), e.g. utf8. |
171 | CHARLEN(cs,b,e) must use the same return code convension that mb_wc() does: |
172 | - a positive number in the range [1-mbmaxlen] if a valid |
173 | single-byte or multi-byte character was found |
174 | - MY_CS_ILSEQ (0) on a bad byte sequence |
175 | - MY_CS_TOOSMALLxx if the incoming sequence is incomplete |
176 | */ |
177 | static size_t |
178 | MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)), |
179 | const char *b, const char *e, |
180 | size_t nchars, |
181 | MY_STRCOPY_STATUS *status) |
182 | { |
183 | size_t nchars0= nchars; |
184 | int chlen; |
185 | for ( ; nchars ; nchars--, b+= chlen) |
186 | { |
187 | if ((chlen= CHARLEN(cs, (uchar*) b, (uchar*) e)) <= 0) |
188 | { |
189 | status->m_well_formed_error_pos= b < e ? b : NULL; |
190 | status->m_source_end_pos= b; |
191 | return nchars0 - nchars; |
192 | } |
193 | } |
194 | status->m_well_formed_error_pos= NULL; |
195 | status->m_source_end_pos= b; |
196 | return nchars0 - nchars; |
197 | } |
198 | #endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */ |
199 | |
200 | |
201 | #ifdef DEFINE_NATIVE_TO_MB_VARLEN |
202 | /* |
203 | Write a native 2-byte character. |
204 | If the full character does not fit, only the first byte is written. |
205 | */ |
206 | static inline int |
207 | my_native_to_mb_fixed2(my_wc_t wc, uchar *s, uchar *e) |
208 | { |
209 | /* The caller must insure there is a space for at least one byte */ |
210 | DBUG_ASSERT(s < e); |
211 | s[0]= (uchar) (wc >> 8); |
212 | if (s + 2 > e) |
213 | return MY_CS_TOOSMALL2; |
214 | s[1]= wc & 0xFF; |
215 | return 2; |
216 | } |
217 | |
218 | |
219 | /* |
220 | Write a native 3-byte character. |
221 | If the full character does not fit, only the leading bytes are written. |
222 | */ |
223 | static inline int |
224 | my_native_to_mb_fixed3(my_wc_t wc, uchar *s, uchar *e) |
225 | { |
226 | /* The caller must insure there is a space for at least one byte */ |
227 | DBUG_ASSERT(s < e); |
228 | s[0]= (uchar) (wc >> 16); |
229 | if (s + 2 > e) |
230 | return MY_CS_TOOSMALL2; |
231 | s[1]= (wc >> 8) & 0xFF; |
232 | if (s + 3 > e) |
233 | return MY_CS_TOOSMALL3; |
234 | s[2]= wc & 0xFF; |
235 | return 3; |
236 | } |
237 | |
238 | |
239 | /* |
240 | Write a native 1-byte or 2-byte or 3-byte character. |
241 | */ |
242 | |
243 | static int |
244 | MY_FUNCTION_NAME(native_to_mb)(CHARSET_INFO *cs __attribute__((unused)), |
245 | my_wc_t wc, uchar *s, uchar *e) |
246 | { |
247 | if (s >= e) |
248 | return MY_CS_TOOSMALL; |
249 | if ((int) wc <= 0xFF) |
250 | { |
251 | s[0]= (uchar) wc; |
252 | return 1; |
253 | } |
254 | #ifdef IS_MB3_HEAD |
255 | if (wc > 0xFFFF) |
256 | return my_native_to_mb_fixed3(wc, s, e); |
257 | #endif |
258 | return my_native_to_mb_fixed2(wc, s, e); |
259 | } |
260 | #endif /* DEFINE_NATIVE_TO_MB_VARLEN */ |
261 | |
262 | |
263 | #undef MY_FUNCTION_NAME |
264 | |