1/*
2 Copyright (c) 2015, MariaDB Foundation
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17
18
19#ifndef MY_FUNCTION_NAME
20#error MY_FUNCTION_NAME is not defined
21#endif
22
23#if defined(IS_MB3_CHAR) && !defined(IS_MB2_CHAR)
24#error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not!
25#endif
26
27#if defined(IS_MB4_CHAR) && !defined(IS_MB3_CHAR)
28#error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not!
29#endif
30
31
/*
  DEFINE_ASIAN_ROUTINES is a shorthand: it requests charlen(),
  the IS_MB?_CHAR based well_formed_char_length() and native_to_mb()
  all at once.
*/
#if defined(DEFINE_ASIAN_ROUTINES)
#define DEFINE_CHARLEN
#define DEFINE_WELL_FORMED_CHAR_LENGTH
#define DEFINE_NATIVE_TO_MB_VARLEN
#endif
37
38
#ifdef DEFINE_CHARLEN
/**
  Returns the length, in bytes, of the left-most character of a string.

  @param cs - charset with mbminlen==1 and mbmaxlen<=4
  @param b  - the beginning of the string
  @param e  - the end of the string (one past the last available byte)

  @return MY_CS_ILSEQ        if a bad byte sequence was found
  @return MY_CS_TOOSMALL     if the string is empty (b >= e)
  @return MY_CS_TOOSMALLN(N) if the string ended before an N-byte
                             character could be examined
  @return >0                 the character length, if a valid character
                             was found
*/
static int
MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
                          const uchar *b, const uchar *e)
{
  DBUG_ASSERT(cs->mbminlen == 1);
  DBUG_ASSERT(cs->mbmaxlen <= 4);

  if (b >= e)
    return MY_CS_TOOSMALL;
  if ((uchar) b[0] < 128)
    return 1; /* Single byte ASCII character */

#ifdef IS_8BIT_CHAR
  if (IS_8BIT_CHAR(b[0]))
  {
    /* Single byte non-ASCII character, e.g. half width kana in sjis */
    return 1;
  }
#endif

  /*
    From here on b < e holds. The number of available bytes is tested
    as (e - b) instead of forming b + N, because b + N may point beyond
    one-past-the-end of the buffer, which is undefined behaviour.
  */
  if (e - b < 2)
    return MY_CS_TOOSMALLN(2);
  if (IS_MB2_CHAR(b[0], b[1]))
    return 2; /* Double byte character */

#ifdef IS_MB3_CHAR
  if (e - b < 3)
  {
#ifdef IS_MB_PREFIX2
    /*
      Only two bytes are available: report "too small" only if they can
      start a longer character, otherwise the sequence is already bad.
    */
    if (!IS_MB_PREFIX2(b[0], b[1]))
      return MY_CS_ILSEQ;
#endif
    return MY_CS_TOOSMALLN(3);
  }
  if (IS_MB3_CHAR(b[0], b[1], b[2]))
    return 3; /* Three-byte character */
#endif

#ifdef IS_MB4_CHAR
  if (e - b < 4)
    return MY_CS_TOOSMALLN(4);
  if (IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
    return 4; /* Four-byte character */
#endif

  /* Wrong byte sequence */
  return MY_CS_ILSEQ;
}
#endif /* DEFINE_CHARLEN */
99
100
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH
/**
  Returns well formed length of a string
  measured in characters (rather than in bytes).
  Version for character sets that define IS_MB?_CHAR(), e.g. big5.

  Scanning stops at the end of the string, after nchars characters,
  or at the first byte sequence that is not a valid character
  (this includes a multi-byte character cut off by the end of the string).

  @param cs     - charset (not used by this version)
  @param b      - the beginning of the string
  @param e      - the end of the string (one past the last available byte)
  @param nchars - the maximum number of characters to scan
  @param status - out: m_source_end_pos is set to the position where
                  scanning stopped; m_well_formed_error_pos is set to
                  the position of the bad byte sequence, or to NULL
                  if no bad sequence was met

  @return the number of well formed characters scanned
*/
static size_t
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
                                          const char *b, const char *e,
                                          size_t nchars,
                                          MY_STRCOPY_STATUS *status)
{
  size_t nchars0= nchars;
  for ( ; b < e && nchars ; nchars--)
  {
    if ((uchar) b[0] < 128)
    {
      b++; /* Single byte ASCII character */
      continue;
    }

    /*
      The number of available bytes is tested as (e - b) instead of
      forming b + N: b + N may point beyond one-past-the-end of the
      buffer, which is undefined behaviour.
    */
    if (e - b >= 2 && IS_MB2_CHAR(b[0], b[1]))
    {
      b+= 2; /* Double byte character */
      continue;
    }

#ifdef IS_MB3_CHAR
    if (e - b >= 3 && IS_MB3_CHAR(b[0], b[1], b[2]))
    {
      b+= 3; /* Three-byte character */
      continue;
    }
#endif

#ifdef IS_MB4_CHAR
    if (e - b >= 4 && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
    {
      b+= 4; /* Four-byte character */
      continue;
    }
#endif

#ifdef IS_8BIT_CHAR
    /* Tried only after all multi-byte forms have been rejected */
    if (IS_8BIT_CHAR(b[0]))
    {
      b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
      continue;
    }
#endif

    /* Wrong byte sequence */
    status->m_source_end_pos= status->m_well_formed_error_pos= b;
    return nchars0 - nchars;
  }
  status->m_source_end_pos= b;
  status->m_well_formed_error_pos= NULL;
  return nchars0 - nchars;
}
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */
161
162
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#if !defined(CHARLEN)
#error CHARLEN is not defined
#endif
/**
  Returns well formed length of a string
  measured in characters (rather than in bytes).
  Version for character sets that define CHARLEN(), e.g. utf8.

  CHARLEN(cs,b,e) must use the same return code convention as mb_wc():
  - a positive number in the range [1-mbmaxlen] if a valid
    single-byte or multi-byte character was found
  - MY_CS_ILSEQ (0) on a bad byte sequence
  - MY_CS_TOOSMALLxx if the incoming sequence is incomplete

  Scanning stops after nchars characters or as soon as CHARLEN()
  returns a non-positive value.

  @param cs     - charset, passed through to CHARLEN()
  @param b      - the beginning of the string
  @param e      - the end of the string
  @param nchars - the maximum number of characters to scan
  @param status - out: m_source_end_pos is set to the position where
                  scanning stopped; m_well_formed_error_pos is set to
                  that same position if scanning stopped on a non-positive
                  CHARLEN() result before the end of the string,
                  and to NULL otherwise

  @return the number of well formed characters scanned
*/
static size_t
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
                                          const char *b, const char *e,
                                          size_t nchars,
                                          MY_STRCOPY_STATUS *status)
{
  size_t counted;
  for (counted= 0; counted < nchars; counted++)
  {
    int len= CHARLEN(cs, (uchar*) b, (uchar*) e);
    if (len <= 0)
    {
      /* Stopping exactly at the end of the string is not an error */
      status->m_source_end_pos= b;
      status->m_well_formed_error_pos= b < e ? b : NULL;
      return counted;
    }
    b+= len;
  }
  status->m_source_end_pos= b;
  status->m_well_formed_error_pos= NULL;
  return counted;
}
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
199
200
201#ifdef DEFINE_NATIVE_TO_MB_VARLEN
202/*
203 Write a native 2-byte character.
204 If the full character does not fit, only the first byte is written.
205*/
206static inline int
207my_native_to_mb_fixed2(my_wc_t wc, uchar *s, uchar *e)
208{
209 /* The caller must insure there is a space for at least one byte */
210 DBUG_ASSERT(s < e);
211 s[0]= (uchar) (wc >> 8);
212 if (s + 2 > e)
213 return MY_CS_TOOSMALL2;
214 s[1]= wc & 0xFF;
215 return 2;
216}
217
218
219/*
220 Write a native 3-byte character.
221 If the full character does not fit, only the leading bytes are written.
222*/
223static inline int
224my_native_to_mb_fixed3(my_wc_t wc, uchar *s, uchar *e)
225{
226 /* The caller must insure there is a space for at least one byte */
227 DBUG_ASSERT(s < e);
228 s[0]= (uchar) (wc >> 16);
229 if (s + 2 > e)
230 return MY_CS_TOOSMALL2;
231 s[1]= (wc >> 8) & 0xFF;
232 if (s + 3 > e)
233 return MY_CS_TOOSMALL3;
234 s[2]= wc & 0xFF;
235 return 3;
236}
237
238
239/*
240 Write a native 1-byte or 2-byte or 3-byte character.
241*/
242
243static int
244MY_FUNCTION_NAME(native_to_mb)(CHARSET_INFO *cs __attribute__((unused)),
245 my_wc_t wc, uchar *s, uchar *e)
246{
247 if (s >= e)
248 return MY_CS_TOOSMALL;
249 if ((int) wc <= 0xFF)
250 {
251 s[0]= (uchar) wc;
252 return 1;
253 }
254#ifdef IS_MB3_HEAD
255 if (wc > 0xFFFF)
256 return my_native_to_mb_fixed3(wc, s, e);
257#endif
258 return my_native_to_mb_fixed2(wc, s, e);
259}
260#endif /* DEFINE_NATIVE_TO_MB_VARLEN */
261
262
263#undef MY_FUNCTION_NAME
264