1/* mbutil.c -- readline multibyte character utility functions */
2
3/* Copyright (C) 2001-2005 Free Software Foundation, Inc.
4
5 This file is part of the GNU Readline Library, a library for
6 reading lines of text with interactive input and history editing.
7
8 The GNU Readline Library is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2, or
11 (at your option) any later version.
12
13 The GNU Readline Library is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 The GNU General Public License is often shipped with GNU software, and
19 is generally kept in a file called COPYING or LICENSE. If you do not
20 have a copy of the license, write to the Free Software Foundation,
21 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22#define READLINE_LIBRARY
23
24#if defined (HAVE_CONFIG_H)
25# include "config_readline.h"
26#endif
27
28#include <sys/types.h>
29#include <fcntl.h>
30#include "posixjmp.h"
31
32#if defined (HAVE_UNISTD_H)
33# include <unistd.h> /* for _POSIX_VERSION */
34#endif /* HAVE_UNISTD_H */
35
36#if defined (HAVE_STDLIB_H)
37# include <stdlib.h>
38#else
39# include "ansi_stdlib.h"
40#endif /* HAVE_STDLIB_H */
41
42#include <stdio.h>
43#include <ctype.h>
44
45/* System-specific feature definitions and include files. */
46#include "rldefs.h"
47#include "rlmbutil.h"
48
49#if defined (TIOCSTAT_IN_SYS_IOCTL)
50# include <sys/ioctl.h>
51#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
53/* Some standard library routines. */
54#include "readline.h"
55
56#include "rlprivate.h"
57#include "xmalloc.h"
58
/* Defined in this file so that both the readline and history libraries
   can refer to it.  Starts out as 0 when HANDLE_MULTIBYTE is defined and
   as 1 otherwise. */
int rl_byte_oriented =
#if defined (HANDLE_MULTIBYTE)
  0;
#else
  1;
#endif
66
67/* **************************************************************** */
68/* */
69/* Multibyte Character Utility Functions */
70/* */
71/* **************************************************************** */
72
73#if defined(HANDLE_MULTIBYTE)
74
/* Return the byte offset in STRING reached by moving forward COUNT
   characters from byte offset SEED.  A negative SEED is treated as 0 and
   a COUNT <= 0 returns SEED unchanged.  If SEED falls inside a multibyte
   character, it is first advanced to the start of the next character and
   that adjustment uses up one of the COUNT characters.  Bytes that do not
   form a valid multibyte character are each counted as one character.
   When FIND_NON_ZERO is non-zero, zero-width characters are not counted
   and any zero-width characters following the final position are skipped
   as well.  The scan never moves past the terminating null byte.  If
   SEED lies beyond the end of STRING, SEED + 1 is returned without
   examining any bytes. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, length;
  mbstate_t ps;
  int point, adjust;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  adjust = _rl_adjust_point (string, seed, &ps);
  /* _rl_adjust_point reports an out-of-range point with -1.  Adding that
     to SEED would start the scan at the wrong byte (possibly outside
     STRING), so treat the position as a single byte instead.  The result
     still advances past SEED. */
  if (adjust < 0)
    return seed + 1;

  /* POINT never exceeds LENGTH below, so LENGTH - POINT is always the
     number of bytes left before the terminating null. */
  length = strlen (string);
  point = seed + adjust;

  /* if this is true, means that seed was not pointed character
     started byte.  So correct the point and consume count */
  if (seed < point)
    count--;

  while (count > 0)
    {
      /* Nothing left to convert.  Calling mbrtowc with no bytes would be
         reported as an incomplete character and push POINT past the
         terminating null. */
      if ((size_t)point >= length)
	break;

      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (MB_INVALIDCH ((size_t)tmp))
	{
	  /* invalid bytes.  assume a byte represents a character */
	  point++;
	  count--;
	  /* reset states. */
	  memset (&ps, 0, sizeof (mbstate_t));
	}
      else if (MB_NULLWCH (tmp))
	break;			/* found wide '\0' */
      else
	{
	  /* valid bytes */
	  point += tmp;
	  /* Zero-width characters are stepped over without being counted
	     when the caller asked for non-zero-width characters. */
	  if (find_non_zero == 0 || wcwidth (wc) != 0)
	    count--;
	}
    }

  if (find_non_zero)
    {
      /* Skip any zero-width characters that follow the final position.
	 WC is only examined after a successful conversion, and a failed
	 conversion never changes POINT. */
      while ((size_t)point < length)
	{
	  tmp = mbrtowc (&wc, string + point, length - point, &ps);
	  if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp) || wcwidth (wc) != 0)
	    break;
	  point += tmp;
	}
    }

  return point;
}
142
/* Return the byte offset at which the character preceding byte offset
   SEED in STRING begins.  A negative SEED yields 0, and a SEED past the
   end of STRING yields the length of STRING.  The string is scanned from
   its beginning; a byte that cannot be converted is treated as a
   one-byte character.  When FIND_NON_ZERO is non-zero, only characters
   whose wcwidth is not 0 (and unconvertible bytes) are remembered as
   candidates. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;
  int total, cur, last_start;

  memset (&state, 0, sizeof (mbstate_t));
  total = strlen (string);

  if (seed < 0)
    return 0;
  if (total < seed)
    return total;

  last_start = 0;
  for (cur = 0; cur < seed; cur += nbytes)
    {
      nbytes = mbrtowc (&wch, string + cur, total - cur, &state);

      if (MB_INVALIDCH ((size_t)nbytes))
	{
	  /* Invalid or truncated sequence: count this byte as a
	     character of its own and start over with a clean
	     conversion state, since the old one is no longer
	     meaningful. */
	  nbytes = 1;
	  memset (&state, 0, sizeof (mbstate_t));
	  last_start = cur;
	  continue;
	}

      if (MB_NULLWCH (nbytes))
	break;			/* Reached a wide '\0'. */

      /* Remember where this character starts unless the caller wants
	 zero-width characters ignored and this is one of them. */
      if (find_non_zero == 0 || wcwidth (wch) != 0)
	last_start = cur;
    }

  return last_start;
}
197
/* Measure the multibyte character that starts at SRC, using PS as the
   conversion state.  Returns the number of bytes in the character when a
   non-null character is recognized and 0 when a null wide character is
   recognized.  Returns -1 for an invalid multibyte sequence and -2 when
   the bytes up to the end of SRC do not form a complete character; in
   both of those cases *PS is cleared if PS is not null. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;

  nbytes = mbrlen ((const char *)src, (size_t)strlen (src), ps);

  if (nbytes == (size_t)(-2) || nbytes == (size_t)(-1))
    {
      /* The sequence is truncated or invalid; put the conversion state
	 back to its initial value before reporting the error. */
      if (ps)
	memset (ps, 0, sizeof (mbstate_t));
      return (nbytes == (size_t)(-2)) ? -2 : -1;
    }

  return (int)nbytes;
}
231
/* Compare the character at BUF1[POS1] with the character at BUF2[POS2],
   using PS1 and PS2 as the respective conversion states.  Returns 1 when
   both characters have the same positive byte length and identical
   bytes, and 0 otherwise.  The second character is not measured if the
   first one has no positive length. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int len1, len2, k;

  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0 || len1 != len2)
    return 0;

  /* Same length; the characters match only if every byte does. */
  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
	return 0;
    }

  return 1;
}
257
/* Find the first character boundary in STRING that is at or after byte
   offset POINT, updating the conversion state *PS while scanning from the
   start of STRING.  Returns the distance in bytes from POINT to that
   boundary (0 if POINT already starts a character).  Returns -1 if POINT
   is invalid, i.e. negative or greater than the length of STRING. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t nbytes;
  int total, cur, step;

  total = strlen (string);
  if (point < 0 || total < point)
    return -1;

  for (cur = 0; cur < point; cur += step)
    {
      nbytes = mbrlen (string + cur, total - cur, ps);

      /* Invalid bytes, truncated sequences and null characters all
	 advance the scan by exactly one byte. */
      step = 1;
      if (MB_INVALIDCH ((size_t)nbytes))
	{
	  /* The conversion state is unusable after a failed conversion,
	     so start the next one from the initial state. */
	  if (ps)
	    memset (ps, 0, sizeof (mbstate_t));
	}
      else if (MB_NULLWCH (nbytes) == 0)
	step = nbytes;
    }

  return (cur - point);
}
301
/* Return 1 if the LENGTH bytes of MBCHAR are identical to the bytes of
   STRING beginning at offset SEED, and 0 otherwise.  END bounds the
   comparison: if fewer than LENGTH bytes lie between SEED and END, the
   result is 0 and no bytes are examined. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int matched;

  if (length > (end - seed))
    return 0;

  /* Count how many leading bytes agree; a full match agrees on all of
     them. */
  matched = 0;
  while (matched < length && string[seed + matched] == mbchar[matched])
    matched++;

  return (matched >= length) ? 1 : 0;
}
319
320wchar_t
321_rl_char_value (buf, ind)
322 char *buf;
323 int ind;
324{
325 size_t tmp;
326 wchar_t wc;
327 mbstate_t ps;
328 int l;
329
330 if (MB_LEN_MAX == 1 || rl_byte_oriented)
331 return ((wchar_t) buf[ind]);
332 l = strlen (buf);
333 if (ind >= l - 1)
334 return ((wchar_t) buf[ind]);
335 memset (&ps, 0, sizeof (mbstate_t));
336 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
337 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
338 return ((wchar_t) buf[ind]);
339 return wc;
340}
341#endif /* HANDLE_MULTIBYTE */
342
/* Return the byte offset in STRING of the character that lies COUNT
   characters after byte offset SEED.  FLAGS is handed to the multibyte
   scanner; MB_FIND_NONZERO asks it to look for non-zero-width multibyte
   characters.  Without HANDLE_MULTIBYTE every byte is one character, so
   the answer is simply SEED + COUNT and STRING and FLAGS are unused. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string __attribute__((unused));
     int seed, count, flags __attribute__((unused));
{
  int next;

#if !defined (HANDLE_MULTIBYTE)
  next = seed + count;
#else
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#endif

  return next;
}
358
/* Return the byte offset in STRING at which the character preceding byte
   offset SEED begins; the result is never greater than SEED.  FLAGS is
   handed to the multibyte scanner; MB_FIND_NONZERO asks it to look for
   non-zero-width multibyte characters.  Without HANDLE_MULTIBYTE the
   previous character is the previous byte, a SEED of 0 is returned
   unchanged, and STRING and FLAGS are unused. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string __attribute__((unused));
     int seed, flags __attribute__((unused));
{
#if !defined (HANDLE_MULTIBYTE)
  if (seed == 0)
    return seed;
  return seed - 1;
#else
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#endif
}
374