mbutil.c source code [MariaDB/extra/readline/mbutil.c]

/* mbutil.c -- readline multibyte character utility functions */
2
/* Copyright (C) 2001-2005 Free Software Foundation, Inc.

   This file is part of the GNU Readline Library, a library for
   reading lines of text with interactive input and history editing.

   The GNU Readline Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2, or
   (at your option) any later version.

   The GNU Readline Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   The GNU General Public License is often shipped with GNU software, and
   is generally kept in a file called COPYING or LICENSE.  If you do not
   have a copy of the license, write to the Free Software Foundation,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22	#define READLINE_LIBRARY
23
24	#if defined (HAVE_CONFIG_H)
25	# include "config_readline.h"
26	#endif
27
28	#include <sys/types.h>
29	#include <fcntl.h>
30	#include "posixjmp.h"
31
32	#if defined (HAVE_UNISTD_H)
33	# include <unistd.h> /* for _POSIX_VERSION */
34	#endif /* HAVE_UNISTD_H */
35
36	#if defined (HAVE_STDLIB_H)
37	# include <stdlib.h>
38	#else
39	# include "ansi_stdlib.h"
40	#endif /* HAVE_STDLIB_H */
41
42	#include <stdio.h>
43	#include <ctype.h>
44
/* System-specific feature definitions and include files. */
46	#include "rldefs.h"
47	#include "rlmbutil.h"
48
49	#if defined (TIOCSTAT_IN_SYS_IOCTL)
50	# include <sys/ioctl.h>
51	#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
/* Some standard library routines. */
54	#include "readline.h"
55
56	#include "rlprivate.h"
57	#include "xmalloc.h"
58
/* Declared here so it can be shared between the readline and history
   libraries.  Non-zero means the line buffer is treated as a sequence of
   single bytes; it defaults to 0 only when multibyte support
   (HANDLE_MULTIBYTE) is compiled in. */
#if defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 0;
#else
int rl_byte_oriented = 1;
#endif
66
/* **************************************************************** */
/*                                                                  */
/*             Multibyte Character Utility Functions                */
/*                                                                  */
/* **************************************************************** */
72
73	#if defined(HANDLE_MULTIBYTE)
74
/* Return the byte offset reached by moving COUNT characters forward from
   byte offset SEED in the multibyte string STRING.

   A negative SEED is treated as 0.  If COUNT is not positive, SEED is
   returned unchanged.  A SEED beyond the end of STRING yields the length
   of STRING, and the scan never advances past the terminating NUL.

   If SEED falls in the middle of a multibyte character it is first moved
   forward to the next character boundary, which uses up one of the COUNT
   characters.  Bytes that do not form a valid character are each counted
   as one character.  If FIND_NON_ZERO is non-zero, zero-width characters
   are skipped without being counted, and any zero-width characters that
   immediately follow the final position are skipped as well. */
static int
_rl_find_next_mbchar_internal (char *string, int seed, int count, int find_non_zero)
{
  size_t tmp;
  mbstate_t ps;
  int point, length;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  /* Measure the string once instead of calling strlen on every step. */
  length = (int)strlen (string);

  /* _rl_adjust_point reports an out-of-range point with -1; do not feed
     that into the offset arithmetic below. */
  if (seed > length)
    return length;

  point = seed + _rl_adjust_point (string, seed, &ps);
  /* If this is true, SEED did not point at the first byte of a character,
     so correcting the point already consumed one character. */
  if (seed < point)
    count--;

  /* Stop at the end of the string: converting zero bytes is reported as an
     incomplete sequence, which would otherwise be treated as an invalid
     byte and push POINT past the terminating NUL. */
  while (count > 0 && point < length)
    {
      tmp = mbrtowc (&wc, string + point, (size_t)(length - point), &ps);
      if (MB_INVALIDCH (tmp))
	{
	  /* Invalid bytes.  Assume a byte represents a character. */
	  point++;
	  count--;
	  /* Reset the conversion state. */
	  memset (&ps, 0, sizeof (mbstate_t));
	}
      else if (MB_NULLWCH (tmp))
	break;			/* found wide '\0' */
      else
	{
	  /* Valid bytes.  Zero-width characters are stepped over without
	     being counted when FIND_NON_ZERO is set. */
	  point += (int)tmp;
	  if (find_non_zero == 0 || wcwidth (wc) != 0)
	    count--;
	}
    }

  if (find_non_zero)
    {
      /* Step over zero-width characters following the final position.
	 The conversion result is checked before WC is examined, so an
	 invalid or incomplete sequence never leads to reading a stale WC
	 or adding an error code to POINT. */
      while (point < length)
	{
	  tmp = mbrtowc (&wc, string + point, (size_t)(length - point), &ps);
	  if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp) || wcwidth (wc) != 0)
	    break;
	  point += (int)tmp;
	}
    }

  return point;
}
142
/* Return the byte offset of the start of the character that precedes byte
   offset SEED in the multibyte string STRING.

   Returns 0 if SEED is negative and the length of STRING if SEED is past
   the end.  The string is decoded from the beginning, remembering the
   start of the most recent character seen before SEED.  Bytes that do not
   form a valid character are each treated as one non-zero-width
   character.  If FIND_NON_ZERO is non-zero, zero-width characters are not
   remembered, so the result is the start of the previous character whose
   width is not zero. */
static int
_rl_find_prev_mbchar_internal (char *string, int seed, int find_non_zero)
{
  mbstate_t ps;
  int prev, point, length;
  size_t tmp;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  length = strlen (string);

  if (seed < 0)
    return 0;
  else if (length < seed)
    return length;

  prev = point = 0;
  while (point < seed)
    {
      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (MB_INVALIDCH (tmp))
	{
	  /* The bytes are invalid or too short to compose a multibyte
	     character, so assume that the first byte represents a single
	     character anyway. */
	  tmp = 1;
	  /* Clear the state of the byte sequence, because in this case
	     the effect of mbstate is undefined. */
	  memset (&ps, 0, sizeof (mbstate_t));

	  /* Since we're assuming that this byte represents a single
	     non-zero-width character, don't forget about it. */
	  prev = point;
	}
      else if (MB_NULLWCH (tmp))
	break;			/* Found '\0' char.  Can this happen? */
      else if (find_non_zero == 0 || wcwidth (wc) != 0)
	prev = point;

      point += tmp;
    }

  return prev;
}
197
/* Return the number of bytes in the multibyte character starting at SRC,
   as reported by mbrlen using conversion state PS.  Returns 0 if mbrlen
   recognized a L'\0' wide character, -1 if it found an invalid multibyte
   sequence, and -2 if the bytes available do not make up a complete
   multibyte character.  In the two error cases PS, if non-null, is reset
   to the initial conversion state.

   The byte count handed to mbrlen is strlen (SRC), so the terminating NUL
   itself is never passed to it. */
int
_rl_get_char_len (char *src, mbstate_t *ps)
{
  size_t tmp;

  tmp = mbrlen ((const char *)src, (size_t)strlen (src), ps);
  if (tmp == (size_t)(-2) || tmp == (size_t)(-1))
    {
      /* Too short or invalid to compose a multibyte character:
	 reinitialize the conversion state before reporting the error. */
      if (ps)
	memset (ps, 0, sizeof (mbstate_t));
      return (tmp == (size_t)(-2)) ? -2 : -1;
    }

  /* Zero for L'\0', otherwise the length of the character in bytes. */
  return (int)tmp;
}
231
/* Compare the multibyte character at BUF1[POS1] with the one at
   BUF2[POS2], using conversion states PS1 and PS2 to measure them.
   Return 1 if both characters are valid, have the same length in bytes
   and consist of identical bytes.  Otherwise return 0; this includes the
   case where either position holds a NUL, invalid or incomplete
   character. */
int
_rl_compare_chars (char *buf1, int pos1, mbstate_t *ps1, char *buf2, int pos2, mbstate_t *ps2)
{
  int i, w1, w2;

  /* The second character is only measured when the first one is usable,
     so PS2 is left untouched otherwise. */
  w1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (w1 <= 0)
    return 0;

  w2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (w2 <= 0 || w1 != w2)
    return 0;

  for (i = 0; i < w1; i++)
    if (buf1[pos1 + i] != buf2[pos2 + i])
      return 0;

  return 1;
}
257
/* Adjust the byte offset POINT so that it falls on a character boundary
   of the multibyte string STRING, and leave in PS the conversion state
   reached while scanning.  The adjusted point is never smaller than
   POINT; the return value is the difference in bytes
   (adjusted_point - POINT), which is 0 when POINT already sits on a
   boundary.  If POINT is invalid (POINT < 0 or greater than the length of
   STRING), return -1. */
int
_rl_adjust_point (char *string, int point, mbstate_t *ps)
{
  size_t tmp = 0;
  int length;
  int pos = 0;

  length = strlen (string);
  if (point < 0)
    return -1;
  if (length < point)
    return -1;

  /* Decode characters from the start of the string until the scan
     position reaches or passes POINT. */
  while (pos < point)
    {
      tmp = mbrlen (string + pos, length - pos, ps);
      if (MB_INVALIDCH (tmp))
	{
	  /* The bytes are invalid or too short to compose a multibyte
	     character, so assume that the first byte represents a single
	     character anyway. */
	  pos++;
	  /* Clear the state of the byte sequence, because in this case
	     the effect of mbstate is undefined. */
	  if (ps)
	    memset (ps, 0, sizeof (mbstate_t));
	}
      else if (MB_NULLWCH (tmp))
	pos++;
      else
	pos += tmp;
    }

  return (pos - point);
}
301
/* Return 1 if the LENGTH bytes of MBCHAR occur in STRING starting at byte
   offset SEED, where END is the offset just past the usable part of
   STRING.  Return 0 if fewer than LENGTH bytes lie between SEED and END,
   or if any byte differs. */
int
_rl_is_mbchar_matched (char *string, int seed, int end, char *mbchar, int length)
{
  int i;

  /* Not enough room left for MBCHAR to fit. */
  if ((end - seed) < length)
    return 0;

  for (i = 0; i < length; i++)
    if (string[seed + i] != mbchar[i])
      return 0;
  return 1;
}
319
320	wchar_t
321	_rl_char_value (buf, ind)
322	char *buf;
323	int ind;
324	{
325	size_t tmp;
326	wchar_t wc;
327	mbstate_t ps;
328	int l;
329
330	if (MB_LEN_MAX == `1` \|\| rl_byte_oriented)
331	return ((wchar_t) buf[ind]);
332	l = strlen (buf);
333	if (ind >= l - `1`)
334	return ((wchar_t) buf[ind]);
335	memset (&ps, `0`, sizeof (mbstate_t));
336	tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
337	if (MB_INVALIDCH (tmp) \|\| MB_NULLWCH (tmp))
338	return ((wchar_t) buf[ind]);
339	return wc;
340	}
341	#endif /* HANDLE_MULTIBYTE */
342
/* Find the byte offset that lies COUNT characters after the offset SEED
   in STRING.  If FLAGS is MB_FIND_NONZERO, we look for non-zero-width
   multibyte characters.  Without HANDLE_MULTIBYTE every byte is one
   character, STRING and FLAGS are unused, and the result is simply
   SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (char *string __attribute__((unused)), int seed, int count,
		      int flags __attribute__((unused)))
{
#if defined (HANDLE_MULTIBYTE)
  return _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  return (seed + count);
#endif
}
358
/* Find the byte offset at which the character preceding the offset SEED
   in STRING starts.  The returned point satisfies point <= SEED.  If
   FLAGS is MB_FIND_NONZERO, we look for non-zero-width multibyte
   characters.  Without HANDLE_MULTIBYTE every byte is one character,
   STRING and FLAGS are unused, and the result is SEED - 1, or SEED itself
   when SEED is 0. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (char *string __attribute__((unused)), int seed,
		      int flags __attribute__((unused)))
{
#if defined (HANDLE_MULTIBYTE)
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  return ((seed == 0) ? seed : seed - 1);
#endif
}
374

Browse the source code of MariaDB/extra/readline/mbutil.c