charset.c source code [neovim/src/nvim/charset.c]

1	// This is an open source non-commercial project. Dear PVS-Studio, please check
2	// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
3
4	/// @file charset.c
5	///
6	/// Code related to character sets.
7
8	#include <assert.h>
9	#include <string.h>
10	#include <wctype.h>
11	#include <wchar.h> // for towupper() and towlower()
12	#include <inttypes.h>
13
14	#include "nvim/vim.h"
15	#include "nvim/ascii.h"
16	#include "nvim/charset.h"
17	#include "nvim/func_attr.h"
18	#include "nvim/indent.h"
19	#include "nvim/main.h"
20	#include "nvim/mark.h"
21	#include "nvim/mbyte.h"
22	#include "nvim/memline.h"
23	#include "nvim/memory.h"
24	#include "nvim/misc1.h"
25	#include "nvim/garray.h"
26	#include "nvim/move.h"
27	#include "nvim/option.h"
28	#include "nvim/os_unix.h"
29	#include "nvim/state.h"
30	#include "nvim/strings.h"
31	#include "nvim/path.h"
32	#include "nvim/cursor.h"
33
34	#ifdef INCLUDE_GENERATED_DECLARATIONS
35	# include "charset.c.generated.h"
36	#endif
37
38
39	static bool chartab_initialized = false;
40
41	// b_chartab[] is an array with 256 bits, each bit representing one of the
42	// characters 0-255.
43	#define SET_CHARTAB(buf, c) \
44	(buf)->b_chartab[(unsigned)(c) >> 6] \|= (1ull << ((c) & 0x3f))
45	#define RESET_CHARTAB(buf, c) \
46	(buf)->b_chartab[(unsigned)(c) >> 6] &= ~(1ull << ((c) & 0x3f))
47	#define GET_CHARTAB_TAB(chartab, c) \
48	((chartab)[(unsigned)(c) >> 6] & (1ull << ((c) & 0x3f)))
49	#define GET_CHARTAB(buf, c) \
50	GET_CHARTAB_TAB((buf)->b_chartab, c)
51
52	// Table used below, see init_chartab() for an explanation
53	static char_u g_chartab[`256`];
54
55	// Flags for g_chartab[].
56	#define CT_CELL_MASK 0x07 ///< mask: nr of display cells (1, 2 or 4)
57	#define CT_PRINT_CHAR 0x10 ///< flag: set for printable chars
58	#define CT_ID_CHAR 0x20 ///< flag: set for ID chars
59	#define CT_FNAME_CHAR 0x40 ///< flag: set for file name chars
60
61	/// Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword
62	/// characters for current buffer.
63	///
64	/// Depends on the option settings 'iskeyword', 'isident', 'isfname',
65	/// 'isprint' and 'encoding'.
66	///
67	/// The index in g_chartab[] is the character when first byte is up to 0x80,
68	/// if the first byte is 0x80 and above it depends on further bytes.
69	///
70	/// The contents of g_chartab[]:
71	/// - The lower two bits, masked by CT_CELL_MASK, give the number of display
72	/// cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80.
73	/// - CT_PRINT_CHAR bit is set when the character is printable (no need to
74	/// translate the character before displaying it). Note that only DBCS
75	/// characters can have 2 display cells and still be printable.
76	/// - CT_FNAME_CHAR bit is set when the character can be in a file name.
77	/// - CT_ID_CHAR bit is set when the character can be in an identifier.
78	///
79	/// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has
80	/// an error, OK otherwise.
81	int init_chartab(void)
82	{
83	return buf_init_chartab(curbuf, true);
84	}
85
86	/// Helper for init_chartab
87	///
88	/// @param global false: only set buf->b_chartab[]
89	///
90	/// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has
91	/// an error, OK otherwise.
92	int buf_init_chartab(buf_T buf, int* global)
93	{
94	int c;
95	int c2;
96	int i;
97	bool tilde;
98	bool do_isalpha;
99
100	if (global) {
101	// Set the default size for printable characters:
102	// From <Space> to '~' is 1 (printable), others are 2 (not printable).
103	// This also inits all 'isident' and 'isfname' flags to false.
104	c = `0`;
105
106	while (c < `' '`) {
107	g_chartab[c++] = (dy_flags & DY_UHEX) ? `4` : `2`;
108	}
109
110	while (c <= `'~'`) {
111	g_chartab[c++] = `1` + CT_PRINT_CHAR;
112	}
113
114	while (c < `256`) {
115	if (c >= `0xa0`) {
116	// UTF-8: bytes 0xa0 - 0xff are printable (latin1)
117	g_chartab[c++] = CT_PRINT_CHAR + `1`;
118	} else {
119	// the rest is unprintable by default
120	g_chartab[c++] = (dy_flags & DY_UHEX) ? `4` : `2`;
121	}
122	}
123
124	// Assume that every multi-byte char is a filename character.
125	for (c = `1`; c < `256`; c++) {
126	if (c >= `0xa0`) {
127	g_chartab[c] \|= CT_FNAME_CHAR;
128	}
129	}
130	}
131
132	// Init word char flags all to false
133	memset(buf->b_chartab, `0`, (size_t)`32`);
134
135	// In lisp mode the '-' character is included in keywords.
136	if (buf->b_p_lisp) {
137	SET_CHARTAB(buf, `'-'`);
138	}
139
140	// Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint'
141	// options Each option is a list of characters, character numbers or
142	// ranges, separated by commas, e.g.: "200-210,x,#-178,-"
143	for (i = global ? `0` : `3`; i <= `3`; i++) {
144	const char_u *p;
145	if (i == `0`) {
146	// first round: 'isident'
147	p = p_isi;
148	} else if (i == `1`) {
149	// second round: 'isprint'
150	p = p_isp;
151	} else if (i == `2`) {
152	// third round: 'isfname'
153	p = p_isf;
154	} else { // i == 3
155	// fourth round: 'iskeyword'
156	p = buf->b_p_isk;
157	}
158
159	while (*p) {
160	tilde = false;
161	do_isalpha = false;
162
163	if ((*p == `'^'`) && (p[`1`] != NUL)) {
164	tilde = true;
165	++p;
166	}
167
168	if (ascii_isdigit(*p)) {
169	c = getdigits_int((char_u **)&p, true, `0`);
170	} else {
171	c = mb_ptr2char_adv(&p);
172	}
173	c2 = -`1`;
174
175	if ((*p == `'-'`) && (p[`1`] != NUL)) {
176	++p;
177
178	if (ascii_isdigit(*p)) {
179	c2 = getdigits_int((char_u **)&p, true, `0`);
180	} else {
181	c2 = mb_ptr2char_adv(&p);
182	}
183	}
184
185	if ((c <= `0`)
186	\|\| (c >= `256`)
187	\|\| ((c2 < c) && (c2 != -`1`))
188	\|\| (c2 >= `256`)
189	\|\| !((p == NUL) \|\| (p == `','`))) {
190	return FAIL;
191	}
192
193	if (c2 == -`1`) { // not a range
194	// A single '@' (not "@-@"):
195	// Decide on letters being ID/printable/keyword chars with
196	// standard function isalpha(). This takes care of locale for
197	// single-byte characters).
198	if (c == `'@'`) {
199	do_isalpha = true;
200	c = `1`;
201	c2 = `255`;
202	} else {
203	c2 = c;
204	}
205	}
206
207	while (c <= c2) {
208	// Use the MB_ functions here, because isalpha() doesn't
209	// work properly when 'encoding' is "latin1" and the locale is
210	// "C".
211	if (!do_isalpha
212	\|\| mb_islower(c)
213	\|\| mb_isupper(c)) {
214	if (i == `0`) {
215	// (re)set ID flag
216	if (tilde) {
217	g_chartab[c] &= (uint8_t)~CT_ID_CHAR;
218	} else {
219	g_chartab[c] \|= CT_ID_CHAR;
220	}
221	} else if (i == `1`) {
222	// (re)set printable
223	// For double-byte we keep the cell width, so
224	// that we can detect it from the first byte.
225	if (((c < `' '`) \|\| (c > `'~'`))) {
226	if (tilde) {
227	g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK)
228	+ ((dy_flags & DY_UHEX) ? `4` : `2`));
229	g_chartab[c] &= (uint8_t)~CT_PRINT_CHAR;
230	} else {
231	g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK) + `1`);
232	g_chartab[c] \|= CT_PRINT_CHAR;
233	}
234	}
235	} else if (i == `2`) {
236	// (re)set fname flag
237	if (tilde) {
238	g_chartab[c] &= (uint8_t)~CT_FNAME_CHAR;
239	} else {
240	g_chartab[c] \|= CT_FNAME_CHAR;
241	}
242	} else { // i == 3
243	// (re)set keyword flag
244	if (tilde) {
245	RESET_CHARTAB(buf, c);
246	} else {
247	SET_CHARTAB(buf, c);
248	}
249	}
250	}
251	++c;
252	}
253
254	c = *p;
255	p = skip_to_option_part(p);
256
257	if ((c == `','`) && (*p == NUL)) {
258	// Trailing comma is not allowed.
259	return FAIL;
260	}
261	}
262	}
263	chartab_initialized = true;
264	return OK;
265	}
266
267	/// Translate any special characters in buf[bufsize] in-place.
268	///
269	/// The result is a string with only printable characters, but if there is not
270	/// enough room, not all characters will be translated.
271	///
272	/// @param buf
273	/// @param bufsize
274	void trans_characters(char_u buf, int* bufsize)
275	{
276	int len; // length of string needing translation
277	int room; // room in buffer after string
278	char_u trs; // translated character*
279	int trs_len; // length of trs[]
280
281	len = (int)STRLEN(buf);
282	room = bufsize - len;
283
284	while (*buf != `0`) {
285	// Assume a multi-byte character doesn't need translation.
286	if ((trs_len = (*mb_ptr2len)(buf)) > `1`) {
287	len -= trs_len;
288	} else {
289	trs = transchar_byte(*buf);
290	trs_len = (int)STRLEN(trs);
291
292	if (trs_len > `1`) {
293	room -= trs_len - `1`;
294	if (room <= `0`) {
295	return;
296	}
297	memmove(buf + trs_len, buf + `1`, (size_t)len);
298	}
299	memmove(buf, trs, (size_t)trs_len);
300	--len;
301	}
302	buf += trs_len;
303	}
304	}
305
306	/// Find length of a string capable of holding s with all specials replaced
307	///
308	/// Assumes replacing special characters with printable ones just like
309	/// strtrans() does.
310	///
311	/// @param[in] s String to check.
312	///
313	/// @return number of bytes needed to hold a translation of `s`, NUL byte not
314	/// included.
315	size_t transstr_len(const char *const s)
316	FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
317	{
318	const char *p = s;
319	size_t len = `0`;
320
321	while (*p) {
322	const size_t l = (size_t)utfc_ptr2len((const char_u *)p);
323	if (l > `1`) {
324	int pcc[MAX_MCO + `1`];
325	pcc[`0`] = utfc_ptr2char((const char_u *)p, &pcc[`1`]);
326
327	if (vim_isprintc(pcc[`0`])) {
328	len += l;
329	} else {
330	for (size_t i = `0`; i < ARRAY_SIZE(pcc) && pcc[i]; i++) {
331	char hexbuf[`9`];
332	len += transchar_hex(hexbuf, pcc[i]);
333	}
334	}
335	p += l;
336	} else {
337	const int b2c_l = byte2cells((uint8_t)(*p++));
338	// Illegal byte sequence may occupy up to 4 characters.
339	len += (size_t)(b2c_l > `0` ? b2c_l : `4`);
340	}
341	}
342	return len;
343	}
344
345	/// Replace special characters with printable ones
346	///
347	/// @param[in] s String to replace characters from.
348	/// @param[out] buf Buffer to which result should be saved.
349	/// @param[in] len Buffer length. Resulting string may not occupy more then
350	/// len - 1 bytes (one for trailing NUL byte).
351	///
352	/// @return length of the resulting string, without the NUL byte.
353	size_t transstr_buf(const char *const s, char *const buf, const size_t len)
354	FUNC_ATTR_NONNULL_ALL
355	{
356	const char *p = s;
357	char *buf_p = buf;
358	char *const buf_e = buf_p + len - `1`;
359
360	while (*p != NUL && buf_p < buf_e) {
361	const size_t l = (size_t)utfc_ptr2len((const char_u *)p);
362	if (l > `1`) {
363	if (buf_p + l > buf_e) {
364	break; // Exceeded `buf` size.
365	}
366	int pcc[MAX_MCO + `1`];
367	pcc[`0`] = utfc_ptr2char((const char_u *)p, &pcc[`1`]);
368
369	if (vim_isprintc(pcc[`0`])) {
370	memmove(buf_p, p, l);
371	buf_p += l;
372	} else {
373	for (size_t i = `0`; i < ARRAY_SIZE(pcc) && pcc[i]; i++) {
374	char hexbuf[`9`]; // <up to 6 bytes>NUL
375	const size_t hexlen = transchar_hex(hexbuf, pcc[i]);
376	if (buf_p + hexlen > buf_e) {
377	break;
378	}
379	memmove(buf_p, hexbuf, hexlen);
380	buf_p += hexlen;
381	}
382	}
383	p += l;
384	} else {
385	const char *const tb = (const char )transchar_byte((uint8_t)(p++));
386	const size_t tb_len = strlen(tb);
387	if (buf_p + tb_len > buf_e) {
388	break; // Exceeded `buf` size.
389	}
390	memmove(buf_p, tb, tb_len);
391	buf_p += tb_len;
392	}
393	}
394	*buf_p = NUL;
395	assert(buf_p <= buf_e);
396	return (size_t)(buf_p - buf);
397	}
398
399	/// Copy string and replace special characters with printable characters
400	///
401	/// Works like `strtrans()` does, used for that and in some other places.
402	///
403	/// @param[in] s String to replace characters from.
404	///
405	/// @return [allocated] translated string
406	char transstr(const* char *const s)
407	FUNC_ATTR_NONNULL_RET
408	{
409	// Compute the length of the result, taking account of unprintable
410	// multi-byte characters.
411	const size_t len = transstr_len((const char *)s) + `1`;
412	char *const buf = xmalloc(len);
413	transstr_buf(s, buf, len);
414	return buf;
415	}
416
417	/// Convert the string "str[orglen]" to do ignore-case comparing.
418	/// Use the current locale.
419	///
420	/// When "buf" is NULL, return an allocated string.
421	/// Otherwise, put the result in buf, limited by buflen, and return buf.
422	char_u* str_foldcase(char_u str, int* orglen, char_u buf, int* buflen)
423	FUNC_ATTR_NONNULL_RET
424	{
425	garray_T ga;
426	int i;
427	int len = orglen;
428
429	#define GA_CHAR(i) ((char_u *)ga.ga_data)[i]
430	#define GA_PTR(i) ((char_u *)ga.ga_data + i)
431	#define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
432	#define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i)
433
434	// Copy "str" into "buf" or allocated memory, unmodified.
435	if (buf == NULL) {
436	ga_init(&ga, `1`, `10`);
437
438	ga_grow(&ga, len + `1`);
439	memmove(ga.ga_data, str, (size_t)len);
440	ga.ga_len = len;
441	} else {
442	if (len >= buflen) {
443	// Ugly!
444	len = buflen - `1`;
445	}
446	memmove(buf, str, (size_t)len);
447	}
448
449	if (buf == NULL) {
450	GA_CHAR(len) = NUL;
451	} else {
452	buf[len] = NUL;
453	}
454
455	// Make each character lower case.
456	i = `0`;
457	while (STR_CHAR(i) != NUL) {
458	int c = utf_ptr2char(STR_PTR(i));
459	int olen = utf_ptr2len(STR_PTR(i));
460	int lc = mb_tolower(c);
461
462	// Only replace the character when it is not an invalid
463	// sequence (ASCII character or more than one byte) and
464	// mb_tolower() doesn't return the original character.
465	if (((c < `0x80`) \|\| (olen > `1`)) && (c != lc)) {
466	int nlen = utf_char2len(lc);
467
468	// If the byte length changes need to shift the following
469	// characters forward or backward.
470	if (olen != nlen) {
471	if (nlen > olen) {
472	if (buf == NULL) {
473	ga_grow(&ga, nlen - olen + `1`);
474	} else {
475	if (len + nlen - olen >= buflen) {
476	// out of memory, keep old char
477	lc = c;
478	nlen = olen;
479	}
480	}
481	}
482
483	if (olen != nlen) {
484	if (buf == NULL) {
485	STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
486	ga.ga_len += nlen - olen;
487	} else {
488	STRMOVE(buf + i + nlen, buf + i + olen);
489	len += nlen - olen;
490	}
491	}
492	}
493	(void)utf_char2bytes(lc, STR_PTR(i));
494	}
495
496	// skip to next multi-byte char
497	i += (*mb_ptr2len)(STR_PTR(i));
498	}
499
500
501	if (buf == NULL) {
502	return (char_u *)ga.ga_data;
503	}
504	return buf;
505	}
506
507	// Catch 22: g_chartab[] can't be initialized before the options are
508	// initialized, and initializing options may cause transchar() to be called!
509	// When chartab_initialized == false don't use g_chartab[].
510	// Does NOT work for multi-byte characters, c must be <= 255.
511	// Also doesn't work for the first byte of a multi-byte, "c" must be a
512	// character!
513	static char_u transchar_buf[`11`];
514
515	/// Translate a character into a printable one, leaving printable ASCII intact
516	///
517	/// All unicode characters are considered non-printable in this function.
518	///
519	/// @param[in] c Character to translate.
520	///
521	/// @return translated character into a static buffer.
522	char_u transchar(int* c)
523	{
524	int i = `0`;
525	if (IS_SPECIAL(c)) {
526	// special key code, display as ~@ char
527	transchar_buf[`0`] = `'~'`;
528	transchar_buf[`1`] = `'@'`;
529	i = `2`;
530	c = K_SECOND(c);
531	}
532
533	if ((!chartab_initialized && (((c >= `' '`) && (c <= `'~'`))))
534	\|\| ((c <= `0xFF`) && vim_isprintc_strict(c))) {
535	// printable character
536	transchar_buf[i] = (char_u)c;
537	transchar_buf[i + `1`] = NUL;
538	} else if (c <= `0xFF`) {
539	transchar_nonprint(transchar_buf + i, c);
540	} else {
541	transchar_hex((char *)transchar_buf + i, c);
542	}
543	return transchar_buf;
544	}
545
546	/// Like transchar(), but called with a byte instead of a character
547	///
548	/// Checks for an illegal UTF-8 byte.
549	///
550	/// @param[in] c Byte to translate.
551	///
552	/// @return pointer to translated character in transchar_buf.
553	char_u transchar_byte(const* int c)
554	FUNC_ATTR_WARN_UNUSED_RESULT
555	{
556	if (c >= `0x80`) {
557	transchar_nonprint(transchar_buf, c);
558	return transchar_buf;
559	}
560	return transchar(c);
561	}
562
563	/// Convert non-printable characters to 2..4 printable ones
564	///
565	/// @warning Does not work for multi-byte characters, c must be <= 255.
566	///
567	/// @param[out] buf Buffer to store result in, must be able to hold at least
568	/// 5 bytes (conversion result + NUL).
569	/// @param[in] c Character to convert. NUL is assumed to be NL according to
570	/// `:h NL-used-for-NUL`.
571	void transchar_nonprint(char_u buf, int* c)
572	{
573	if (c == NL) {
574	// we use newline in place of a NUL
575	c = NUL;
576	} else if ((c == CAR) && (get_fileformat(curbuf) == EOL_MAC)) {
577	// we use CR in place of NL in this case
578	c = NL;
579	}
580	assert(c <= `0xff`);
581
582	if (dy_flags & DY_UHEX \|\| c > `0x7f`) {
583	// 'display' has "uhex"
584	transchar_hex((char *)buf, c);
585	} else {
586	// 0x00 - 0x1f and 0x7f
587	buf[`0`] = `'^'`;
588	// DEL displayed as ^?
589	buf[`1`] = (char_u)(c ^ `0x40`);
590
591	buf[`2`] = NUL;
592	}
593	}
594
595	/// Convert a non-printable character to hex C string like "<FFFF>"
596	///
597	/// @param[out] buf Buffer to store result in.
598	/// @param[in] c Character to convert.
599	///
600	/// @return Number of bytes stored in buffer, excluding trailing NUL byte.
601	size_t transchar_hex(char *const buf, const int c)
602	FUNC_ATTR_NONNULL_ALL
603	{
604	size_t i = `0`;
605
606	buf[i++] = `'<'`;
607	if (c > `255`) {
608	if (c > `255` * `256`) {
609	buf[i++] = (char)nr2hex((unsigned)c >> `20`);
610	buf[i++] = (char)nr2hex((unsigned)c >> `16`);
611	}
612	buf[i++] = (char)nr2hex((unsigned)c >> `12`);
613	buf[i++] = (char)nr2hex((unsigned)c >> `8`);
614	}
615	buf[i++] = (char)(nr2hex((unsigned)c >> `4`));
616	buf[i++] = (char)(nr2hex((unsigned)c));
617	buf[i++] = `'>'`;
618	buf[i] = NUL;
619	return i;
620	}
621
622	/// Convert the lower 4 bits of byte "c" to its hex character
623	///
624	/// Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
625	/// function key 1.
626	///
627	/// @param[in] n Number to convert.
628	///
629	/// @return the hex character.
630	static inline unsigned nr2hex(unsigned n)
631	FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
632	{
633	if ((n & `0xf`) <= `9`) {
634	return (n & `0xf`) + `'0'`;
635	}
636	return (n & `0xf`) - `10` + `'a'`;
637	}
638
639	/// Return number of display cells occupied by byte "b".
640	///
641	/// Caller must make sure 0 <= b <= 255.
642	/// For multi-byte mode "b" must be the first byte of a character.
643	/// A TAB is counted as two cells: "^I".
644	/// This will return 0 for bytes >= 0x80, because the number of
645	/// cells depends on further bytes in UTF-8.
646	///
647	/// @param b
648	///
649	/// @reeturn Number of display cells.
650	int byte2cells(int b)
651	{
652	if (b >= `0x80`) {
653	return `0`;
654	}
655	return g_chartab[b] & CT_CELL_MASK;
656	}
657
658	/// Return number of display cells occupied by character "c".
659	///
660	/// "c" can be a special key (negative number) in which case 3 or 4 is returned.
661	/// A TAB is counted as two cells: "^I" or four: "<09>".
662	///
663	/// @param c
664	///
665	/// @return Number of display cells.
666	int char2cells(int c)
667	{
668	if (IS_SPECIAL(c)) {
669	return char2cells(K_SECOND(c)) + `2`;
670	}
671
672	if (c >= `0x80`) {
673	// UTF-8: above 0x80 need to check the value
674	return utf_char2cells(c);
675	}
676	return g_chartab[c & `0xff`] & CT_CELL_MASK;
677	}
678
679	/// Return number of display cells occupied by character at "p".*
680	/// A TAB is counted as two cells: "^I" or four: "<09>".
681	///
682	/// @param p
683	///
684	/// @return number of display cells.
685	int ptr2cells(const char_u *p)
686	{
687	// For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
688	if (*p >= `0x80`) {
689	return utf_ptr2cells(p);
690	}
691
692	// For DBCS we can tell the cell count from the first byte.
693	return g_chartab[*p] & CT_CELL_MASK;
694	}
695
696	/// Return the number of character cells string "s" will take on the screen,
697	/// counting TABs as two characters: "^I".
698	///
699	/// 's' must be non-null.
700	///
701	/// @param s
702	///
703	/// @return number of character cells.
704	int vim_strsize(char_u *s)
705	{
706	return vim_strnsize(s, (int)MAXCOL);
707	}
708
709	/// Return the number of character cells string "s[len]" will take on the
710	/// screen, counting TABs as two characters: "^I".
711	///
712	/// 's' must be non-null.
713	///
714	/// @param s
715	/// @param len
716	///
717	/// @return Number of character cells.
718	int vim_strnsize(char_u s, int* len)
719	{
720	assert(s != NULL);
721	int size = `0`;
722	while (*s != NUL && --len >= `0`) {
723	int l = (*mb_ptr2len)(s);
724	size += ptr2cells(s);
725	s += l;
726	len -= l - `1`;
727	}
728	return size;
729	}
730
/// Return (from the enclosing function) the number of cells the character at
/// "p" will take on the screen, taking into account the size of a tab.
/// Use a define to make it fast, this is used very often!!!
/// Also see getvcol() below.
///
/// A TAB advances to the next multiple of 'tabstop' of "buf", unless 'list'
/// is set in "wp" without a "tab" entry in its list characters; everything
/// else is measured with ptr2cells().
///
/// Expands to a complete if/else statement whose branches both return, so it
/// is used without a trailing semicolon. All arguments are parenthesized in
/// the expansion so that expressions can be passed safely.
///
/// @param wp   window
/// @param buf  buffer providing 'tabstop'
/// @param p    pointer to the character
/// @param col  screen column the character starts in
#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
  if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_p_lcs_chars.tab1)) { \
    const int ts = (int)(buf)->b_p_ts; \
    return (ts - (int)((col) % ts)); \
  } else { \
    return ptr2cells(p); \
  }
747
748	int chartabsize(char_u *p, colnr_T col)
749	{
750	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col)
751	}
752
753	static int win_chartabsize(win_T wp, char_u p, colnr_T col)
754	{
755	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col)
756	}
757
758	/// Return the number of characters the string 's' will take on the screen,
759	/// taking into account the size of a tab.
760	///
761	/// @param s
762	///
763	/// @return Number of characters the string will take on the screen.
764	int linetabsize(char_u *s)
765	{
766	return linetabsize_col(`0`, s);
767	}
768
769	/// Like linetabsize(), but starting at column "startcol".
770	///
771	/// @param startcol
772	/// @param s
773	///
774	/// @return Number of characters the string will take on the screen.
775	int linetabsize_col(int startcol, char_u *s)
776	{
777	colnr_T col = startcol;
778	char_u line = s; /* pointer to start of line, for breakindent /
779
780	while (*s != NUL) {
781	col += lbr_chartabsize_adv(line, &s, col);
782	}
783	return (int)col;
784	}
785
786	/// Like linetabsize(), but for a given window instead of the current one.
787	///
788	/// @param wp
789	/// @param line
790	/// @param len
791	///
792	/// @return Number of characters the string will take on the screen.
793	unsigned int win_linetabsize(win_T wp, char_u line, colnr_T len)
794	{
795	colnr_T col = `0`;
796
797	for (char_u *s = line;
798	*s != NUL && (len == MAXCOL \|\| s < line + len);
799	MB_PTR_ADV(s)) {
800	col += win_lbr_chartabsize(wp, line, s, col, NULL);
801	}
802
803	return (unsigned int)col;
804	}
805
806	/// Check that "c" is a normal identifier character:
807	/// Letters and characters from the 'isident' option.
808	///
809	/// @param c character to check
810	bool vim_isIDc(int c)
811	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
812	{
813	return c > `0` && c < `0x100` && (g_chartab[c] & CT_ID_CHAR);
814	}
815
816	/// Check that "c" is a keyword character:
817	/// Letters and characters from 'iskeyword' option for the current buffer.
818	/// For multi-byte characters mb_get_class() is used (builtin rules).
819	///
820	/// @param c character to check
821	bool vim_iswordc(const int c)
822	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
823	{
824	return vim_iswordc_buf(c, curbuf);
825	}
826
827	/// Check that "c" is a keyword character
828	/// Letters and characters from 'iskeyword' option for given buffer.
829	/// For multi-byte characters mb_get_class() is used (builtin rules).
830	///
831	/// @param[in] c Character to check.
832	/// @param[in] chartab Buffer chartab.
833	bool vim_iswordc_tab(const int c, const uint64_t *const chartab)
834	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
835	{
836	return (c >= `0x100`
837	? (utf_class_tab(c, chartab) >= `2`)
838	: (c > `0` && GET_CHARTAB_TAB(chartab, c) != `0`));
839	}
840
841	/// Check that "c" is a keyword character:
842	/// Letters and characters from 'iskeyword' option for given buffer.
843	/// For multi-byte characters mb_get_class() is used (builtin rules).
844	///
845	/// @param c character to check
846	/// @param buf buffer whose keywords to use
847	bool vim_iswordc_buf(const int c, buf_T *const buf)
848	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(`2`)
849	{
850	return vim_iswordc_tab(c, buf->b_chartab);
851	}
852
853	/// Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
854	///
855	/// @param p pointer to the multi-byte character
856	///
857	/// @return true if "p" points to a keyword character.
858	bool vim_iswordp(const char_u *const p)
859	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
860	{
861	return vim_iswordp_buf(p, curbuf);
862	}
863
864	/// Just like vim_iswordc_buf() but uses a pointer to the (multi-byte)
865	/// character.
866	///
867	/// @param p pointer to the multi-byte character
868	/// @param buf buffer whose keywords to use
869	///
870	/// @return true if "p" points to a keyword character.
871	bool vim_iswordp_buf(const char_u *const p, buf_T *const buf)
872	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
873	{
874	int c = *p;
875
876	if (MB_BYTE2LEN(c) > `1`) {
877	c = utf_ptr2char(p);
878	}
879	return vim_iswordc_buf(c, buf);
880	}
881
882	/// Check that "c" is a valid file-name character.
883	/// Assume characters above 0x100 are valid (multi-byte).
884	///
885	/// @param c character to check
886	bool vim_isfilec(int c)
887	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
888	{
889	return c >= `0x100` \|\| (c > `0` && (g_chartab[c] & CT_FNAME_CHAR));
890	}
891
892	/// Check that "c" is a valid file-name character or a wildcard character
893	/// Assume characters above 0x100 are valid (multi-byte).
894	/// Explicitly interpret ']' as a wildcard character as path_has_wildcard("]")
895	/// returns false.
896	///
897	/// @param c character to check
898	bool vim_isfilec_or_wc(int c)
899	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
900	{
901	char_u buf[`2`];
902	buf[`0`] = (char_u)c;
903	buf[`1`] = NUL;
904	return vim_isfilec(c) \|\| c == `']'` \|\| path_has_wildcard(buf);
905	}
906
907	/// Check that "c" is a printable character.
908	/// Assume characters above 0x100 are printable for double-byte encodings.
909	///
910	/// @param c character to check
911	bool vim_isprintc(int c)
912	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
913	{
914	if (c >= `0x100`) {
915	return utf_printable(c);
916	}
917	return c > `0` && (g_chartab[c] & CT_PRINT_CHAR);
918	}
919
920	/// Strict version of vim_isprintc(c), don't return true if "c" is the head
921	/// byte of a double-byte character.
922	///
923	/// @param c character to check
924	///
925	/// @return true if "c" is a printable character.
926	bool vim_isprintc_strict(int c)
927	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
928	{
929	if (c >= `0x100`) {
930	return utf_printable(c);
931	}
932	return c > `0` && (g_chartab[c] & CT_PRINT_CHAR);
933	}
934
935	/// like chartabsize(), but also check for line breaks on the screen
936	///
937	/// @param line
938	/// @param s
939	/// @param col
940	///
941	/// @return The number of characters taken up on the screen.
942	int lbr_chartabsize(char_u line, unsigned* char *s, colnr_T col)
943	{
944	if (!curwin->w_p_lbr && (*p_sbr == NUL) && !curwin->w_p_bri) {
945	if (curwin->w_p_wrap) {
946	return win_nolbr_chartabsize(curwin, s, col, NULL);
947	}
948	RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col)
949	}
950	return win_lbr_chartabsize(curwin, line == NULL ? s: line, s, col, NULL);
951	}
952
953	/// Call lbr_chartabsize() and advance the pointer.
954	///
955	/// @param line
956	/// @param s
957	/// @param col
958	///
959	/// @return The number of characters take up on the screen.
960	int lbr_chartabsize_adv(char_u line, char_u *s, colnr_T col)
961	{
962	int retval;
963
964	retval = lbr_chartabsize(line, *s, col);
965	MB_PTR_ADV(*s);
966	return retval;
967	}
968
969	/// This function is used very often, keep it fast!!!!
970	///
971	/// If "headp" not NULL, set headp to the size of what we for 'showbreak'*
972	/// string at start of line. Warning: headp is only set if it's a non-zero*
973	/// value, init to 0 before calling.
974	///
975	/// @param wp
976	/// @param line
977	/// @param s
978	/// @param col
979	/// @param headp
980	///
981	/// @return The number of characters taken up on the screen.
982	int win_lbr_chartabsize(win_T wp, char_u line, char_u s, colnr_T col, int* *headp)
983	{
984	colnr_T col2;
985	colnr_T col_adj = `0`; / col + screen size of tab /
986	colnr_T colmax;
987	int added;
988	int mb_added = `0`;
989	int numberextra;
990	char_u *ps;
991	int n;
992
993	// No 'linebreak', 'showbreak' and 'breakindent': return quickly.
994	if (!wp->w_p_lbr && !wp->w_p_bri && (*p_sbr == NUL)) {
995	if (wp->w_p_wrap) {
996	return win_nolbr_chartabsize(wp, s, col, headp);
997	}
998	RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col)
999	}
1000
1001	// First get normal size, without 'linebreak'
1002	int size = win_chartabsize(wp, s, col);
1003	int c = *s;
1004	if (*s == TAB) {
1005	col_adj = size - `1`;
1006	}
1007
1008	// If 'linebreak' set check at a blank before a non-blank if the line
1009	// needs a break here
1010	if (wp->w_p_lbr
1011	&& vim_isbreak(c)
1012	&& !vim_isbreak((int)s[`1`])
1013	&& wp->w_p_wrap
1014	&& (wp->w_width_inner != `0`)) {
1015	// Count all characters from first non-blank after a blank up to next
1016	// non-blank after a blank.
1017	numberextra = win_col_off(wp);
1018	col2 = col;
1019	colmax = (colnr_T)(wp->w_width_inner - numberextra - col_adj);
1020
1021	if (col >= colmax) {
1022	colmax += col_adj;
1023	n = colmax + win_col_off2(wp);
1024
1025	if (n > `0`) {
1026	colmax += (((col - colmax) / n) + `1`) * n - col_adj;
1027	}
1028	}
1029
1030	for (;;) {
1031	ps = s;
1032	MB_PTR_ADV(s);
1033	c = *s;
1034
1035	if (!(c != NUL
1036	&& (vim_isbreak(c) \|\| col2 == col \|\| !vim_isbreak((int)(*ps))))) {
1037	break;
1038	}
1039
1040	col2 += win_chartabsize(wp, s, col2);
1041
1042	if (col2 >= colmax) { / doesn't fit /
1043	size = colmax - col + col_adj;
1044	break;
1045	}
1046	}
1047	} else if ((size == `2`)
1048	&& (MB_BYTE2LEN(*s) > `1`)
1049	&& wp->w_p_wrap
1050	&& in_win_border(wp, col)) {
1051	// Count the ">" in the last column.
1052	++size;
1053	mb_added = `1`;
1054	}
1055
1056	// May have to add something for 'breakindent' and/or 'showbreak'
1057	// string at start of line.
1058	// Set headp to the size of what we add.*
1059	added = `0`;
1060
1061	if ((*p_sbr != NUL \|\| wp->w_p_bri) && wp->w_p_wrap && (col != `0`)) {
1062	colnr_T sbrlen = `0`;
1063	int numberwidth = win_col_off(wp);
1064
1065	numberextra = numberwidth;
1066	col += numberextra + mb_added;
1067
1068	if (col >= (colnr_T)wp->w_width_inner) {
1069	col -= wp->w_width_inner;
1070	numberextra = wp->w_width_inner - (numberextra - win_col_off2(wp));
1071	if (col >= numberextra && numberextra > `0`) {
1072	col %= numberextra;
1073	}
1074	if (*p_sbr != NUL) {
1075	sbrlen = (colnr_T)MB_CHARLEN(p_sbr);
1076	if (col >= sbrlen) {
1077	col -= sbrlen;
1078	}
1079	}
1080	if (col >= numberextra && numberextra > `0`) {
1081	col %= numberextra;
1082	} else if (col > `0` && numberextra > `0`) {
1083	col += numberwidth - win_col_off2(wp);
1084	}
1085
1086	numberwidth -= win_col_off2(wp);
1087	}
1088
1089	if (col == `0` \|\| (col + size + sbrlen > (colnr_T)wp->w_width_inner)) {
1090	added = `0`;
1091
1092	if (*p_sbr != NUL) {
1093	if (size + sbrlen + numberwidth > (colnr_T)wp->w_width_inner) {
1094	// Calculate effective window width.
1095	int width = (colnr_T)wp->w_width_inner - sbrlen - numberwidth;
1096	int prev_width = col ? ((colnr_T)wp->w_width_inner - (sbrlen + col))
1097	: `0`;
1098
1099	if (width <= `0`) {
1100	width = `1`;
1101	}
1102	added += ((size - prev_width) / width) * vim_strsize(p_sbr);
1103	if ((size - prev_width) % width) {
1104	// Wrapped, add another length of 'sbr'.
1105	added += vim_strsize(p_sbr);
1106	}
1107	} else {
1108	added += vim_strsize(p_sbr);
1109	}
1110	}
1111
1112	if (wp->w_p_bri)
1113	added += get_breakindent_win(wp, line);
1114
1115	size += added;
1116	if (col != `0`) {
1117	added = `0`;
1118	}
1119	}
1120	}
1121
1122	if (headp != NULL) {
1123	*headp = added + mb_added;
1124	}
1125	return size;
1126	}
1127
1128	/// Like win_lbr_chartabsize(), except that we know 'linebreak' is off and
1129	/// 'wrap' is on. This means we need to check for a double-byte character that
1130	/// doesn't fit at the end of the screen line.
1131	///
1132	/// @param wp
1133	/// @param s
1134	/// @param col
1135	/// @param headp
1136	///
1137	/// @return The number of characters take up on the screen.
1138	static int win_nolbr_chartabsize(win_T wp, char_u s, colnr_T col, int *headp)
1139	{
1140	int n;
1141
1142	if ((*s == TAB) && (!wp->w_p_list \|\| wp->w_p_lcs_chars.tab1)) {
1143	n = (int)wp->w_buffer->b_p_ts;
1144	return n - (col % n);
1145	}
1146	n = ptr2cells(s);
1147
1148	// Add one cell for a double-width character in the last column of the
1149	// window, displayed with a ">".
1150	if ((n == `2`) && (MB_BYTE2LEN(*s) > `1`) && in_win_border(wp, col)) {
1151	if (headp != NULL) {
1152	*headp = `1`;
1153	}
1154	return `3`;
1155	}
1156	return n;
1157	}
1158
1159	/// Check that virtual column "vcol" is in the rightmost column of window "wp".
1160	///
1161	/// @param wp window
1162	/// @param vcol column number
1163	bool in_win_border(win_T *wp, colnr_T vcol)
1164	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(`1`)
1165	{
1166	int width1; // width of first line (after line number)
1167	int width2; // width of further lines
1168
1169	if (wp->w_width_inner == `0`) {
1170	// there is no border
1171	return false;
1172	}
1173	width1 = wp->w_width_inner - win_col_off(wp);
1174
1175	if ((int)vcol < width1 - `1`) {
1176	return false;
1177	}
1178
1179	if ((int)vcol == width1 - `1`) {
1180	return true;
1181	}
1182	width2 = width1 + win_col_off2(wp);
1183
1184	if (width2 <= `0`) {
1185	return false;
1186	}
1187	return (vcol - width1) % width2 == width2 - `1`;
1188	}
1189
1190	/// Get virtual column number of pos.
1191	/// start: on the first position of this character (TAB, ctrl)
1192	/// cursor: where the cursor is on this character (first char, except for TAB)
1193	/// end: on the last position of this character (TAB, ctrl)
1194	///
1195	/// This is used very often, keep it fast!
1196	///
1197	/// @param wp
1198	/// @param pos
1199	/// @param start
1200	/// @param cursor
1201	/// @param end
1202	void getvcol(win_T wp, pos_T pos, colnr_T start, colnr_T cursor,
1203	colnr_T *end)
1204	{
1205	colnr_T vcol;
1206	char_u ptr; // points to current char*
1207	char_u posptr; // points to char at pos->col*
1208	char_u line; // start of the line*
1209	int incr;
1210	int head;
1211	int ts = (int)wp->w_buffer->b_p_ts;
1212	int c;
1213
1214	vcol = `0`;
1215	line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, false);
1216
1217	if (pos->col == MAXCOL) {
1218	// continue until the NUL
1219	posptr = NULL;
1220	} else {
1221	// Special check for an empty line, which can happen on exit, when
1222	// ml_get_buf() always returns an empty string.
1223	if (*ptr == NUL) {
1224	pos->col = `0`;
1225	}
1226	posptr = ptr + pos->col;
1227	posptr -= utf_head_off(line, posptr);
1228	}
1229
1230	// This function is used very often, do some speed optimizations.
1231	// When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
1232	// use a simple loop.
1233	// Also use this when 'list' is set but tabs take their normal size.
1234	if ((!wp->w_p_list \|\| (wp->w_p_lcs_chars.tab1 != NUL))
1235	&& !wp->w_p_lbr
1236	&& (*p_sbr == NUL)
1237	&& !wp->w_p_bri ) {
1238	for (;;) {
1239	head = `0`;
1240	c = *ptr;
1241
1242	// make sure we don't go past the end of the line
1243	if (c == NUL) {
1244	// NUL at end of line only takes one column
1245	incr = `1`;
1246	break;
1247	}
1248
1249	// A tab gets expanded, depending on the current column
1250	if (c == TAB) {
1251	incr = ts - (vcol % ts);
1252	} else {
1253	// For utf-8, if the byte is >= 0x80, need to look at
1254	// further bytes to find the cell width.
1255	if (c >= `0x80`) {
1256	incr = utf_ptr2cells(ptr);
1257	} else {
1258	incr = g_chartab[c] & CT_CELL_MASK;
1259	}
1260
1261	// If a double-cell char doesn't fit at the end of a line
1262	// it wraps to the next line, it's like this char is three
1263	// cells wide.
1264	if ((incr == `2`)
1265	&& wp->w_p_wrap
1266	&& (MB_BYTE2LEN(*ptr) > `1`)
1267	&& in_win_border(wp, vcol)) {
1268	incr++;
1269	head = `1`;
1270	}
1271	}
1272
1273	if ((posptr != NULL) && (ptr >= posptr)) {
1274	// character at pos->col
1275	break;
1276	}
1277
1278	vcol += incr;
1279	MB_PTR_ADV(ptr);
1280	}
1281	} else {
1282	for (;;) {
1283	// A tab gets expanded, depending on the current column
1284	head = `0`;
1285	incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);
1286
1287	// make sure we don't go past the end of the line
1288	if (*ptr == NUL) {
1289	// NUL at end of line only takes one column
1290	incr = `1`;
1291	break;
1292	}
1293
1294	if ((posptr != NULL) && (ptr >= posptr)) {
1295	// character at pos->col
1296	break;
1297	}
1298
1299	vcol += incr;
1300	MB_PTR_ADV(ptr);
1301	}
1302	}
1303
1304	if (start != NULL) {
1305	*start = vcol + head;
1306	}
1307
1308	if (end != NULL) {
1309	*end = vcol + incr - `1`;
1310	}
1311
1312	if (cursor != NULL) {
1313	if ((*ptr == TAB)
1314	&& (State & NORMAL)
1315	&& !wp->w_p_list
1316	&& !virtual_active()
1317	&& !(VIsual_active && ((p_sel == `'e'`) \|\| ltoreq(pos, VIsual)))) {
1318	// cursor at end
1319	*cursor = vcol + incr - `1`;
1320	} else {
1321	// cursor at start
1322	*cursor = vcol + head;
1323	}
1324	}
1325	}
1326
1327	/// Get virtual cursor column in the current window, pretending 'list' is off.
1328	///
1329	/// @param posp
1330	///
1331	/// @retujrn The virtual cursor column.
1332	colnr_T getvcol_nolist(pos_T *posp)
1333	{
1334	int list_save = curwin->w_p_list;
1335	colnr_T vcol;
1336
1337	curwin->w_p_list = false;
1338	if (posp->coladd) {
1339	getvvcol(curwin, posp, NULL, &vcol, NULL);
1340	} else {
1341	getvcol(curwin, posp, NULL, &vcol, NULL);
1342	}
1343	curwin->w_p_list = list_save;
1344	return vcol;
1345	}
1346
1347	/// Get virtual column in virtual mode.
1348	///
1349	/// @param wp
1350	/// @param pos
1351	/// @param start
1352	/// @param cursor
1353	/// @param end
1354	void getvvcol(win_T wp, pos_T pos, colnr_T start, colnr_T cursor,
1355	colnr_T *end)
1356	{
1357	colnr_T col;
1358	colnr_T coladd;
1359	colnr_T endadd;
1360	char_u *ptr;
1361
1362	if (virtual_active()) {
1363	// For virtual mode, only want one value
1364	getvcol(wp, pos, &col, NULL, NULL);
1365
1366	coladd = pos->coladd;
1367	endadd = `0`;
1368
1369	// Cannot put the cursor on part of a wide character.
1370	ptr = ml_get_buf(wp->w_buffer, pos->lnum, false);
1371
1372	if (pos->col < (colnr_T)STRLEN(ptr)) {
1373	int c = utf_ptr2char(ptr + pos->col);
1374	if ((c != TAB) && vim_isprintc(c)) {
1375	endadd = (colnr_T)(char2cells(c) - `1`);
1376	if (coladd > endadd) {
1377	// past end of line
1378	endadd = `0`;
1379	} else {
1380	coladd = `0`;
1381	}
1382	}
1383	}
1384	col += coladd;
1385
1386	if (start != NULL) {
1387	*start = col;
1388	}
1389
1390	if (cursor != NULL) {
1391	*cursor = col;
1392	}
1393
1394	if (end != NULL) {
1395	*end = col + endadd;
1396	}
1397	} else {
1398	getvcol(wp, pos, start, cursor, end);
1399	}
1400	}
1401
1402	/// Get the leftmost and rightmost virtual column of pos1 and pos2.
1403	/// Used for Visual block mode.
1404	///
1405	/// @param wp
1406	/// @param pos1
1407	/// @param pos2
1408	/// @param left
1409	/// @param right
1410	void getvcols(win_T wp, pos_T pos1, pos_T pos2, colnr_T left,
1411	colnr_T *right)
1412	{
1413	colnr_T from1;
1414	colnr_T from2;
1415	colnr_T to1;
1416	colnr_T to2;
1417
1418	if (lt(pos1, pos2)) {
1419	getvvcol(wp, pos1, &from1, NULL, &to1);
1420	getvvcol(wp, pos2, &from2, NULL, &to2);
1421	} else {
1422	getvvcol(wp, pos2, &from1, NULL, &to1);
1423	getvvcol(wp, pos1, &from2, NULL, &to2);
1424	}
1425
1426	if (from2 < from1) {
1427	*left = from2;
1428	} else {
1429	*left = from1;
1430	}
1431
1432	if (to2 > to1) {
1433	if ((*p_sel == `'e'`) && (from2 - `1` >= to1)) {
1434	*right = from2 - `1`;
1435	} else {
1436	*right = to2;
1437	}
1438	} else {
1439	*right = to1;
1440	}
1441	}
1442
1443	/// skipwhite: skip over ' ' and '\t'.
1444	///
1445	/// @param[in] q String to skip in.
1446	///
1447	/// @return Pointer to character after the skipped whitespace.
1448	char_u skipwhite(const* char_u *q)
1449	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
1450	FUNC_ATTR_NONNULL_RET
1451	{
1452	const char_u *p = q;
1453	while (ascii_iswhite(*p)) {
1454	p++;
1455	}
1456	return (char_u *)p;
1457	}
1458
/// getwhitecols_curline: count the leading whitespace of the cursor line.
///
/// @return Number of whitespace columns (bytes) at the start of the line
///         returned by get_cursor_line_ptr().
intptr_t getwhitecols_curline(void)
{
  return getwhitecols(get_cursor_line_ptr());
}
1465
1466	intptr_t getwhitecols(const char_u *p)
1467	{
1468	return skipwhite(p) - p;
1469	}
1470
1471	/// Skip over digits
1472	///
1473	/// @param[in] q String to skip digits in.
1474	///
1475	/// @return Pointer to the character after the skipped digits.
1476	char_u skipdigits(const* char_u *q)
1477	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
1478	FUNC_ATTR_NONNULL_RET
1479	{
1480	const char_u *p = q;
1481	while (ascii_isdigit(*p)) {
1482	// skip to next non-digit
1483	p++;
1484	}
1485	return (char_u *)p;
1486	}
1487
1488	/// skip over binary digits
1489	///
1490	/// @param q pointer to string
1491	///
1492	/// @return Pointer to the character after the skipped digits.
1493	const char* skipbin(const char *q)
1494	FUNC_ATTR_PURE
1495	FUNC_ATTR_NONNULL_ALL
1496	FUNC_ATTR_NONNULL_RET
1497	{
1498	const char *p = q;
1499	while (ascii_isbdigit(*p)) {
1500	// skip to next non-digit
1501	p++;
1502	}
1503	return p;
1504	}
1505
1506	/// skip over digits and hex characters
1507	///
1508	/// @param q
1509	///
1510	/// @return Pointer to the character after the skipped digits and hex
1511	/// characters.
1512	char_u* skiphex(char_u *q)
1513	{
1514	char_u *p = q;
1515	while (ascii_isxdigit(*p)) {
1516	// skip to next non-digit
1517	p++;
1518	}
1519	return p;
1520	}
1521
1522	/// skip to digit (or NUL after the string)
1523	///
1524	/// @param q
1525	///
1526	/// @return Pointer to the digit or (NUL after the string).
1527	char_u* skiptodigit(char_u *q)
1528	{
1529	char_u *p = q;
1530	while (p != NUL && !ascii_isdigit(p)) {
1531	// skip to next digit
1532	p++;
1533	}
1534	return p;
1535	}
1536
1537	/// skip to binary character (or NUL after the string)
1538	///
1539	/// @param q pointer to string
1540	///
1541	/// @return Pointer to the binary character or (NUL after the string).
1542	const char* skiptobin(const char *q)
1543	FUNC_ATTR_PURE
1544	FUNC_ATTR_NONNULL_ALL
1545	FUNC_ATTR_NONNULL_RET
1546	{
1547	const char *p = q;
1548	while (p != NUL && !ascii_isbdigit(p)) {
1549	// skip to next digit
1550	p++;
1551	}
1552	return p;
1553	}
1554
1555	/// skip to hex character (or NUL after the string)
1556	///
1557	/// @param q
1558	///
1559	/// @return Pointer to the hex character or (NUL after the string).
1560	char_u* skiptohex(char_u *q)
1561	{
1562	char_u *p = q;
1563	while (p != NUL && !ascii_isxdigit(p)) {
1564	// skip to next digit
1565	p++;
1566	}
1567	return p;
1568	}
1569
1570	/// Skip over text until ' ' or '\t' or NUL
1571	///
1572	/// @param[in] p Text to skip over.
1573	///
1574	/// @return Pointer to the next whitespace or NUL character.
1575	char_u skiptowhite(const* char_u *p)
1576	{
1577	while (p != `' '` && p != `'\t'` && *p != NUL) {
1578	p++;
1579	}
1580	return (char_u *)p;
1581	}
1582
1583	/// skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
1584	///
1585	/// @param p
1586	///
1587	/// @return Pointer to the next whitespace character.
1588	char_u* skiptowhite_esc(char_u *p) {
1589	while (p != `' '` && p != `'\t'` && *p != NUL) {
1590	if (((p == `'\\'`) \|\| (p == Ctrl_V)) && (*(p + `1`) != NUL)) {
1591	++p;
1592	}
1593	++p;
1594	}
1595	return p;
1596	}
1597
1598	/// Gets a number from a string and skips over it, signalling overflow.
1599	///
1600	/// @param[out] pp A pointer to a pointer to char_u.
1601	/// It will be advanced past the read number.
1602	/// @param[out] nr Number read from the string.
1603	///
1604	/// @return true on success, false on error/overflow
1605	bool try_getdigits(char_u *pp, intmax_t nr)
1606	{
1607	errno = `0`;
1608	nr = strtoimax((char* )(pp), (char **)pp, `10`);
1609	if (errno == ERANGE && (nr == INTMAX_MIN \|\| nr == INTMAX_MAX)) {
1610	return false;
1611	}
1612	return true;
1613	}
1614
1615	/// Gets a number from a string and skips over it.
1616	///
1617	/// @param[out] pp Pointer to a pointer to char_u.
1618	/// It will be advanced past the read number.
1619	/// @param strict Abort on overflow.
1620	/// @param def Default value, if parsing fails or overflow occurs.
1621	///
1622	/// @return Number read from the string, or `def` on parse failure or overflow.
1623	intmax_t getdigits(char_u **pp, bool strict, intmax_t def)
1624	{
1625	intmax_t number;
1626	int ok = try_getdigits(pp, &number);
1627	if (strict && !ok) {
1628	abort();
1629	}
1630	return ok ? number : def;
1631	}
1632
1633	/// Gets an int number from a string.
1634	///
1635	/// @see getdigits
1636	int getdigits_int(char_u *pp, bool strict, int* def)
1637	{
1638	intmax_t number = getdigits(pp, strict, def);
1639	#if SIZEOF_INTMAX_T > SIZEOF_INT
1640	if (strict) {
1641	assert(number >= INT_MIN && number <= INT_MAX);
1642	} else if (!(number >= INT_MIN && number <= INT_MAX)) {
1643	return def;
1644	}
1645	#endif
1646	return (int)number;
1647	}
1648
1649	/// Gets a long number from a string.
1650	///
1651	/// @see getdigits
1652	long getdigits_long(char_u *pp, bool strict, long* def)
1653	{
1654	intmax_t number = getdigits(pp, strict, def);
1655	#if SIZEOF_INTMAX_T > SIZEOF_LONG
1656	if (strict) {
1657	assert(number >= LONG_MIN && number <= LONG_MAX);
1658	} else if (!(number >= LONG_MIN && number <= LONG_MAX)) {
1659	return def;
1660	}
1661	#endif
1662	return (long)number;
1663	}
1664
1665	/// Check that "lbuf" is empty or only contains blanks.
1666	///
1667	/// @param lbuf line buffer to check
1668	bool vim_isblankline(char_u *lbuf)
1669	{
1670	char_u *p = skipwhite(lbuf);
1671	return p == NUL \|\| p == `'\r'` \|\| *p == `'\n'`;
1672	}
1673
1674	/// Convert a string into a long and/or unsigned long, taking care of
1675	/// hexadecimal, octal and binary numbers. Accepts a '-' sign.
1676	/// If "prep" is not NULL, returns a flag to indicate the type of the number:
1677	/// 0 decimal
1678	/// '0' octal
1679	/// 'B' bin
1680	/// 'b' bin
1681	/// 'X' hex
1682	/// 'x' hex
1683	/// If "len" is not NULL, the length of the number in characters is returned.
1684	/// If "nptr" is not NULL, the signed result is returned in it.
1685	/// If "unptr" is not NULL, the unsigned result is returned in it.
1686	/// If "what" contains STR2NR_BIN recognize binary numbers.
1687	/// If "what" contains STR2NR_OCT recognize octal numbers.
1688	/// If "what" contains STR2NR_HEX recognize hex numbers.
1689	/// If "what" contains STR2NR_FORCE always assume bin/oct/hex.
1690	/// If maxlen > 0, check at a maximum maxlen chars.
1691	///
1692	/// @param start
1693	/// @param prep Returns guessed type of number 0 = decimal, 'x' or 'X' is
1694	/// hexadecimal, '0' = octal, 'b' or 'B' is binary. When using
1695	/// STR2NR_FORCE is always zero.
1696	/// @param len Returns the detected length of number.
1697	/// @param what Recognizes what number passed, @see ChStr2NrFlags.
1698	/// @param nptr Returns the signed result.
1699	/// @param unptr Returns the unsigned result.
1700	/// @param maxlen Max length of string to check.
1701	void vim_str2nr(const char_u *const start, int *const prep, int *const len,
1702	const int what, varnumber_T *const nptr,
1703	uvarnumber_T *const unptr, const int maxlen)
1704	FUNC_ATTR_NONNULL_ARG(`1`)
1705	{
1706	const char ptr = (const* char *)start;
1707	#define STRING_ENDED(ptr) \
1708	(!(maxlen == 0 \|\| (int)((ptr) - (const char *)start) < maxlen))
1709	int pre = `0`; // default is decimal
1710	const bool negative = (ptr[`0`] == `'-'`);
1711	uvarnumber_T un = `0`;
1712
1713	if (negative) {
1714	ptr++;
1715	}
1716
1717	if (what & STR2NR_FORCE) {
1718	// When forcing main consideration is skipping the prefix. Octal and decimal
1719	// numbers have no prefixes to skip. pre is not set.
1720	switch ((unsigned)what & (~(unsigned)STR2NR_FORCE)) {
1721	case STR2NR_HEX: {
1722	if (!STRING_ENDED(ptr + `2`)
1723	&& ptr[`0`] == `'0'`
1724	&& (ptr[`1`] == `'x'` \|\| ptr[`1`] == `'X'`)
1725	&& ascii_isxdigit(ptr[`2`])) {
1726	ptr += `2`;
1727	}
1728	goto vim_str2nr_hex;
1729	}
1730	case STR2NR_BIN: {
1731	if (!STRING_ENDED(ptr + `2`)
1732	&& ptr[`0`] == `'0'`
1733	&& (ptr[`1`] == `'b'` \|\| ptr[`1`] == `'B'`)
1734	&& ascii_isbdigit(ptr[`2`])) {
1735	ptr += `2`;
1736	}
1737	goto vim_str2nr_bin;
1738	}
1739	case STR2NR_OCT: {
1740	goto vim_str2nr_oct;
1741	}
1742	case `0`: {
1743	goto vim_str2nr_dec;
1744	}
1745	default: {
1746	assert(false);
1747	}
1748	}
1749	} else if ((what & (STR2NR_HEX\|STR2NR_OCT\|STR2NR_BIN))
1750	&& !STRING_ENDED(ptr + `1`)
1751	&& ptr[`0`] == `'0'` && ptr[`1`] != `'8'` && ptr[`1`] != `'9'`) {
1752	pre = ptr[`1`];
1753	// Detect hexadecimal: 0x or 0X followed by hex digit.
1754	if ((what & STR2NR_HEX)
1755	&& !STRING_ENDED(ptr + `2`)
1756	&& (pre == `'X'` \|\| pre == `'x'`)
1757	&& ascii_isxdigit(ptr[`2`])) {
1758	ptr += `2`;
1759	goto vim_str2nr_hex;
1760	}
1761	// Detect binary: 0b or 0B followed by 0 or 1.
1762	if ((what & STR2NR_BIN)
1763	&& !STRING_ENDED(ptr + `2`)
1764	&& (pre == `'B'` \|\| pre == `'b'`)
1765	&& ascii_isbdigit(ptr[`2`])) {
1766	ptr += `2`;
1767	goto vim_str2nr_bin;
1768	}
1769	// Detect octal number: zero followed by octal digits without '8' or '9'.
1770	pre = `0`;
1771	if (!(what & STR2NR_OCT)
1772	\|\| !(`'0'` <= ptr[`1`] && ptr[`1`] <= `'7'`)) {
1773	goto vim_str2nr_dec;
1774	}
1775	for (int i = `2`; !STRING_ENDED(ptr + i) && ascii_isdigit(ptr[i]); i++) {
1776	if (ptr[i] > `'7'`) {
1777	goto vim_str2nr_dec;
1778	}
1779	}
1780	pre = `'0'`;
1781	goto vim_str2nr_oct;
1782	} else {
1783	goto vim_str2nr_dec;
1784	}
1785
1786	// Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
1787	assert(false); // Should’ve used goto earlier.
1788	#define PARSE_NUMBER(base, cond, conv) \
1789	do { \
1790	while (!STRING_ENDED(ptr) && (cond)) { \
1791	const uvarnumber_T digit = (uvarnumber_T)(conv); \
1792	/* avoid ubsan error for overflow */ \
1793	if (un < UVARNUMBER_MAX / base \
1794	\|\| (un == UVARNUMBER_MAX / base \
1795	&& (base != 10 \|\| digit <= UVARNUMBER_MAX % 10))) { \
1796	un = base * un + digit; \
1797	} else { \
1798	un = UVARNUMBER_MAX; \
1799	} \
1800	ptr++; \
1801	} \
1802	} while (0)
1803	vim_str2nr_bin:
1804	PARSE_NUMBER(`2`, (ptr == `'0'` \|\| ptr == `'1'`), (*ptr - `'0'`));
1805	goto vim_str2nr_proceed;
1806	vim_str2nr_oct:
1807	PARSE_NUMBER(`8`, (`'0'` <= ptr && ptr <= `'7'`), (*ptr - `'0'`));
1808	goto vim_str2nr_proceed;
1809	vim_str2nr_dec:
1810	PARSE_NUMBER(`10`, (ascii_isdigit(ptr)), (ptr - `'0'`));
1811	goto vim_str2nr_proceed;
1812	vim_str2nr_hex:
1813	PARSE_NUMBER(`16`, (ascii_isxdigit(ptr)), (hex2nr(ptr)));
1814	goto vim_str2nr_proceed;
1815	#undef PARSE_NUMBER
1816
1817	vim_str2nr_proceed:
1818	if (prep != NULL) {
1819	*prep = pre;
1820	}
1821
1822	if (len != NULL) {
1823	len = (int)(ptr - (const* char *)start);
1824	}
1825
1826	if (nptr != NULL) {
1827	if (negative) { // account for leading '-' for decimal numbers
1828	// avoid ubsan error for overflow
1829	if (un > VARNUMBER_MAX) {
1830	*nptr = VARNUMBER_MIN;
1831	} else {
1832	*nptr = -(varnumber_T)un;
1833	}
1834	} else {
1835	if (un > VARNUMBER_MAX) {
1836	un = VARNUMBER_MAX;
1837	}
1838	*nptr = (varnumber_T)un;
1839	}
1840	}
1841
1842	if (unptr != NULL) {
1843	*unptr = un;
1844	}
1845	#undef STRING_ENDED
1846	}
1847
/// Return the value of a single hex character.
/// Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
///
/// @param c  Hex character to convert.
///
/// @return The value of the hex character.
int hex2nr(int c)
{
  if ((c >= 'a') && (c <= 'f')) {
    return c - 'a' + 10;
  }

  if ((c >= 'A') && (c <= 'F')) {
    return c - 'A' + 10;
  }
  // Anything else is treated as a decimal digit (see validity note above).
  return c - '0';
}
1865
1866	/// Check that "str" starts with a backslash that should be removed.
1867	/// For Windows this is only done when the character after the
1868	/// backslash is not a normal file name character.
1869	/// '$' is a valid file name character, we don't remove the backslash before
1870	/// it. This means it is not possible to use an environment variable after a
1871	/// backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1872	/// Although "\ name" is valid, the backslash in "Program\ files" must be
1873	/// removed. Assume a file name doesn't start with a space.
1874	/// For multi-byte names, never remove a backslash before a non-ascii
1875	/// character, assume that all multi-byte characters are valid file name
1876	/// characters.
1877	///
1878	/// @param str file path string to check
1879	bool rem_backslash(const char_u *str)
1880	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
1881	{
1882	#ifdef BACKSLASH_IN_FILENAME
1883	return str[`0`] == `'\\'`
1884	&& str[`1`] < `0x80`
1885	&& (str[`1`] == `' '`
1886	\|\| (str[`1`] != NUL
1887	&& str[`1`] != `'*'`
1888	&& str[`1`] != `'?'`
1889	&& !vim_isfilec(str[`1`])));
1890
1891	#else // ifdef BACKSLASH_IN_FILENAME
1892	return str[`0`] == `'\\'` && str[`1`] != NUL;
1893	#endif // ifdef BACKSLASH_IN_FILENAME
1894	}
1895
1896	/// Halve the number of backslashes in a file name argument.
1897	///
1898	/// @param p
1899	void backslash_halve(char_u *p)
1900	{
1901	for (; *p; ++p) {
1902	if (rem_backslash(p)) {
1903	STRMOVE(p, p + `1`);
1904	}
1905	}
1906	}
1907
1908	/// backslash_halve() plus save the result in allocated memory.
1909	///
1910	/// @param p
1911	///
1912	/// @return String with the number of backslashes halved.
1913	char_u backslash_halve_save(const* char_u *p)
1914	FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET
1915	{
1916	// TODO(philix): simplify and improve backslash_halve_save algorithm
1917	char_u *res = vim_strsave(p);
1918	backslash_halve(res);
1919	return res;
1920	}
1921

Browse the source code of neovim/src/nvim/charset.c