1 | // This is an open source non-commercial project. Dear PVS-Studio, please check |
2 | // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com |
3 | |
4 | /// @file charset.c |
5 | /// |
6 | /// Code related to character sets. |
7 | |
8 | #include <assert.h> |
9 | #include <string.h> |
10 | #include <wctype.h> |
11 | #include <wchar.h> // for towupper() and towlower() |
12 | #include <inttypes.h> |
13 | |
14 | #include "nvim/vim.h" |
15 | #include "nvim/ascii.h" |
16 | #include "nvim/charset.h" |
17 | #include "nvim/func_attr.h" |
18 | #include "nvim/indent.h" |
19 | #include "nvim/main.h" |
20 | #include "nvim/mark.h" |
21 | #include "nvim/mbyte.h" |
22 | #include "nvim/memline.h" |
23 | #include "nvim/memory.h" |
24 | #include "nvim/misc1.h" |
25 | #include "nvim/garray.h" |
26 | #include "nvim/move.h" |
27 | #include "nvim/option.h" |
28 | #include "nvim/os_unix.h" |
29 | #include "nvim/state.h" |
30 | #include "nvim/strings.h" |
31 | #include "nvim/path.h" |
32 | #include "nvim/cursor.h" |
33 | |
34 | #ifdef INCLUDE_GENERATED_DECLARATIONS |
35 | # include "charset.c.generated.h" |
36 | #endif |
37 | |
38 | |
/// Set to true at the end of a successful buf_init_chartab() call.
/// While it is still false, transchar() accepts the characters ' ' to '~' as
/// printable without relying on the flags stored in g_chartab[].
static bool chartab_initialized = false;
40 | |
// b_chartab[] is an array with 256 bits, each bit representing one of the
// characters 0-255.
// The bits are addressed in groups of 64: "c >> 6" selects the array element
// and "c & 0x3f" selects the bit inside that element.

// Set the bit for character "c" in buf->b_chartab[].
#define SET_CHARTAB(buf, c) \
  (buf)->b_chartab[(unsigned)(c) >> 6] |= (1ull << ((c) & 0x3f))
// Clear the bit for character "c" in buf->b_chartab[].
#define RESET_CHARTAB(buf, c) \
  (buf)->b_chartab[(unsigned)(c) >> 6] &= ~(1ull << ((c) & 0x3f))
// Non-zero when the bit for character "c" is set in the table "chartab".
#define GET_CHARTAB_TAB(chartab, c) \
  ((chartab)[(unsigned)(c) >> 6] & (1ull << ((c) & 0x3f)))
// Non-zero when the bit for character "c" is set in buf->b_chartab[].
#define GET_CHARTAB(buf, c) \
  GET_CHARTAB_TAB((buf)->b_chartab, c)
51 | |
// Table used below, see init_chartab() for an explanation.
// It has one entry for every byte value 0-255; each entry combines a cell
// count (CT_CELL_MASK) with the CT_*_CHAR flags defined below.
static char_u g_chartab[256];

// Flags for g_chartab[].
#define CT_CELL_MASK 0x07 ///< mask: nr of display cells (1, 2 or 4)
#define CT_PRINT_CHAR 0x10 ///< flag: set for printable chars
#define CT_ID_CHAR 0x20 ///< flag: set for ID chars
#define CT_FNAME_CHAR 0x40 ///< flag: set for file name chars
60 | |
61 | /// Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword |
62 | /// characters for current buffer. |
63 | /// |
64 | /// Depends on the option settings 'iskeyword', 'isident', 'isfname', |
65 | /// 'isprint' and 'encoding'. |
66 | /// |
67 | /// The index in g_chartab[] is the character when first byte is up to 0x80, |
68 | /// if the first byte is 0x80 and above it depends on further bytes. |
69 | /// |
70 | /// The contents of g_chartab[]: |
71 | /// - The lower two bits, masked by CT_CELL_MASK, give the number of display |
72 | /// cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80. |
73 | /// - CT_PRINT_CHAR bit is set when the character is printable (no need to |
74 | /// translate the character before displaying it). Note that only DBCS |
75 | /// characters can have 2 display cells and still be printable. |
76 | /// - CT_FNAME_CHAR bit is set when the character can be in a file name. |
77 | /// - CT_ID_CHAR bit is set when the character can be in an identifier. |
78 | /// |
79 | /// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has |
80 | /// an error, OK otherwise. |
81 | int init_chartab(void) |
82 | { |
83 | return buf_init_chartab(curbuf, true); |
84 | } |
85 | |
86 | /// Helper for init_chartab |
87 | /// |
88 | /// @param global false: only set buf->b_chartab[] |
89 | /// |
90 | /// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has |
91 | /// an error, OK otherwise. |
92 | int buf_init_chartab(buf_T *buf, int global) |
93 | { |
94 | int c; |
95 | int c2; |
96 | int i; |
97 | bool tilde; |
98 | bool do_isalpha; |
99 | |
100 | if (global) { |
101 | // Set the default size for printable characters: |
102 | // From <Space> to '~' is 1 (printable), others are 2 (not printable). |
103 | // This also inits all 'isident' and 'isfname' flags to false. |
104 | c = 0; |
105 | |
106 | while (c < ' ') { |
107 | g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2; |
108 | } |
109 | |
110 | while (c <= '~') { |
111 | g_chartab[c++] = 1 + CT_PRINT_CHAR; |
112 | } |
113 | |
114 | while (c < 256) { |
115 | if (c >= 0xa0) { |
116 | // UTF-8: bytes 0xa0 - 0xff are printable (latin1) |
117 | g_chartab[c++] = CT_PRINT_CHAR + 1; |
118 | } else { |
119 | // the rest is unprintable by default |
120 | g_chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2; |
121 | } |
122 | } |
123 | |
124 | // Assume that every multi-byte char is a filename character. |
125 | for (c = 1; c < 256; c++) { |
126 | if (c >= 0xa0) { |
127 | g_chartab[c] |= CT_FNAME_CHAR; |
128 | } |
129 | } |
130 | } |
131 | |
132 | // Init word char flags all to false |
133 | memset(buf->b_chartab, 0, (size_t)32); |
134 | |
135 | // In lisp mode the '-' character is included in keywords. |
136 | if (buf->b_p_lisp) { |
137 | SET_CHARTAB(buf, '-'); |
138 | } |
139 | |
140 | // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint' |
141 | // options Each option is a list of characters, character numbers or |
142 | // ranges, separated by commas, e.g.: "200-210,x,#-178,-" |
143 | for (i = global ? 0 : 3; i <= 3; i++) { |
144 | const char_u *p; |
145 | if (i == 0) { |
146 | // first round: 'isident' |
147 | p = p_isi; |
148 | } else if (i == 1) { |
149 | // second round: 'isprint' |
150 | p = p_isp; |
151 | } else if (i == 2) { |
152 | // third round: 'isfname' |
153 | p = p_isf; |
154 | } else { // i == 3 |
155 | // fourth round: 'iskeyword' |
156 | p = buf->b_p_isk; |
157 | } |
158 | |
159 | while (*p) { |
160 | tilde = false; |
161 | do_isalpha = false; |
162 | |
163 | if ((*p == '^') && (p[1] != NUL)) { |
164 | tilde = true; |
165 | ++p; |
166 | } |
167 | |
168 | if (ascii_isdigit(*p)) { |
169 | c = getdigits_int((char_u **)&p, true, 0); |
170 | } else { |
171 | c = mb_ptr2char_adv(&p); |
172 | } |
173 | c2 = -1; |
174 | |
175 | if ((*p == '-') && (p[1] != NUL)) { |
176 | ++p; |
177 | |
178 | if (ascii_isdigit(*p)) { |
179 | c2 = getdigits_int((char_u **)&p, true, 0); |
180 | } else { |
181 | c2 = mb_ptr2char_adv(&p); |
182 | } |
183 | } |
184 | |
185 | if ((c <= 0) |
186 | || (c >= 256) |
187 | || ((c2 < c) && (c2 != -1)) |
188 | || (c2 >= 256) |
189 | || !((*p == NUL) || (*p == ','))) { |
190 | return FAIL; |
191 | } |
192 | |
193 | if (c2 == -1) { // not a range |
194 | // A single '@' (not "@-@"): |
195 | // Decide on letters being ID/printable/keyword chars with |
196 | // standard function isalpha(). This takes care of locale for |
197 | // single-byte characters). |
198 | if (c == '@') { |
199 | do_isalpha = true; |
200 | c = 1; |
201 | c2 = 255; |
202 | } else { |
203 | c2 = c; |
204 | } |
205 | } |
206 | |
207 | while (c <= c2) { |
208 | // Use the MB_ functions here, because isalpha() doesn't |
209 | // work properly when 'encoding' is "latin1" and the locale is |
210 | // "C". |
211 | if (!do_isalpha |
212 | || mb_islower(c) |
213 | || mb_isupper(c)) { |
214 | if (i == 0) { |
215 | // (re)set ID flag |
216 | if (tilde) { |
217 | g_chartab[c] &= (uint8_t)~CT_ID_CHAR; |
218 | } else { |
219 | g_chartab[c] |= CT_ID_CHAR; |
220 | } |
221 | } else if (i == 1) { |
222 | // (re)set printable |
223 | // For double-byte we keep the cell width, so |
224 | // that we can detect it from the first byte. |
225 | if (((c < ' ') || (c > '~'))) { |
226 | if (tilde) { |
227 | g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK) |
228 | + ((dy_flags & DY_UHEX) ? 4 : 2)); |
229 | g_chartab[c] &= (uint8_t)~CT_PRINT_CHAR; |
230 | } else { |
231 | g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK) + 1); |
232 | g_chartab[c] |= CT_PRINT_CHAR; |
233 | } |
234 | } |
235 | } else if (i == 2) { |
236 | // (re)set fname flag |
237 | if (tilde) { |
238 | g_chartab[c] &= (uint8_t)~CT_FNAME_CHAR; |
239 | } else { |
240 | g_chartab[c] |= CT_FNAME_CHAR; |
241 | } |
242 | } else { // i == 3 |
243 | // (re)set keyword flag |
244 | if (tilde) { |
245 | RESET_CHARTAB(buf, c); |
246 | } else { |
247 | SET_CHARTAB(buf, c); |
248 | } |
249 | } |
250 | } |
251 | ++c; |
252 | } |
253 | |
254 | c = *p; |
255 | p = skip_to_option_part(p); |
256 | |
257 | if ((c == ',') && (*p == NUL)) { |
258 | // Trailing comma is not allowed. |
259 | return FAIL; |
260 | } |
261 | } |
262 | } |
263 | chartab_initialized = true; |
264 | return OK; |
265 | } |
266 | |
267 | /// Translate any special characters in buf[bufsize] in-place. |
268 | /// |
269 | /// The result is a string with only printable characters, but if there is not |
270 | /// enough room, not all characters will be translated. |
271 | /// |
272 | /// @param buf |
273 | /// @param bufsize |
274 | void trans_characters(char_u *buf, int bufsize) |
275 | { |
276 | int len; // length of string needing translation |
277 | int room; // room in buffer after string |
278 | char_u *trs; // translated character |
279 | int trs_len; // length of trs[] |
280 | |
281 | len = (int)STRLEN(buf); |
282 | room = bufsize - len; |
283 | |
284 | while (*buf != 0) { |
285 | // Assume a multi-byte character doesn't need translation. |
286 | if ((trs_len = (*mb_ptr2len)(buf)) > 1) { |
287 | len -= trs_len; |
288 | } else { |
289 | trs = transchar_byte(*buf); |
290 | trs_len = (int)STRLEN(trs); |
291 | |
292 | if (trs_len > 1) { |
293 | room -= trs_len - 1; |
294 | if (room <= 0) { |
295 | return; |
296 | } |
297 | memmove(buf + trs_len, buf + 1, (size_t)len); |
298 | } |
299 | memmove(buf, trs, (size_t)trs_len); |
300 | --len; |
301 | } |
302 | buf += trs_len; |
303 | } |
304 | } |
305 | |
306 | /// Find length of a string capable of holding s with all specials replaced |
307 | /// |
308 | /// Assumes replacing special characters with printable ones just like |
309 | /// strtrans() does. |
310 | /// |
311 | /// @param[in] s String to check. |
312 | /// |
313 | /// @return number of bytes needed to hold a translation of `s`, NUL byte not |
314 | /// included. |
315 | size_t transstr_len(const char *const s) |
316 | FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE |
317 | { |
318 | const char *p = s; |
319 | size_t len = 0; |
320 | |
321 | while (*p) { |
322 | const size_t l = (size_t)utfc_ptr2len((const char_u *)p); |
323 | if (l > 1) { |
324 | int pcc[MAX_MCO + 1]; |
325 | pcc[0] = utfc_ptr2char((const char_u *)p, &pcc[1]); |
326 | |
327 | if (vim_isprintc(pcc[0])) { |
328 | len += l; |
329 | } else { |
330 | for (size_t i = 0; i < ARRAY_SIZE(pcc) && pcc[i]; i++) { |
331 | char hexbuf[9]; |
332 | len += transchar_hex(hexbuf, pcc[i]); |
333 | } |
334 | } |
335 | p += l; |
336 | } else { |
337 | const int b2c_l = byte2cells((uint8_t)(*p++)); |
338 | // Illegal byte sequence may occupy up to 4 characters. |
339 | len += (size_t)(b2c_l > 0 ? b2c_l : 4); |
340 | } |
341 | } |
342 | return len; |
343 | } |
344 | |
345 | /// Replace special characters with printable ones |
346 | /// |
347 | /// @param[in] s String to replace characters from. |
348 | /// @param[out] buf Buffer to which result should be saved. |
349 | /// @param[in] len Buffer length. Resulting string may not occupy more then |
350 | /// len - 1 bytes (one for trailing NUL byte). |
351 | /// |
352 | /// @return length of the resulting string, without the NUL byte. |
353 | size_t transstr_buf(const char *const s, char *const buf, const size_t len) |
354 | FUNC_ATTR_NONNULL_ALL |
355 | { |
356 | const char *p = s; |
357 | char *buf_p = buf; |
358 | char *const buf_e = buf_p + len - 1; |
359 | |
360 | while (*p != NUL && buf_p < buf_e) { |
361 | const size_t l = (size_t)utfc_ptr2len((const char_u *)p); |
362 | if (l > 1) { |
363 | if (buf_p + l > buf_e) { |
364 | break; // Exceeded `buf` size. |
365 | } |
366 | int pcc[MAX_MCO + 1]; |
367 | pcc[0] = utfc_ptr2char((const char_u *)p, &pcc[1]); |
368 | |
369 | if (vim_isprintc(pcc[0])) { |
370 | memmove(buf_p, p, l); |
371 | buf_p += l; |
372 | } else { |
373 | for (size_t i = 0; i < ARRAY_SIZE(pcc) && pcc[i]; i++) { |
374 | char hexbuf[9]; // <up to 6 bytes>NUL |
375 | const size_t hexlen = transchar_hex(hexbuf, pcc[i]); |
376 | if (buf_p + hexlen > buf_e) { |
377 | break; |
378 | } |
379 | memmove(buf_p, hexbuf, hexlen); |
380 | buf_p += hexlen; |
381 | } |
382 | } |
383 | p += l; |
384 | } else { |
385 | const char *const tb = (const char *)transchar_byte((uint8_t)(*p++)); |
386 | const size_t tb_len = strlen(tb); |
387 | if (buf_p + tb_len > buf_e) { |
388 | break; // Exceeded `buf` size. |
389 | } |
390 | memmove(buf_p, tb, tb_len); |
391 | buf_p += tb_len; |
392 | } |
393 | } |
394 | *buf_p = NUL; |
395 | assert(buf_p <= buf_e); |
396 | return (size_t)(buf_p - buf); |
397 | } |
398 | |
399 | /// Copy string and replace special characters with printable characters |
400 | /// |
401 | /// Works like `strtrans()` does, used for that and in some other places. |
402 | /// |
403 | /// @param[in] s String to replace characters from. |
404 | /// |
405 | /// @return [allocated] translated string |
406 | char *transstr(const char *const s) |
407 | FUNC_ATTR_NONNULL_RET |
408 | { |
409 | // Compute the length of the result, taking account of unprintable |
410 | // multi-byte characters. |
411 | const size_t len = transstr_len((const char *)s) + 1; |
412 | char *const buf = xmalloc(len); |
413 | transstr_buf(s, buf, len); |
414 | return buf; |
415 | } |
416 | |
417 | /// Convert the string "str[orglen]" to do ignore-case comparing. |
418 | /// Use the current locale. |
419 | /// |
420 | /// When "buf" is NULL, return an allocated string. |
421 | /// Otherwise, put the result in buf, limited by buflen, and return buf. |
422 | char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen) |
423 | FUNC_ATTR_NONNULL_RET |
424 | { |
425 | garray_T ga; |
426 | int i; |
427 | int len = orglen; |
428 | |
429 | #define GA_CHAR(i) ((char_u *)ga.ga_data)[i] |
430 | #define GA_PTR(i) ((char_u *)ga.ga_data + i) |
431 | #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i]) |
432 | #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i) |
433 | |
434 | // Copy "str" into "buf" or allocated memory, unmodified. |
435 | if (buf == NULL) { |
436 | ga_init(&ga, 1, 10); |
437 | |
438 | ga_grow(&ga, len + 1); |
439 | memmove(ga.ga_data, str, (size_t)len); |
440 | ga.ga_len = len; |
441 | } else { |
442 | if (len >= buflen) { |
443 | // Ugly! |
444 | len = buflen - 1; |
445 | } |
446 | memmove(buf, str, (size_t)len); |
447 | } |
448 | |
449 | if (buf == NULL) { |
450 | GA_CHAR(len) = NUL; |
451 | } else { |
452 | buf[len] = NUL; |
453 | } |
454 | |
455 | // Make each character lower case. |
456 | i = 0; |
457 | while (STR_CHAR(i) != NUL) { |
458 | int c = utf_ptr2char(STR_PTR(i)); |
459 | int olen = utf_ptr2len(STR_PTR(i)); |
460 | int lc = mb_tolower(c); |
461 | |
462 | // Only replace the character when it is not an invalid |
463 | // sequence (ASCII character or more than one byte) and |
464 | // mb_tolower() doesn't return the original character. |
465 | if (((c < 0x80) || (olen > 1)) && (c != lc)) { |
466 | int nlen = utf_char2len(lc); |
467 | |
468 | // If the byte length changes need to shift the following |
469 | // characters forward or backward. |
470 | if (olen != nlen) { |
471 | if (nlen > olen) { |
472 | if (buf == NULL) { |
473 | ga_grow(&ga, nlen - olen + 1); |
474 | } else { |
475 | if (len + nlen - olen >= buflen) { |
476 | // out of memory, keep old char |
477 | lc = c; |
478 | nlen = olen; |
479 | } |
480 | } |
481 | } |
482 | |
483 | if (olen != nlen) { |
484 | if (buf == NULL) { |
485 | STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen); |
486 | ga.ga_len += nlen - olen; |
487 | } else { |
488 | STRMOVE(buf + i + nlen, buf + i + olen); |
489 | len += nlen - olen; |
490 | } |
491 | } |
492 | } |
493 | (void)utf_char2bytes(lc, STR_PTR(i)); |
494 | } |
495 | |
496 | // skip to next multi-byte char |
497 | i += (*mb_ptr2len)(STR_PTR(i)); |
498 | } |
499 | |
500 | |
501 | if (buf == NULL) { |
502 | return (char_u *)ga.ga_data; |
503 | } |
504 | return buf; |
505 | } |
506 | |
// Catch 22: g_chartab[] can't be initialized before the options are
// initialized, and initializing options may cause transchar() to be called!
// When chartab_initialized == false don't use g_chartab[].
// Does NOT work for multi-byte characters, c must be <= 255.
// Also doesn't work for the first byte of a multi-byte, "c" must be a
// character!
// NOTE(review): transchar() does pass values above 255 to transchar_hex(),
// so the "c must be <= 255" remark above may be outdated - confirm.
//
// Static result buffer shared by transchar() and transchar_byte().  The
// longest text stored here is "~@" followed by the longest transchar_hex()
// result ("<", six hex digits, ">") and a NUL: 2 + 8 + 1 = 11 bytes.
static char_u transchar_buf[11];
514 | |
515 | /// Translate a character into a printable one, leaving printable ASCII intact |
516 | /// |
517 | /// All unicode characters are considered non-printable in this function. |
518 | /// |
519 | /// @param[in] c Character to translate. |
520 | /// |
521 | /// @return translated character into a static buffer. |
522 | char_u *transchar(int c) |
523 | { |
524 | int i = 0; |
525 | if (IS_SPECIAL(c)) { |
526 | // special key code, display as ~@ char |
527 | transchar_buf[0] = '~'; |
528 | transchar_buf[1] = '@'; |
529 | i = 2; |
530 | c = K_SECOND(c); |
531 | } |
532 | |
533 | if ((!chartab_initialized && (((c >= ' ') && (c <= '~')))) |
534 | || ((c <= 0xFF) && vim_isprintc_strict(c))) { |
535 | // printable character |
536 | transchar_buf[i] = (char_u)c; |
537 | transchar_buf[i + 1] = NUL; |
538 | } else if (c <= 0xFF) { |
539 | transchar_nonprint(transchar_buf + i, c); |
540 | } else { |
541 | transchar_hex((char *)transchar_buf + i, c); |
542 | } |
543 | return transchar_buf; |
544 | } |
545 | |
546 | /// Like transchar(), but called with a byte instead of a character |
547 | /// |
548 | /// Checks for an illegal UTF-8 byte. |
549 | /// |
550 | /// @param[in] c Byte to translate. |
551 | /// |
552 | /// @return pointer to translated character in transchar_buf. |
553 | char_u *transchar_byte(const int c) |
554 | FUNC_ATTR_WARN_UNUSED_RESULT |
555 | { |
556 | if (c >= 0x80) { |
557 | transchar_nonprint(transchar_buf, c); |
558 | return transchar_buf; |
559 | } |
560 | return transchar(c); |
561 | } |
562 | |
563 | /// Convert non-printable characters to 2..4 printable ones |
564 | /// |
565 | /// @warning Does not work for multi-byte characters, c must be <= 255. |
566 | /// |
567 | /// @param[out] buf Buffer to store result in, must be able to hold at least |
568 | /// 5 bytes (conversion result + NUL). |
569 | /// @param[in] c Character to convert. NUL is assumed to be NL according to |
570 | /// `:h NL-used-for-NUL`. |
571 | void transchar_nonprint(char_u *buf, int c) |
572 | { |
573 | if (c == NL) { |
574 | // we use newline in place of a NUL |
575 | c = NUL; |
576 | } else if ((c == CAR) && (get_fileformat(curbuf) == EOL_MAC)) { |
577 | // we use CR in place of NL in this case |
578 | c = NL; |
579 | } |
580 | assert(c <= 0xff); |
581 | |
582 | if (dy_flags & DY_UHEX || c > 0x7f) { |
583 | // 'display' has "uhex" |
584 | transchar_hex((char *)buf, c); |
585 | } else { |
586 | // 0x00 - 0x1f and 0x7f |
587 | buf[0] = '^'; |
588 | // DEL displayed as ^? |
589 | buf[1] = (char_u)(c ^ 0x40); |
590 | |
591 | buf[2] = NUL; |
592 | } |
593 | } |
594 | |
595 | /// Convert a non-printable character to hex C string like "<FFFF>" |
596 | /// |
597 | /// @param[out] buf Buffer to store result in. |
598 | /// @param[in] c Character to convert. |
599 | /// |
600 | /// @return Number of bytes stored in buffer, excluding trailing NUL byte. |
601 | size_t transchar_hex(char *const buf, const int c) |
602 | FUNC_ATTR_NONNULL_ALL |
603 | { |
604 | size_t i = 0; |
605 | |
606 | buf[i++] = '<'; |
607 | if (c > 255) { |
608 | if (c > 255 * 256) { |
609 | buf[i++] = (char)nr2hex((unsigned)c >> 20); |
610 | buf[i++] = (char)nr2hex((unsigned)c >> 16); |
611 | } |
612 | buf[i++] = (char)nr2hex((unsigned)c >> 12); |
613 | buf[i++] = (char)nr2hex((unsigned)c >> 8); |
614 | } |
615 | buf[i++] = (char)(nr2hex((unsigned)c >> 4)); |
616 | buf[i++] = (char)(nr2hex((unsigned)c)); |
617 | buf[i++] = '>'; |
618 | buf[i] = NUL; |
619 | return i; |
620 | } |
621 | |
622 | /// Convert the lower 4 bits of byte "c" to its hex character |
623 | /// |
624 | /// Lower case letters are used to avoid the confusion of <F1> being 0xf1 or |
625 | /// function key 1. |
626 | /// |
627 | /// @param[in] n Number to convert. |
628 | /// |
629 | /// @return the hex character. |
630 | static inline unsigned nr2hex(unsigned n) |
631 | FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT |
632 | { |
633 | if ((n & 0xf) <= 9) { |
634 | return (n & 0xf) + '0'; |
635 | } |
636 | return (n & 0xf) - 10 + 'a'; |
637 | } |
638 | |
639 | /// Return number of display cells occupied by byte "b". |
640 | /// |
641 | /// Caller must make sure 0 <= b <= 255. |
642 | /// For multi-byte mode "b" must be the first byte of a character. |
643 | /// A TAB is counted as two cells: "^I". |
644 | /// This will return 0 for bytes >= 0x80, because the number of |
645 | /// cells depends on further bytes in UTF-8. |
646 | /// |
647 | /// @param b |
648 | /// |
649 | /// @reeturn Number of display cells. |
650 | int byte2cells(int b) |
651 | { |
652 | if (b >= 0x80) { |
653 | return 0; |
654 | } |
655 | return g_chartab[b] & CT_CELL_MASK; |
656 | } |
657 | |
658 | /// Return number of display cells occupied by character "c". |
659 | /// |
660 | /// "c" can be a special key (negative number) in which case 3 or 4 is returned. |
661 | /// A TAB is counted as two cells: "^I" or four: "<09>". |
662 | /// |
663 | /// @param c |
664 | /// |
665 | /// @return Number of display cells. |
666 | int char2cells(int c) |
667 | { |
668 | if (IS_SPECIAL(c)) { |
669 | return char2cells(K_SECOND(c)) + 2; |
670 | } |
671 | |
672 | if (c >= 0x80) { |
673 | // UTF-8: above 0x80 need to check the value |
674 | return utf_char2cells(c); |
675 | } |
676 | return g_chartab[c & 0xff] & CT_CELL_MASK; |
677 | } |
678 | |
679 | /// Return number of display cells occupied by character at "*p". |
680 | /// A TAB is counted as two cells: "^I" or four: "<09>". |
681 | /// |
682 | /// @param p |
683 | /// |
684 | /// @return number of display cells. |
685 | int ptr2cells(const char_u *p) |
686 | { |
687 | // For UTF-8 we need to look at more bytes if the first byte is >= 0x80. |
688 | if (*p >= 0x80) { |
689 | return utf_ptr2cells(p); |
690 | } |
691 | |
692 | // For DBCS we can tell the cell count from the first byte. |
693 | return g_chartab[*p] & CT_CELL_MASK; |
694 | } |
695 | |
696 | /// Return the number of character cells string "s" will take on the screen, |
697 | /// counting TABs as two characters: "^I". |
698 | /// |
699 | /// 's' must be non-null. |
700 | /// |
701 | /// @param s |
702 | /// |
703 | /// @return number of character cells. |
704 | int vim_strsize(char_u *s) |
705 | { |
706 | return vim_strnsize(s, (int)MAXCOL); |
707 | } |
708 | |
709 | /// Return the number of character cells string "s[len]" will take on the |
710 | /// screen, counting TABs as two characters: "^I". |
711 | /// |
712 | /// 's' must be non-null. |
713 | /// |
714 | /// @param s |
715 | /// @param len |
716 | /// |
717 | /// @return Number of character cells. |
718 | int vim_strnsize(char_u *s, int len) |
719 | { |
720 | assert(s != NULL); |
721 | int size = 0; |
722 | while (*s != NUL && --len >= 0) { |
723 | int l = (*mb_ptr2len)(s); |
724 | size += ptr2cells(s); |
725 | s += l; |
726 | len -= l - 1; |
727 | } |
728 | return size; |
729 | } |
730 | |
/// Return the number of characters 'c' will take on the screen, taking
/// into account the size of a tab.
/// Use a define to make it fast, this is used very often!!!
/// Also see getvcol() below.
///
/// The macro expands to an if/else statement in which both branches return
/// from the calling function:
/// - for a TAB, when (wp)->w_p_list is off or (wp)->w_p_lcs_chars.tab1 is
///   set: the distance from "col" to the next multiple of (buf)->b_p_ts;
/// - otherwise: ptr2cells(p).
///
/// Every argument is wrapped in parentheses so that an expression can be
/// passed safely.  "wp", "p" and "col" may still be evaluated more than
/// once.
///
/// @param wp   window whose 'list' settings are used
/// @param buf  buffer whose b_p_ts is used
/// @param p    pointer to the character
/// @param col  screen column where the character starts
///
/// @return Number of characters.
#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
  if (*(p) == TAB && (!(wp)->w_p_list || (wp)->w_p_lcs_chars.tab1)) { \
    const int ts = (int)(buf)->b_p_ts; \
    return (ts - (int)((col) % ts)); \
  } else { \
    return ptr2cells(p); \
  }
747 | |
748 | int chartabsize(char_u *p, colnr_T col) |
749 | { |
750 | RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col) |
751 | } |
752 | |
753 | static int win_chartabsize(win_T *wp, char_u *p, colnr_T col) |
754 | { |
755 | RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col) |
756 | } |
757 | |
758 | /// Return the number of characters the string 's' will take on the screen, |
759 | /// taking into account the size of a tab. |
760 | /// |
761 | /// @param s |
762 | /// |
763 | /// @return Number of characters the string will take on the screen. |
764 | int linetabsize(char_u *s) |
765 | { |
766 | return linetabsize_col(0, s); |
767 | } |
768 | |
769 | /// Like linetabsize(), but starting at column "startcol". |
770 | /// |
771 | /// @param startcol |
772 | /// @param s |
773 | /// |
774 | /// @return Number of characters the string will take on the screen. |
775 | int linetabsize_col(int startcol, char_u *s) |
776 | { |
777 | colnr_T col = startcol; |
778 | char_u *line = s; /* pointer to start of line, for breakindent */ |
779 | |
780 | while (*s != NUL) { |
781 | col += lbr_chartabsize_adv(line, &s, col); |
782 | } |
783 | return (int)col; |
784 | } |
785 | |
786 | /// Like linetabsize(), but for a given window instead of the current one. |
787 | /// |
788 | /// @param wp |
789 | /// @param line |
790 | /// @param len |
791 | /// |
792 | /// @return Number of characters the string will take on the screen. |
793 | unsigned int win_linetabsize(win_T *wp, char_u *line, colnr_T len) |
794 | { |
795 | colnr_T col = 0; |
796 | |
797 | for (char_u *s = line; |
798 | *s != NUL && (len == MAXCOL || s < line + len); |
799 | MB_PTR_ADV(s)) { |
800 | col += win_lbr_chartabsize(wp, line, s, col, NULL); |
801 | } |
802 | |
803 | return (unsigned int)col; |
804 | } |
805 | |
806 | /// Check that "c" is a normal identifier character: |
807 | /// Letters and characters from the 'isident' option. |
808 | /// |
809 | /// @param c character to check |
810 | bool vim_isIDc(int c) |
811 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT |
812 | { |
813 | return c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR); |
814 | } |
815 | |
816 | /// Check that "c" is a keyword character: |
817 | /// Letters and characters from 'iskeyword' option for the current buffer. |
818 | /// For multi-byte characters mb_get_class() is used (builtin rules). |
819 | /// |
820 | /// @param c character to check |
821 | bool vim_iswordc(const int c) |
822 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT |
823 | { |
824 | return vim_iswordc_buf(c, curbuf); |
825 | } |
826 | |
827 | /// Check that "c" is a keyword character |
828 | /// Letters and characters from 'iskeyword' option for given buffer. |
829 | /// For multi-byte characters mb_get_class() is used (builtin rules). |
830 | /// |
831 | /// @param[in] c Character to check. |
832 | /// @param[in] chartab Buffer chartab. |
833 | bool vim_iswordc_tab(const int c, const uint64_t *const chartab) |
834 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL |
835 | { |
836 | return (c >= 0x100 |
837 | ? (utf_class_tab(c, chartab) >= 2) |
838 | : (c > 0 && GET_CHARTAB_TAB(chartab, c) != 0)); |
839 | } |
840 | |
841 | /// Check that "c" is a keyword character: |
842 | /// Letters and characters from 'iskeyword' option for given buffer. |
843 | /// For multi-byte characters mb_get_class() is used (builtin rules). |
844 | /// |
845 | /// @param c character to check |
846 | /// @param buf buffer whose keywords to use |
847 | bool vim_iswordc_buf(const int c, buf_T *const buf) |
848 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(2) |
849 | { |
850 | return vim_iswordc_tab(c, buf->b_chartab); |
851 | } |
852 | |
853 | /// Just like vim_iswordc() but uses a pointer to the (multi-byte) character. |
854 | /// |
855 | /// @param p pointer to the multi-byte character |
856 | /// |
857 | /// @return true if "p" points to a keyword character. |
858 | bool vim_iswordp(const char_u *const p) |
859 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL |
860 | { |
861 | return vim_iswordp_buf(p, curbuf); |
862 | } |
863 | |
864 | /// Just like vim_iswordc_buf() but uses a pointer to the (multi-byte) |
865 | /// character. |
866 | /// |
867 | /// @param p pointer to the multi-byte character |
868 | /// @param buf buffer whose keywords to use |
869 | /// |
870 | /// @return true if "p" points to a keyword character. |
871 | bool vim_iswordp_buf(const char_u *const p, buf_T *const buf) |
872 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL |
873 | { |
874 | int c = *p; |
875 | |
876 | if (MB_BYTE2LEN(c) > 1) { |
877 | c = utf_ptr2char(p); |
878 | } |
879 | return vim_iswordc_buf(c, buf); |
880 | } |
881 | |
882 | /// Check that "c" is a valid file-name character. |
883 | /// Assume characters above 0x100 are valid (multi-byte). |
884 | /// |
885 | /// @param c character to check |
886 | bool vim_isfilec(int c) |
887 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT |
888 | { |
889 | return c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)); |
890 | } |
891 | |
892 | /// Check that "c" is a valid file-name character or a wildcard character |
893 | /// Assume characters above 0x100 are valid (multi-byte). |
894 | /// Explicitly interpret ']' as a wildcard character as path_has_wildcard("]") |
895 | /// returns false. |
896 | /// |
897 | /// @param c character to check |
898 | bool vim_isfilec_or_wc(int c) |
899 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT |
900 | { |
901 | char_u buf[2]; |
902 | buf[0] = (char_u)c; |
903 | buf[1] = NUL; |
904 | return vim_isfilec(c) || c == ']' || path_has_wildcard(buf); |
905 | } |
906 | |
907 | /// Check that "c" is a printable character. |
908 | /// Assume characters above 0x100 are printable for double-byte encodings. |
909 | /// |
910 | /// @param c character to check |
911 | bool vim_isprintc(int c) |
912 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT |
913 | { |
914 | if (c >= 0x100) { |
915 | return utf_printable(c); |
916 | } |
917 | return c > 0 && (g_chartab[c] & CT_PRINT_CHAR); |
918 | } |
919 | |
920 | /// Strict version of vim_isprintc(c), don't return true if "c" is the head |
921 | /// byte of a double-byte character. |
922 | /// |
923 | /// @param c character to check |
924 | /// |
925 | /// @return true if "c" is a printable character. |
926 | bool vim_isprintc_strict(int c) |
927 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT |
928 | { |
929 | if (c >= 0x100) { |
930 | return utf_printable(c); |
931 | } |
932 | return c > 0 && (g_chartab[c] & CT_PRINT_CHAR); |
933 | } |
934 | |
935 | /// like chartabsize(), but also check for line breaks on the screen |
936 | /// |
937 | /// @param line |
938 | /// @param s |
939 | /// @param col |
940 | /// |
941 | /// @return The number of characters taken up on the screen. |
942 | int lbr_chartabsize(char_u *line, unsigned char *s, colnr_T col) |
943 | { |
944 | if (!curwin->w_p_lbr && (*p_sbr == NUL) && !curwin->w_p_bri) { |
945 | if (curwin->w_p_wrap) { |
946 | return win_nolbr_chartabsize(curwin, s, col, NULL); |
947 | } |
948 | RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col) |
949 | } |
950 | return win_lbr_chartabsize(curwin, line == NULL ? s: line, s, col, NULL); |
951 | } |
952 | |
953 | /// Call lbr_chartabsize() and advance the pointer. |
954 | /// |
955 | /// @param line |
956 | /// @param s |
957 | /// @param col |
958 | /// |
959 | /// @return The number of characters take up on the screen. |
960 | int lbr_chartabsize_adv(char_u *line, char_u **s, colnr_T col) |
961 | { |
962 | int retval; |
963 | |
964 | retval = lbr_chartabsize(line, *s, col); |
965 | MB_PTR_ADV(*s); |
966 | return retval; |
967 | } |
968 | |
969 | /// This function is used very often, keep it fast!!!! |
970 | /// |
/// If "headp" is not NULL, set *headp to the size of what we add for the
/// 'showbreak' string at the start of the line.  Warning: *headp is only set
/// if it's a non-zero value, init to 0 before calling.
974 | /// |
975 | /// @param wp |
976 | /// @param line |
977 | /// @param s |
978 | /// @param col |
979 | /// @param headp |
980 | /// |
981 | /// @return The number of characters taken up on the screen. |
982 | int win_lbr_chartabsize(win_T *wp, char_u *line, char_u *s, colnr_T col, int *headp) |
983 | { |
984 | colnr_T col2; |
985 | colnr_T col_adj = 0; /* col + screen size of tab */ |
986 | colnr_T colmax; |
987 | int added; |
988 | int mb_added = 0; |
989 | int ; |
990 | char_u *ps; |
991 | int n; |
992 | |
993 | // No 'linebreak', 'showbreak' and 'breakindent': return quickly. |
994 | if (!wp->w_p_lbr && !wp->w_p_bri && (*p_sbr == NUL)) { |
995 | if (wp->w_p_wrap) { |
996 | return win_nolbr_chartabsize(wp, s, col, headp); |
997 | } |
998 | RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col) |
999 | } |
1000 | |
1001 | // First get normal size, without 'linebreak' |
1002 | int size = win_chartabsize(wp, s, col); |
1003 | int c = *s; |
1004 | if (*s == TAB) { |
1005 | col_adj = size - 1; |
1006 | } |
1007 | |
1008 | // If 'linebreak' set check at a blank before a non-blank if the line |
1009 | // needs a break here |
1010 | if (wp->w_p_lbr |
1011 | && vim_isbreak(c) |
1012 | && !vim_isbreak((int)s[1]) |
1013 | && wp->w_p_wrap |
1014 | && (wp->w_width_inner != 0)) { |
1015 | // Count all characters from first non-blank after a blank up to next |
1016 | // non-blank after a blank. |
1017 | numberextra = win_col_off(wp); |
1018 | col2 = col; |
1019 | colmax = (colnr_T)(wp->w_width_inner - numberextra - col_adj); |
1020 | |
1021 | if (col >= colmax) { |
1022 | colmax += col_adj; |
1023 | n = colmax + win_col_off2(wp); |
1024 | |
1025 | if (n > 0) { |
1026 | colmax += (((col - colmax) / n) + 1) * n - col_adj; |
1027 | } |
1028 | } |
1029 | |
1030 | for (;;) { |
1031 | ps = s; |
1032 | MB_PTR_ADV(s); |
1033 | c = *s; |
1034 | |
1035 | if (!(c != NUL |
1036 | && (vim_isbreak(c) || col2 == col || !vim_isbreak((int)(*ps))))) { |
1037 | break; |
1038 | } |
1039 | |
1040 | col2 += win_chartabsize(wp, s, col2); |
1041 | |
1042 | if (col2 >= colmax) { /* doesn't fit */ |
1043 | size = colmax - col + col_adj; |
1044 | break; |
1045 | } |
1046 | } |
1047 | } else if ((size == 2) |
1048 | && (MB_BYTE2LEN(*s) > 1) |
1049 | && wp->w_p_wrap |
1050 | && in_win_border(wp, col)) { |
1051 | // Count the ">" in the last column. |
1052 | ++size; |
1053 | mb_added = 1; |
1054 | } |
1055 | |
1056 | // May have to add something for 'breakindent' and/or 'showbreak' |
1057 | // string at start of line. |
1058 | // Set *headp to the size of what we add. |
1059 | added = 0; |
1060 | |
1061 | if ((*p_sbr != NUL || wp->w_p_bri) && wp->w_p_wrap && (col != 0)) { |
1062 | colnr_T sbrlen = 0; |
1063 | int numberwidth = win_col_off(wp); |
1064 | |
1065 | numberextra = numberwidth; |
1066 | col += numberextra + mb_added; |
1067 | |
1068 | if (col >= (colnr_T)wp->w_width_inner) { |
1069 | col -= wp->w_width_inner; |
1070 | numberextra = wp->w_width_inner - (numberextra - win_col_off2(wp)); |
1071 | if (col >= numberextra && numberextra > 0) { |
1072 | col %= numberextra; |
1073 | } |
1074 | if (*p_sbr != NUL) { |
1075 | sbrlen = (colnr_T)MB_CHARLEN(p_sbr); |
1076 | if (col >= sbrlen) { |
1077 | col -= sbrlen; |
1078 | } |
1079 | } |
1080 | if (col >= numberextra && numberextra > 0) { |
1081 | col %= numberextra; |
1082 | } else if (col > 0 && numberextra > 0) { |
1083 | col += numberwidth - win_col_off2(wp); |
1084 | } |
1085 | |
1086 | numberwidth -= win_col_off2(wp); |
1087 | } |
1088 | |
1089 | if (col == 0 || (col + size + sbrlen > (colnr_T)wp->w_width_inner)) { |
1090 | added = 0; |
1091 | |
1092 | if (*p_sbr != NUL) { |
1093 | if (size + sbrlen + numberwidth > (colnr_T)wp->w_width_inner) { |
1094 | // Calculate effective window width. |
1095 | int width = (colnr_T)wp->w_width_inner - sbrlen - numberwidth; |
1096 | int prev_width = col ? ((colnr_T)wp->w_width_inner - (sbrlen + col)) |
1097 | : 0; |
1098 | |
1099 | if (width <= 0) { |
1100 | width = 1; |
1101 | } |
1102 | added += ((size - prev_width) / width) * vim_strsize(p_sbr); |
1103 | if ((size - prev_width) % width) { |
1104 | // Wrapped, add another length of 'sbr'. |
1105 | added += vim_strsize(p_sbr); |
1106 | } |
1107 | } else { |
1108 | added += vim_strsize(p_sbr); |
1109 | } |
1110 | } |
1111 | |
1112 | if (wp->w_p_bri) |
1113 | added += get_breakindent_win(wp, line); |
1114 | |
1115 | size += added; |
1116 | if (col != 0) { |
1117 | added = 0; |
1118 | } |
1119 | } |
1120 | } |
1121 | |
1122 | if (headp != NULL) { |
1123 | *headp = added + mb_added; |
1124 | } |
1125 | return size; |
1126 | } |
1127 | |
1128 | /// Like win_lbr_chartabsize(), except that we know 'linebreak' is off and |
1129 | /// 'wrap' is on. This means we need to check for a double-byte character that |
1130 | /// doesn't fit at the end of the screen line. |
1131 | /// |
1132 | /// @param wp |
1133 | /// @param s |
1134 | /// @param col |
1135 | /// @param headp |
1136 | /// |
1137 | /// @return The number of characters take up on the screen. |
1138 | static int win_nolbr_chartabsize(win_T *wp, char_u *s, colnr_T col, int *headp) |
1139 | { |
1140 | int n; |
1141 | |
1142 | if ((*s == TAB) && (!wp->w_p_list || wp->w_p_lcs_chars.tab1)) { |
1143 | n = (int)wp->w_buffer->b_p_ts; |
1144 | return n - (col % n); |
1145 | } |
1146 | n = ptr2cells(s); |
1147 | |
1148 | // Add one cell for a double-width character in the last column of the |
1149 | // window, displayed with a ">". |
1150 | if ((n == 2) && (MB_BYTE2LEN(*s) > 1) && in_win_border(wp, col)) { |
1151 | if (headp != NULL) { |
1152 | *headp = 1; |
1153 | } |
1154 | return 3; |
1155 | } |
1156 | return n; |
1157 | } |
1158 | |
1159 | /// Check that virtual column "vcol" is in the rightmost column of window "wp". |
1160 | /// |
1161 | /// @param wp window |
1162 | /// @param vcol column number |
1163 | bool in_win_border(win_T *wp, colnr_T vcol) |
1164 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(1) |
1165 | { |
1166 | int width1; // width of first line (after line number) |
1167 | int width2; // width of further lines |
1168 | |
1169 | if (wp->w_width_inner == 0) { |
1170 | // there is no border |
1171 | return false; |
1172 | } |
1173 | width1 = wp->w_width_inner - win_col_off(wp); |
1174 | |
1175 | if ((int)vcol < width1 - 1) { |
1176 | return false; |
1177 | } |
1178 | |
1179 | if ((int)vcol == width1 - 1) { |
1180 | return true; |
1181 | } |
1182 | width2 = width1 + win_col_off2(wp); |
1183 | |
1184 | if (width2 <= 0) { |
1185 | return false; |
1186 | } |
1187 | return (vcol - width1) % width2 == width2 - 1; |
1188 | } |
1189 | |
/// Get virtual column number of pos.
///  start: on the first position of this character (TAB, ctrl)
/// cursor: where the cursor is on this character (first char, except for TAB)
///    end: on the last position of this character (TAB, ctrl)
///
/// This is used very often, keep it fast!
///
/// The line is walked character by character from its start, accumulating
/// the display width of each character until the character at pos->col (or
/// the terminating NUL) is reached.
///
/// @param wp      window whose buffer and display options are used
/// @param pos     position to convert; pos->col == MAXCOL means "up to the
///                NUL". pos->col is reset to 0 when the line is empty.
/// @param start   if not NULL, receives the first column of the character
/// @param cursor  if not NULL, receives the column the cursor is shown at
/// @param end     if not NULL, receives the last column of the character
void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor,
             colnr_T *end)
{
  colnr_T vcol;
  char_u *ptr;     // points to current char
  char_u *posptr;  // points to char at pos->col
  char_u *line;    // start of the line
  int incr;        // display width of the current character
  int head;        // cells added in front of the current character
  int ts = (int)wp->w_buffer->b_p_ts;
  int c;

  vcol = 0;
  line = ptr = ml_get_buf(wp->w_buffer, pos->lnum, false);

  if (pos->col == MAXCOL) {
    // continue until the NUL
    posptr = NULL;
  } else {
    // Special check for an empty line, which can happen on exit, when
    // ml_get_buf() always returns an empty string.
    if (*ptr == NUL) {
      pos->col = 0;
    }
    posptr = ptr + pos->col;
    // Move back by the offset reported by utf_head_off() so the comparison
    // below is against the first byte of the character.
    posptr -= utf_head_off(line, posptr);
  }

  // This function is used very often, do some speed optimizations.
  // When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set
  // use a simple loop.
  // Also use this when 'list' is set but tabs take their normal size.
  if ((!wp->w_p_list || (wp->w_p_lcs_chars.tab1 != NUL))
      && !wp->w_p_lbr
      && (*p_sbr == NUL)
      && !wp->w_p_bri ) {
    for (;;) {
      head = 0;
      c = *ptr;

      // make sure we don't go past the end of the line
      if (c == NUL) {
        // NUL at end of line only takes one column
        incr = 1;
        break;
      }

      // A tab gets expanded, depending on the current column
      if (c == TAB) {
        incr = ts - (vcol % ts);
      } else {
        // For utf-8, if the byte is >= 0x80, need to look at
        // further bytes to find the cell width.
        if (c >= 0x80) {
          incr = utf_ptr2cells(ptr);
        } else {
          incr = g_chartab[c] & CT_CELL_MASK;
        }

        // If a double-cell char doesn't fit at the end of a line
        // it wraps to the next line, it's like this char is three
        // cells wide.
        if ((incr == 2)
            && wp->w_p_wrap
            && (MB_BYTE2LEN(*ptr) > 1)
            && in_win_border(wp, vcol)) {
          incr++;
          head = 1;
        }
      }

      // Stop before adding the width of the target character, so "vcol"
      // stays at its first column while "incr"/"head" describe it.
      if ((posptr != NULL) && (ptr >= posptr)) {
        // character at pos->col
        break;
      }

      vcol += incr;
      MB_PTR_ADV(ptr);
    }
  } else {
    // Slow path: let win_lbr_chartabsize() compute each character's size.
    for (;;) {
      // A tab gets expanded, depending on the current column
      head = 0;
      incr = win_lbr_chartabsize(wp, line, ptr, vcol, &head);

      // make sure we don't go past the end of the line
      if (*ptr == NUL) {
        // NUL at end of line only takes one column
        incr = 1;
        break;
      }

      if ((posptr != NULL) && (ptr >= posptr)) {
        // character at pos->col
        break;
      }

      vcol += incr;
      MB_PTR_ADV(ptr);
    }
  }

  // Here "vcol" is the column where the target character starts, "incr" its
  // width and "head" the cells inserted in front of it.
  if (start != NULL) {
    *start = vcol + head;
  }

  if (end != NULL) {
    *end = vcol + incr - 1;
  }

  if (cursor != NULL) {
    // On a TAB the cursor goes on its last cell, but only when State has
    // NORMAL set, 'list' is off, virtual editing is inactive and the Visual
    // selection conditions below do not apply.
    if ((*ptr == TAB)
        && (State & NORMAL)
        && !wp->w_p_list
        && !virtual_active()
        && !(VIsual_active && ((*p_sel == 'e') || ltoreq(*pos, VIsual)))) {
      // cursor at end
      *cursor = vcol + incr - 1;
    } else {
      // cursor at start
      *cursor = vcol + head;
    }
  }
}
1326 | |
1327 | /// Get virtual cursor column in the current window, pretending 'list' is off. |
1328 | /// |
1329 | /// @param posp |
1330 | /// |
1331 | /// @retujrn The virtual cursor column. |
1332 | colnr_T getvcol_nolist(pos_T *posp) |
1333 | { |
1334 | int list_save = curwin->w_p_list; |
1335 | colnr_T vcol; |
1336 | |
1337 | curwin->w_p_list = false; |
1338 | if (posp->coladd) { |
1339 | getvvcol(curwin, posp, NULL, &vcol, NULL); |
1340 | } else { |
1341 | getvcol(curwin, posp, NULL, &vcol, NULL); |
1342 | } |
1343 | curwin->w_p_list = list_save; |
1344 | return vcol; |
1345 | } |
1346 | |
1347 | /// Get virtual column in virtual mode. |
1348 | /// |
1349 | /// @param wp |
1350 | /// @param pos |
1351 | /// @param start |
1352 | /// @param cursor |
1353 | /// @param end |
1354 | void getvvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, |
1355 | colnr_T *end) |
1356 | { |
1357 | colnr_T col; |
1358 | colnr_T coladd; |
1359 | colnr_T endadd; |
1360 | char_u *ptr; |
1361 | |
1362 | if (virtual_active()) { |
1363 | // For virtual mode, only want one value |
1364 | getvcol(wp, pos, &col, NULL, NULL); |
1365 | |
1366 | coladd = pos->coladd; |
1367 | endadd = 0; |
1368 | |
1369 | // Cannot put the cursor on part of a wide character. |
1370 | ptr = ml_get_buf(wp->w_buffer, pos->lnum, false); |
1371 | |
1372 | if (pos->col < (colnr_T)STRLEN(ptr)) { |
1373 | int c = utf_ptr2char(ptr + pos->col); |
1374 | if ((c != TAB) && vim_isprintc(c)) { |
1375 | endadd = (colnr_T)(char2cells(c) - 1); |
1376 | if (coladd > endadd) { |
1377 | // past end of line |
1378 | endadd = 0; |
1379 | } else { |
1380 | coladd = 0; |
1381 | } |
1382 | } |
1383 | } |
1384 | col += coladd; |
1385 | |
1386 | if (start != NULL) { |
1387 | *start = col; |
1388 | } |
1389 | |
1390 | if (cursor != NULL) { |
1391 | *cursor = col; |
1392 | } |
1393 | |
1394 | if (end != NULL) { |
1395 | *end = col + endadd; |
1396 | } |
1397 | } else { |
1398 | getvcol(wp, pos, start, cursor, end); |
1399 | } |
1400 | } |
1401 | |
1402 | /// Get the leftmost and rightmost virtual column of pos1 and pos2. |
1403 | /// Used for Visual block mode. |
1404 | /// |
1405 | /// @param wp |
1406 | /// @param pos1 |
1407 | /// @param pos2 |
1408 | /// @param left |
1409 | /// @param right |
1410 | void getvcols(win_T *wp, pos_T *pos1, pos_T *pos2, colnr_T *left, |
1411 | colnr_T *right) |
1412 | { |
1413 | colnr_T from1; |
1414 | colnr_T from2; |
1415 | colnr_T to1; |
1416 | colnr_T to2; |
1417 | |
1418 | if (lt(*pos1, *pos2)) { |
1419 | getvvcol(wp, pos1, &from1, NULL, &to1); |
1420 | getvvcol(wp, pos2, &from2, NULL, &to2); |
1421 | } else { |
1422 | getvvcol(wp, pos2, &from1, NULL, &to1); |
1423 | getvvcol(wp, pos1, &from2, NULL, &to2); |
1424 | } |
1425 | |
1426 | if (from2 < from1) { |
1427 | *left = from2; |
1428 | } else { |
1429 | *left = from1; |
1430 | } |
1431 | |
1432 | if (to2 > to1) { |
1433 | if ((*p_sel == 'e') && (from2 - 1 >= to1)) { |
1434 | *right = from2 - 1; |
1435 | } else { |
1436 | *right = to2; |
1437 | } |
1438 | } else { |
1439 | *right = to1; |
1440 | } |
1441 | } |
1442 | |
1443 | /// skipwhite: skip over ' ' and '\t'. |
1444 | /// |
1445 | /// @param[in] q String to skip in. |
1446 | /// |
1447 | /// @return Pointer to character after the skipped whitespace. |
1448 | char_u *skipwhite(const char_u *q) |
1449 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL |
1450 | FUNC_ATTR_NONNULL_RET |
1451 | { |
1452 | const char_u *p = q; |
1453 | while (ascii_iswhite(*p)) { |
1454 | p++; |
1455 | } |
1456 | return (char_u *)p; |
1457 | } |
1458 | |
1459 | // getwhitecols: return the number of whitespace |
1460 | // columns (bytes) at the start of a given line |
1461 | intptr_t getwhitecols_curline(void) |
1462 | { |
1463 | return getwhitecols(get_cursor_line_ptr()); |
1464 | } |
1465 | |
1466 | intptr_t getwhitecols(const char_u *p) |
1467 | { |
1468 | return skipwhite(p) - p; |
1469 | } |
1470 | |
1471 | /// Skip over digits |
1472 | /// |
1473 | /// @param[in] q String to skip digits in. |
1474 | /// |
1475 | /// @return Pointer to the character after the skipped digits. |
1476 | char_u *skipdigits(const char_u *q) |
1477 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL |
1478 | FUNC_ATTR_NONNULL_RET |
1479 | { |
1480 | const char_u *p = q; |
1481 | while (ascii_isdigit(*p)) { |
1482 | // skip to next non-digit |
1483 | p++; |
1484 | } |
1485 | return (char_u *)p; |
1486 | } |
1487 | |
1488 | /// skip over binary digits |
1489 | /// |
1490 | /// @param q pointer to string |
1491 | /// |
1492 | /// @return Pointer to the character after the skipped digits. |
1493 | const char* skipbin(const char *q) |
1494 | FUNC_ATTR_PURE |
1495 | FUNC_ATTR_NONNULL_ALL |
1496 | FUNC_ATTR_NONNULL_RET |
1497 | { |
1498 | const char *p = q; |
1499 | while (ascii_isbdigit(*p)) { |
1500 | // skip to next non-digit |
1501 | p++; |
1502 | } |
1503 | return p; |
1504 | } |
1505 | |
1506 | /// skip over digits and hex characters |
1507 | /// |
1508 | /// @param q |
1509 | /// |
1510 | /// @return Pointer to the character after the skipped digits and hex |
1511 | /// characters. |
1512 | char_u* skiphex(char_u *q) |
1513 | { |
1514 | char_u *p = q; |
1515 | while (ascii_isxdigit(*p)) { |
1516 | // skip to next non-digit |
1517 | p++; |
1518 | } |
1519 | return p; |
1520 | } |
1521 | |
1522 | /// skip to digit (or NUL after the string) |
1523 | /// |
1524 | /// @param q |
1525 | /// |
1526 | /// @return Pointer to the digit or (NUL after the string). |
1527 | char_u* skiptodigit(char_u *q) |
1528 | { |
1529 | char_u *p = q; |
1530 | while (*p != NUL && !ascii_isdigit(*p)) { |
1531 | // skip to next digit |
1532 | p++; |
1533 | } |
1534 | return p; |
1535 | } |
1536 | |
1537 | /// skip to binary character (or NUL after the string) |
1538 | /// |
1539 | /// @param q pointer to string |
1540 | /// |
1541 | /// @return Pointer to the binary character or (NUL after the string). |
1542 | const char* skiptobin(const char *q) |
1543 | FUNC_ATTR_PURE |
1544 | FUNC_ATTR_NONNULL_ALL |
1545 | FUNC_ATTR_NONNULL_RET |
1546 | { |
1547 | const char *p = q; |
1548 | while (*p != NUL && !ascii_isbdigit(*p)) { |
1549 | // skip to next digit |
1550 | p++; |
1551 | } |
1552 | return p; |
1553 | } |
1554 | |
1555 | /// skip to hex character (or NUL after the string) |
1556 | /// |
1557 | /// @param q |
1558 | /// |
1559 | /// @return Pointer to the hex character or (NUL after the string). |
1560 | char_u* skiptohex(char_u *q) |
1561 | { |
1562 | char_u *p = q; |
1563 | while (*p != NUL && !ascii_isxdigit(*p)) { |
1564 | // skip to next digit |
1565 | p++; |
1566 | } |
1567 | return p; |
1568 | } |
1569 | |
1570 | /// Skip over text until ' ' or '\t' or NUL |
1571 | /// |
1572 | /// @param[in] p Text to skip over. |
1573 | /// |
1574 | /// @return Pointer to the next whitespace or NUL character. |
1575 | char_u *skiptowhite(const char_u *p) |
1576 | { |
1577 | while (*p != ' ' && *p != '\t' && *p != NUL) { |
1578 | p++; |
1579 | } |
1580 | return (char_u *)p; |
1581 | } |
1582 | |
1583 | /// skiptowhite_esc: Like skiptowhite(), but also skip escaped chars |
1584 | /// |
1585 | /// @param p |
1586 | /// |
1587 | /// @return Pointer to the next whitespace character. |
1588 | char_u* skiptowhite_esc(char_u *p) { |
1589 | while (*p != ' ' && *p != '\t' && *p != NUL) { |
1590 | if (((*p == '\\') || (*p == Ctrl_V)) && (*(p + 1) != NUL)) { |
1591 | ++p; |
1592 | } |
1593 | ++p; |
1594 | } |
1595 | return p; |
1596 | } |
1597 | |
1598 | /// Gets a number from a string and skips over it, signalling overflow. |
1599 | /// |
1600 | /// @param[out] pp A pointer to a pointer to char_u. |
1601 | /// It will be advanced past the read number. |
1602 | /// @param[out] nr Number read from the string. |
1603 | /// |
1604 | /// @return true on success, false on error/overflow |
1605 | bool try_getdigits(char_u **pp, intmax_t *nr) |
1606 | { |
1607 | errno = 0; |
1608 | *nr = strtoimax((char *)(*pp), (char **)pp, 10); |
1609 | if (errno == ERANGE && (*nr == INTMAX_MIN || *nr == INTMAX_MAX)) { |
1610 | return false; |
1611 | } |
1612 | return true; |
1613 | } |
1614 | |
1615 | /// Gets a number from a string and skips over it. |
1616 | /// |
1617 | /// @param[out] pp Pointer to a pointer to char_u. |
1618 | /// It will be advanced past the read number. |
1619 | /// @param strict Abort on overflow. |
1620 | /// @param def Default value, if parsing fails or overflow occurs. |
1621 | /// |
1622 | /// @return Number read from the string, or `def` on parse failure or overflow. |
1623 | intmax_t getdigits(char_u **pp, bool strict, intmax_t def) |
1624 | { |
1625 | intmax_t number; |
1626 | int ok = try_getdigits(pp, &number); |
1627 | if (strict && !ok) { |
1628 | abort(); |
1629 | } |
1630 | return ok ? number : def; |
1631 | } |
1632 | |
1633 | /// Gets an int number from a string. |
1634 | /// |
1635 | /// @see getdigits |
1636 | int getdigits_int(char_u **pp, bool strict, int def) |
1637 | { |
1638 | intmax_t number = getdigits(pp, strict, def); |
1639 | #if SIZEOF_INTMAX_T > SIZEOF_INT |
1640 | if (strict) { |
1641 | assert(number >= INT_MIN && number <= INT_MAX); |
1642 | } else if (!(number >= INT_MIN && number <= INT_MAX)) { |
1643 | return def; |
1644 | } |
1645 | #endif |
1646 | return (int)number; |
1647 | } |
1648 | |
1649 | /// Gets a long number from a string. |
1650 | /// |
1651 | /// @see getdigits |
1652 | long getdigits_long(char_u **pp, bool strict, long def) |
1653 | { |
1654 | intmax_t number = getdigits(pp, strict, def); |
1655 | #if SIZEOF_INTMAX_T > SIZEOF_LONG |
1656 | if (strict) { |
1657 | assert(number >= LONG_MIN && number <= LONG_MAX); |
1658 | } else if (!(number >= LONG_MIN && number <= LONG_MAX)) { |
1659 | return def; |
1660 | } |
1661 | #endif |
1662 | return (long)number; |
1663 | } |
1664 | |
1665 | /// Check that "lbuf" is empty or only contains blanks. |
1666 | /// |
1667 | /// @param lbuf line buffer to check |
1668 | bool vim_isblankline(char_u *lbuf) |
1669 | { |
1670 | char_u *p = skipwhite(lbuf); |
1671 | return *p == NUL || *p == '\r' || *p == '\n'; |
1672 | } |
1673 | |
/// Convert a string into a varnumber_T and/or uvarnumber_T, taking care of
/// hexadecimal, octal and binary numbers.  Accepts a '-' sign.
/// If "prep" is not NULL, returns a flag to indicate the type of the number:
///   0      decimal
///   '0'    octal
///   'B'    bin
///   'b'    bin
///   'X'    hex
///   'x'    hex
/// If "len" is not NULL, the length of the number in characters is returned.
/// If "nptr" is not NULL, the signed result is returned in it.
/// If "unptr" is not NULL, the unsigned result is returned in it.
/// If "what" contains STR2NR_BIN recognize binary numbers.
/// If "what" contains STR2NR_OCT recognize octal numbers.
/// If "what" contains STR2NR_HEX recognize hex numbers.
/// If "what" contains STR2NR_FORCE always assume bin/oct/hex.
/// If maxlen > 0, check at a maximum maxlen chars.
///
/// Overflow does not wrap: the unsigned accumulator saturates at
/// UVARNUMBER_MAX and the signed result is clamped to VARNUMBER_MAX, or to
/// VARNUMBER_MIN for a negative number.
///
/// @param  start
/// @param  prep    Returns guessed type of number 0 = decimal, 'x' or 'X' is
///                 hexadecimal, '0' = octal, 'b' or 'B' is binary. When using
///                 STR2NR_FORCE is always zero.
/// @param  len     Returns the detected length of number.
/// @param  what    Recognizes what number passed, @see ChStr2NrFlags.
/// @param  nptr    Returns the signed result.
/// @param  unptr   Returns the unsigned result.
///                 NOTE(review): when "nptr" is also non-NULL and the number
///                 is not negative, the value has already been clamped to
///                 VARNUMBER_MAX before it is stored here.
/// @param  maxlen  Max length of string to check.
void vim_str2nr(const char_u *const start, int *const prep, int *const len,
                const int what, varnumber_T *const nptr,
                uvarnumber_T *const unptr, const int maxlen)
  FUNC_ATTR_NONNULL_ARG(1)
{
  const char *ptr = (const char *)start;
// True when "ptr" is at or beyond the "maxlen" limit (never when maxlen == 0).
#define STRING_ENDED(ptr) \
    (!(maxlen == 0 || (int)((ptr) - (const char *)start) < maxlen))
  int pre = 0;  // default is decimal
  const bool negative = (ptr[0] == '-');
  uvarnumber_T un = 0;

  if (negative) {
    ptr++;
  }

  if (what & STR2NR_FORCE) {
    // When forcing main consideration is skipping the prefix. Octal and decimal
    // numbers have no prefixes to skip. pre is not set.
    switch ((unsigned)what & (~(unsigned)STR2NR_FORCE)) {
      case STR2NR_HEX: {
        // Skip an optional "0x"/"0X", but only when a hex digit follows.
        if (!STRING_ENDED(ptr + 2)
            && ptr[0] == '0'
            && (ptr[1] == 'x' || ptr[1] == 'X')
            && ascii_isxdigit(ptr[2])) {
          ptr += 2;
        }
        goto vim_str2nr_hex;
      }
      case STR2NR_BIN: {
        // Skip an optional "0b"/"0B", but only when a binary digit follows.
        if (!STRING_ENDED(ptr + 2)
            && ptr[0] == '0'
            && (ptr[1] == 'b' || ptr[1] == 'B')
            && ascii_isbdigit(ptr[2])) {
          ptr += 2;
        }
        goto vim_str2nr_bin;
      }
      case STR2NR_OCT: {
        goto vim_str2nr_oct;
      }
      case 0: {
        goto vim_str2nr_dec;
      }
      default: {
        // STR2NR_FORCE combined with more than one base flag is not expected.
        assert(false);
      }
    }
  } else if ((what & (STR2NR_HEX|STR2NR_OCT|STR2NR_BIN))
             && !STRING_ENDED(ptr + 1)
             && ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9') {
    pre = ptr[1];
    // Detect hexadecimal: 0x or 0X followed by hex digit.
    if ((what & STR2NR_HEX)
        && !STRING_ENDED(ptr + 2)
        && (pre == 'X' || pre == 'x')
        && ascii_isxdigit(ptr[2])) {
      ptr += 2;
      goto vim_str2nr_hex;
    }
    // Detect binary: 0b or 0B followed by 0 or 1.
    if ((what & STR2NR_BIN)
        && !STRING_ENDED(ptr + 2)
        && (pre == 'B' || pre == 'b')
        && ascii_isbdigit(ptr[2])) {
      ptr += 2;
      goto vim_str2nr_bin;
    }
    // Detect octal number: zero followed by octal digits without '8' or '9'.
    pre = 0;
    if (!(what & STR2NR_OCT)
        || !('0' <= ptr[1] && ptr[1] <= '7')) {
      goto vim_str2nr_dec;
    }
    // An '8' or '9' anywhere in the digit run makes the number decimal.
    for (int i = 2; !STRING_ENDED(ptr + i) && ascii_isdigit(ptr[i]); i++) {
      if (ptr[i] > '7') {
        goto vim_str2nr_dec;
      }
    }
    pre = '0';
    goto vim_str2nr_oct;
  } else {
    goto vim_str2nr_dec;
  }

  // Do the string-to-numeric conversion "manually" to avoid sscanf quirks.
  assert(false);  // Should've used goto earlier.
// Accumulate digits into "un" while "cond" holds and the maxlen limit is not
// reached; "conv" yields the value of the current digit. On overflow "un" is
// set to UVARNUMBER_MAX instead of wrapping.
#define PARSE_NUMBER(base, cond, conv) \
  do { \
    while (!STRING_ENDED(ptr) && (cond)) { \
      const uvarnumber_T digit = (uvarnumber_T)(conv); \
      /* avoid ubsan error for overflow */ \
      if (un < UVARNUMBER_MAX / base \
          || (un == UVARNUMBER_MAX / base \
              && (base != 10 || digit <= UVARNUMBER_MAX % 10))) { \
        un = base * un + digit; \
      } else { \
        un = UVARNUMBER_MAX; \
      } \
      ptr++; \
    } \
  } while (0)
vim_str2nr_bin:
  PARSE_NUMBER(2, (*ptr == '0' || *ptr == '1'), (*ptr - '0'));
  goto vim_str2nr_proceed;
vim_str2nr_oct:
  PARSE_NUMBER(8, ('0' <= *ptr && *ptr <= '7'), (*ptr - '0'));
  goto vim_str2nr_proceed;
vim_str2nr_dec:
  PARSE_NUMBER(10, (ascii_isdigit(*ptr)), (*ptr - '0'));
  goto vim_str2nr_proceed;
vim_str2nr_hex:
  PARSE_NUMBER(16, (ascii_isxdigit(*ptr)), (hex2nr(*ptr)));
  goto vim_str2nr_proceed;
#undef PARSE_NUMBER

vim_str2nr_proceed:
  // Store the results in whichever output pointers were supplied.
  if (prep != NULL) {
    *prep = pre;
  }

  if (len != NULL) {
    // Length includes the leading '-' and any skipped prefix.
    *len = (int)(ptr - (const char *)start);
  }

  if (nptr != NULL) {
    if (negative) {  // account for leading '-' for decimal numbers
      // avoid ubsan error for overflow
      if (un > VARNUMBER_MAX) {
        *nptr = VARNUMBER_MIN;
      } else {
        *nptr = -(varnumber_T)un;
      }
    } else {
      // Clamp to the signed maximum; this also changes "un" itself.
      if (un > VARNUMBER_MAX) {
        un = VARNUMBER_MAX;
      }
      *nptr = (varnumber_T)un;
    }
  }

  if (unptr != NULL) {
    *unptr = un;
  }
#undef STRING_ENDED
}
1847 | |
/// Return the value of a single hex character.
/// Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
/// Any other argument is treated like a decimal digit, i.e. c - '0'.
///
/// @param c  hex character to convert
///
/// @return The value of the hex character.
int hex2nr(int c)
{
  int base_char = '0';
  int offset = 0;

  if (c >= 'a' && c <= 'f') {
    base_char = 'a';
    offset = 10;
  } else if (c >= 'A' && c <= 'F') {
    base_char = 'A';
    offset = 10;
  }
  return c - base_char + offset;
}
1865 | |
1866 | /// Check that "str" starts with a backslash that should be removed. |
1867 | /// For Windows this is only done when the character after the |
1868 | /// backslash is not a normal file name character. |
1869 | /// '$' is a valid file name character, we don't remove the backslash before |
1870 | /// it. This means it is not possible to use an environment variable after a |
1871 | /// backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works. |
1872 | /// Although "\ name" is valid, the backslash in "Program\ files" must be |
1873 | /// removed. Assume a file name doesn't start with a space. |
1874 | /// For multi-byte names, never remove a backslash before a non-ascii |
1875 | /// character, assume that all multi-byte characters are valid file name |
1876 | /// characters. |
1877 | /// |
1878 | /// @param str file path string to check |
1879 | bool rem_backslash(const char_u *str) |
1880 | FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL |
1881 | { |
1882 | #ifdef BACKSLASH_IN_FILENAME |
1883 | return str[0] == '\\' |
1884 | && str[1] < 0x80 |
1885 | && (str[1] == ' ' |
1886 | || (str[1] != NUL |
1887 | && str[1] != '*' |
1888 | && str[1] != '?' |
1889 | && !vim_isfilec(str[1]))); |
1890 | |
1891 | #else // ifdef BACKSLASH_IN_FILENAME |
1892 | return str[0] == '\\' && str[1] != NUL; |
1893 | #endif // ifdef BACKSLASH_IN_FILENAME |
1894 | } |
1895 | |
1896 | /// Halve the number of backslashes in a file name argument. |
1897 | /// |
1898 | /// @param p |
1899 | void backslash_halve(char_u *p) |
1900 | { |
1901 | for (; *p; ++p) { |
1902 | if (rem_backslash(p)) { |
1903 | STRMOVE(p, p + 1); |
1904 | } |
1905 | } |
1906 | } |
1907 | |
1908 | /// backslash_halve() plus save the result in allocated memory. |
1909 | /// |
1910 | /// @param p |
1911 | /// |
1912 | /// @return String with the number of backslashes halved. |
1913 | char_u *backslash_halve_save(const char_u *p) |
1914 | FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET |
1915 | { |
1916 | // TODO(philix): simplify and improve backslash_halve_save algorithm |
1917 | char_u *res = vim_strsave(p); |
1918 | backslash_halve(res); |
1919 | return res; |
1920 | } |
1921 | |